diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000000..d4a27a7e44 --- /dev/null +++ b/.clang-format @@ -0,0 +1,126 @@ +--- +# General options +Language: Cpp +Standard: c++17 +DisableFormat: false + +AccessModifierOffset: -4 +AlignAfterOpenBracket: AlwaysBreak +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: false +AlignEscapedNewlines: Right +AlignOperands: false +AlignTrailingComments: false +AllowAllArgumentsOnNextLine: false +AllowAllConstructorInitializersOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: Never +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: false +BinPackParameters: false +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Allman +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeComma +BreakInheritanceList: BeforeComma +BreakStringLiterals: true +ColumnLimit: 119 +CommentPragmas: '^ COMMENT pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Regroup +IncludeIsMainRegex: '(Test)?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: false +IndentGotoLabels: true +IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 2 +NamespaceIndentation: All +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 19 
+PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 1000 +PointerAlignment: Left +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: true +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: Never +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +TabWidth: 4 +UseCRLF: false +UseTab: Never + +# Project specific options +IncludeCategories: + # Local headers (in "") above all else + - Regex: '"([A-Za-z0-9.\/-_])+"' + Priority: 1 + # + - Regex: '' + Priority: 2 + # + - Regex: '' + Priority: 3 + # + - Regex: '' + Priority: 4 + # + - Regex: '' + Priority: 5 + # C++ standard library headers are the last group to be included + - Regex: '<([A-Za-z0-9\/-_])+>' + Priority: 6 + # Includes that made it this far are third-party headers and will be placed + # below alpaka's includes + - Regex: '<([A-Za-z0-9.\/-_])+>' + Priority: 7 + +# Future options - not supported in clang-format 11 +# AlignConsecutiveBitFields: false +# AllowShortEnumsOnASingleLine: false +# BitFieldColonSpacing: Both +# IndentCaseBlocks: true +# IndentExternBlock: AfterExternBlock +# OperandAlignmentStyle: Align +... 
diff --git a/.gitignore b/.gitignore index 0bc62f425b..3d9c18fcf4 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,9 @@ # Visual Studio Code configuration files .vscode +# JetBrains project files +.idea/ + # python byte code *.pyc diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000..7f8cb77a21 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,69 @@ + +stages: + - validate + - generate + - compile + +.base_generate-reduced-matrix: + stage: generate + script: + - apt update + - apt install -y python3-pip + - pip3 install allpairspy + - $CI_PROJECT_DIR/share/ci/git_merge.sh + - $CI_PROJECT_DIR/share/ci/generate_reduced_matrix.sh -n ${TEST_TUPLE_NUM_ELEM} > compile.yml + - cat compile.yml + artifacts: + paths: + - compile.yml + +# pull request validation: +# - check PR destination +# - check python code style: flake8, pyflake +# - rebase the PR to the destination branch +# - check C++ code style +pull-request-validation: + stage: validate + image: ubuntu:focal + script: + - apt update + - apt install -y -q git curl wget python3 python3-pip + # Test if pull request can be merged into the destination branch + - $CI_PROJECT_DIR/test/correctBranchPR + - source $CI_PROJECT_DIR/share/ci/git_merge.sh + - pip3 install -U flake8 pyflakes + # Test Python Files for PEP8 conformance + - flake8 --exclude=thirdParty . + # Warnings, unused code, etc. + - pyflakes . + # install clang-format-11 + - apt install -y -q gnupg2 + - wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - + - echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-11 main" | tee -a /etc/apt/sources.list + - apt update + - apt install -y clang-format-11 + # Check C++ code style + - source $CI_PROJECT_DIR/share/ci/check_cpp_code_style.sh + tags: + - x86_64 + +# generate reduced test matrix +# required variables (space separated lists): +# PIC_INPUTS - path to examples relative to share/picongpu +# e.g. 
+# "examples" starts one gitlab job per directory in `examples/*` +# "examples/" compile all directories in `examples/*` within one gitlab job +# "examples/KelvinHelmholtz" compile all cases within one gitlab job +generate-reduced-matrix: + variables: + PIC_INPUTS: "examples tests benchmarks" + TEST_TUPLE_NUM_ELEM: 1 + extends: ".base_generate-reduced-matrix" + +compile-reduced-matrix: + stage: compile + trigger: + include: + - artifact: compile.yml + job: generate-reduced-matrix + strategy: depend diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index a9545f476a..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,180 +0,0 @@ -language: none -sudo: false -dist: trusty - -cache: - apt: true - directories: - - $HOME/.cache/spack - - $HOME/.cache/cmake-3.11.4 - pip: true - -addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-4.9 - - gfortran-4.9 # spack OpenMPI dependency - - environment-modules - - openmpi-bin - - libopenmpi-dev - # clang 5.0.0 is pre-installed - # - clang-tidy-3.9 - -env: - global: - - SPACK_ROOT: $HOME/.cache/spack - - PATH: $PATH:$HOME/.cache/spack/bin - - CXXFLAGS: "-std=c++11" - -install: - ############################################################################# - # PMacc CPU-only dependencies # - ############################################################################# - - SPACK_FOUND=$(which spack >/dev/null && { echo 0; } || { echo 1; }) - - if [ $SPACK_FOUND -ne 0 ]; then - mkdir -p $SPACK_ROOT && - git clone --depth 50 https://github.com/spack/spack.git $SPACK_ROOT && - echo -e "config:""\n build_jobs:"" 2" > $SPACK_ROOT/etc/spack/config.yaml && - echo -e "packages:""\n cmake:""\n version:"" [3.11.4]""\n paths:""\n cmake@3.11.4:"" /home/travis/.cache/cmake-3.11.4""\n buildable:"" False" > $SPACK_ROOT/etc/spack/packages.yaml; - fi - - spack compiler add - # required dependencies - CMake 3.11.4 - - if [ "$TRAVIS_OS_NAME" == "linux" ]; then - if [ ! 
-f $HOME/.cache/cmake-3.11.4/bin/cmake ]; then - wget -O cmake.sh https://cmake.org/files/v3.11/cmake-3.11.4-Linux-x86_64.sh && - sh cmake.sh --skip-license --exclude-subdir --prefix=$HOME/.cache/cmake-3.11.4 && - rm cmake.sh; - fi; - elif [ "$TRAVIS_OS_NAME" == "osx" ]; then - if [ ! -d /Applications/CMake.app/Contents/ ]; then - curl -L -s -o cmake.dmg https://cmake.org/files/v3.11/cmake-3.11.4-Darwin-x86_64.dmg && - yes | hdiutil mount cmake.dmg && - sudo cp -R "/Volumes/cmake-3.11.4-Darwin-x86_64/CMake.app" /Applications && - hdiutil detach /dev/disk1s1 && - rm cmake.dmg; - fi; - fi - - travis_wait spack install - cmake - $COMPILERSPEC - # required dependencies - Boost 1.65.1 - - travis_wait spack install - boost@1.65.1~date_time~graph~iostreams~locale~log~random~thread~timer~wave - $COMPILERSPEC - - spack clean -a - - source /etc/profile && - source $SPACK_ROOT/share/spack/setup-env.sh - - spack load cmake - - spack load boost $COMPILERSPEC - -jobs: - fast_finish: true - include: - - stage: 'Target Branch' - install: skip - script: - ############################################################################# - # Disallow PRs to `ComputationalRadiationPhysics/picongpu` branch `master` # - # if not an other mainline branch such as `dev` or `release-...` # - ############################################################################# - - . test/correctBranchPR - - &style-python - stage: 'Style' - language: python - python: "2.7" - install: pip install -U flake8 - script: - ############################################################################# - # Test Python Files for PEP8 conformance # - ############################################################################# - - flake8 --exclude=thirdParty . 
- - <<: *style-python - python: "3.6" - - install: skip - language: cpp - script: - ############################################################################# - # Conformance with Alpaka: Do not write __global__ CUDA kernels directly # - ############################################################################# - - test/hasCudaGlobalKeyword include/pmacc - - test/hasCudaGlobalKeyword share/pmacc/examples - - test/hasCudaGlobalKeyword include/picongpu - - test/hasCudaGlobalKeyword share/picongpu/examples - - ############################################################################# - # Disallow end-of-line (EOL) white spaces # - ############################################################################# - - test/hasEOLwhiteSpace - - ############################################################################# - # Disallow TABs, use white spaces # - ############################################################################# - - test/hasTabs - - ############################################################################# - # Disallow non-ASCII in source files and scripts # - ############################################################################# - - test/hasNonASCII - - ############################################################################# - # Disallow spaces before pre-compiler macros # - ############################################################################# - - test/hasSpaceBeforePrecompiler - - ############################################################################# - # Enforce angle brackets <...> for includes of external library files # - ############################################################################# - - test/hasExtLibIncludeBrackets include boost - - test/hasExtLibIncludeBrackets include alpaka - - test/hasExtLibIncludeBrackets include cupla - - test/hasExtLibIncludeBrackets include splash - - test/hasExtLibIncludeBrackets include mallocMC - - test/hasExtLibIncludeBrackets include/picongpu pmacc - - 
test/hasExtLibIncludeBrackets share/picongpu/examples pmacc - - test/hasExtLibIncludeBrackets share/picongpu/examples boost - - test/hasExtLibIncludeBrackets share/picongpu/examples alpaka - - test/hasExtLibIncludeBrackets share/picongpu/examples cupla - - test/hasExtLibIncludeBrackets share/picongpu/examples splash - - test/hasExtLibIncludeBrackets share/picongpu/examples mallocMC - - test/hasExtLibIncludeBrackets share/pmacc/examples pmacc - - &static-code-python - stage: 'Static Code Analysis' - language: python - python: "2.7" - install: pip install -U pyflakes - script: - ############################################################################# - # Warnings, unused code, etc. # - ############################################################################# - - pyflakes . - - <<: *static-code-python - python: "3.6" - - &test-cpp-unit - stage: 'C++ Unit Tests' - language: cpp - env: [ COMPILERSPEC='%gcc@4.9.4' ] - before_install: - - export CXX=g++-4.9 - - export CC=gcc-4.9 - - export FC=gfortran-4.9 - script: - - $CXX --version - - $CC --version - - $FC --version - ############################################################################# - # PMacc CPU-only tests # - ############################################################################# - - mkdir -p $HOME/build - - cd $HOME/build - - cmake $TRAVIS_BUILD_DIR/include/pmacc - -DALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=ON - - make -j 2 - # - make test # reduce memory and RT costs first - - <<: *test-cpp-unit - env: [ COMPILERSPEC='%clang@5.0.0' ] - before_install: - - export CXX=clang++ - - export CC=clang - - export FC=gfortran-4.9 diff --git a/INSTALL.rst b/INSTALL.rst index 58e9b5f200..1b9f583c23 100644 --- a/INSTALL.rst +++ b/INSTALL.rst @@ -31,27 +31,28 @@ Mandatory gcc """ -- 4.9 - 7 (if you want to build for Nvidia GPUs, supported compilers depend on your current `CUDA version `_) +- 5.5 - 10.0 (if you want to build for Nvidia GPUs, supported compilers depend on your current `CUDA version `_) - - 
CUDA 9.2 - 10.0: Use gcc 4.9 - 7 - - CUDA 10.1/10.2: Use gcc 4.9 - 8 + - CUDA 9.2 - 10.0: Use gcc 5.5 - 7 + - CUDA 10.1/10.2: Use gcc 5.5 - 8 + - CUDA 11.x: Use gcc 5.5 - 10.0 - *note:* be sure to build all libraries/dependencies with the *same* gcc version; GCC 5 or newer is recommended - *Debian/Ubuntu:* - - ``sudo apt-get install gcc-5.3 g++-5.3 build-essential`` - - ``sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5.3 60 --slave /usr/bin/g++ g++ /usr/bin/g++-5.3`` + - ``sudo apt-get install gcc-5 g++-5 build-essential`` + - ``sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 60 --slave /usr/bin/g++ g++ /usr/bin/g++-5`` - *Arch Linux:* - ``sudo pacman --sync base-devel`` - if the installed version of **gcc** is too new, `compile an older gcc `_ - *Spack:* - - ``spack install gcc@5.3.0`` - - make it the default in your `packages.yaml `_ or *suffix* `all following `_ ``spack install`` commands with a *space* and ``%gcc@5.3.0`` + - ``spack install gcc@5.5.0`` + - make it the default in your `packages.yaml `_ or *suffix* `all following `_ ``spack install`` commands with a *space* and ``%gcc@5.5.0`` CMake """"" -- 3.11.4 or higher +- 3.15.0 or higher - *Debian/Ubuntu:* ``sudo apt-get install cmake file cmake-curses-gui`` - *Arch Linux:* ``sudo pacman --sync cmake`` - *Spack:* ``spack install cmake`` @@ -325,6 +326,38 @@ ADIOS - ``export ADIOS_ROOT=$HOME/lib/adios`` - ``export LD_LIBRARY_PATH=$ADIOS_ROOT/lib:$LD_LIBRARY_PATH`` +openPMD API +""""""""""" +- 0.12.0+ (bare minimum) / 0.13.0+ (for streaming IO) +- *Spack*: ``spack install openpmd-api`` +- For usage in PIConGPU, the openPMD API must have been built either with support for ADIOS2 or HDF5 (or both). + When building the openPMD API from source (described below), these dependencies must be built and installed first. + + - For ADIOS2, CMake build instructions can be found in the `official documentation `_. 
+ The default configuration should generally be sufficient, the ``CMAKE_INSTALL_PREFIX`` should be set to a fitting location. + - For HDF5, CMake build instructions can be found in the `official documentation `_. + The parameters ``-DHDF5_BUILD_CPP_LIB=OFF -DHDF5_ENABLE_PARALLEL=ON`` are required, the ``CMAKE_INSTALL_PREFIX`` should be set to a fitting location. +- *from source:* + + - ``mkdir -p ~/src ~/lib`` + - ``cd ~/src`` + - ``git clone https://github.com/openPMD/openPMD-api.git`` + - ``cd openPMD-api`` + - ``mkdir build && cd build`` + - ``cmake .. -DopenPMD_USE_MPI=ON -DCMAKE_INSTALL_PREFIX=~/lib/openPMD-api`` + Optionally, specify the parameters ``-DopenPMD_USE_ADIOS2=ON -DopenPMD_USE_HDF5=ON``. Otherwise, these parameters are set to ``ON`` automatically if CMake detects the dependencies on your system. + - ``make -j $(nproc) install`` +- *environment:* (assumes install from source in ``$HOME/lib/openPMD-api``) + + - ``export CMAKE_PREFIX_PATH="$HOME/lib/openPMD-api:$CMAKE_PREFIX_PATH"`` + - ``export LD_LIBRARY_PATH="$HOME/lib/openPMD-api/lib:$LD_LIBRARY_PATH"`` +- If PIConGPU is built with openPMD output enabled, the JSON library + nlohmann_json will automatically be used, found in the ``thirdParty/`` + directory. + By setting the CMake parameter ``PIC_nlohmann_json_PROVIDER=extern``, CMake + can be instructed to search for an installation of nlohmann_json externally. + Refer to LICENSE.md for further information. + ISAAC """"" - 1.4.0+ diff --git a/LICENSE.md b/LICENSE.md index 188597079f..589c477222 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -59,14 +59,6 @@ of an easier install of `PIConGPU`. Contributions to these parts of the repository should *not* be made in the `thirdParty/` directory but in *their according repositories* (that we import). - - `thirdParty/alpaka`: - alpaka is a header-only C++11 abstraction library for parallel - kernel development on accelerator hardware. 
It provides a single-source, - performance portable programming model for PIConGPU and PMacc. - Please visit - https://github.com/ComputationalRadiationPhysics/alpaka - for further details and contributions. - - `thirdParty/mallocMC`: mallocMC is a fast memory allocator for many core accelerators and was originally forked from the `ScatterAlloc` project. @@ -100,3 +92,11 @@ repository should *not* be made in the `thirdParty/` directory but in Please visit https://github.com/ComputationalRadiationPhysics/cupla for further details and contributions. + +- `thirdParty/nlohmann_json`: + nlohmann_json is a modern C++ library for working with JSON data, developed + by Niels Lohmann, published under the MIT License. + Please refer to the file `thirdParty/nlohmann_json/LICENSE.MIT` for license + information. + Please visit https://github.com/nlohmann/json for further details + and contributions. diff --git a/README.md b/README.md index 2346f3f5b5..667cd9360e 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ PIConGPU - Particle-in-Cell Simulations for the Exascale Era ============================================================ -[![Code Status master](https://img.shields.io/travis/ComputationalRadiationPhysics/picongpu/master.svg?label=master)](https://travis-ci.org/ComputationalRadiationPhysics/picongpu/branches) -[![Code Status dev](https://img.shields.io/travis/ComputationalRadiationPhysics/picongpu/dev.svg?label=dev)](https://travis-ci.org/ComputationalRadiationPhysics/picongpu/branches) +[![Code Status master](https://gitlab.com/hzdr/crp/picongpu/badges/master/pipeline.svg?key_text=master)](https://gitlab.com/hzdr/crp/picongpu/pipelines/master/latest) +[![Code Status dev](https://gitlab.com/hzdr/crp/picongpu/badges/dev/pipeline.svg?key_text=dev)](https://gitlab.com/hzdr/crp/picongpu/pipelines/dev/latest) [![Documentation Status](https://readthedocs.org/projects/picongpu/badge/?version=latest)](http://picongpu.readthedocs.io) 
[![Doxygen](https://img.shields.io/badge/API-Doxygen-blue.svg)](http://computationalradiationphysics.github.io/picongpu) [![GitHub commits since last release](https://img.shields.io/github/commits-since/ComputationalRadiationPhysics/picongpu/latest/dev.svg)](https://github.com/ComputationalRadiationPhysics/picongpu/compare/master...dev) -[![Language](https://img.shields.io/badge/language-C%2B%2B11-orange.svg)](https://isocpp.org/) +[![Language](https://img.shields.io/badge/language-C%2B%2B14-orange.svg)](https://isocpp.org/) [![License PIConGPU](https://img.shields.io/badge/license-GPLv3-blue.svg?label=PIConGPU)](https://www.gnu.org/licenses/gpl-3.0.html) [![License PMacc](https://img.shields.io/badge/license-LGPLv3-blue.svg?label=PMacc)](https://www.gnu.org/licenses/lgpl-3.0.html) @@ -38,7 +38,7 @@ Its features for the electro-magnetic PIC algorithm include: [*Esirkepov*](http://dx.doi.org/10.1016/S0010-4655%2800%2900228-9) and *ZigZag* - macro-particle form factors ranging from NGP (0th order), CIC (1st), - TSC (2nd), PSQ (3rd) to P4S (4th) + TSC (2nd), PQS (3rd) to PCS (4th) and the electro-magnetic PIC algorithm is further self-consistently coupled to: - classical radiation reaction diff --git a/USAGE.rst b/USAGE.rst index 11564e857d..b1d3376fc2 100644 --- a/USAGE.rst +++ b/USAGE.rst @@ -2,7 +2,14 @@ .. seealso:: - You need to have an :ref:`environment loaded ` (``source $HOME/picongpu.profile``) that provides all :ref:`PIConGPU dependencies ` to complete this chapter. + You need to have an :ref:`environment loaded ` (``source $HOME/picongpu.profile`` when installing from source or ``spack load picongpu`` when using spack) that provides all :ref:`PIConGPU dependencies ` to complete this chapter. + +.. warning:: + + PIConGPU source code is portable and can be compiled on all major operating systems. 
+ However, helper tools like ``pic-create`` and ``pic-build`` described in this section rely on Linux utilities and thus are not expected to work on other platforms out-of-the-box. + Note that building and using PIConGPU on other operating systems is still possible but has to be done manually or with custom tools. + This case is not covered in the documentation, but we can assist users with it when needed. Basics ====== @@ -101,16 +108,16 @@ tbg The ``tbg`` tool is explained in detail :ref:`in its own section `. Its primary purpose is to abstract the options in runtime ``.cfg`` files from the technical details on how to run on various supercomputers. -For example, if you want to run on the HPC System `"Hypnos" at HZDR `_, your ``tbg`` submit command would just change to: +For example, if you want to run on the HPC System `"Hemera" at HZDR `_, your ``tbg`` submit command would just change to: .. code-block:: bash :emphasize-lines: 2 # request 1 GPU from the PBS batch system and run on the queue "k20" - tbg -s qsub -c etc/picongpu/1.cfg -t etc/picongpu/hypnos-hzdr/k20.tpl $SCRATCH/runs/lwfa_002 + tbg -s sbatch -c etc/picongpu/1.cfg -t etc/picongpu/hemera-hzdr/k20.tpl $SCRATCH/runs/lwfa_002 # run again, this time on 16 GPUs - tbg -s qsub -c etc/picongpu/16.cfg -t etc/picongpu/hypnos-hzdr/k20.tpl $SCRATCH/runs/lwfa_003 + tbg -s sbatch -c etc/picongpu/16.cfg -t etc/picongpu/hemera-hzdr/k20.tpl $SCRATCH/runs/lwfa_003 Note that we can use the same ``1.cfg`` file, your input set is *portable*. diff --git a/bin/cuda_memtest.sh b/bin/cuda_memtest.sh index 6f432dc192..be022f4551 100755 --- a/bin/cuda_memtest.sh +++ b/bin/cuda_memtest.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PIConGPU. 
# diff --git a/bin/egetopt b/bin/egetopt index 57f76f1279..f421563c0a 100755 --- a/bin/egetopt +++ b/bin/egetopt @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2014-2020 Rene Widera +# Copyright 2014-2021 Rene Widera # # This file is part of PIConGPU. # diff --git a/bin/pic-build b/bin/pic-build index 84d42c3771..bfde3856d0 100755 --- a/bin/pic-build +++ b/bin/pic-build @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/bin/pic-compile b/bin/pic-compile index ce4370a9da..0090006cc7 100755 --- a/bin/pic-compile +++ b/bin/pic-compile @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -131,7 +131,7 @@ do $picongpu_prefix/buildsystem/CompileSuite/compileSet.sh \ "$example_name" "$testFlagNr" "$globalCMakeOptions" \ "$tmpRun_path" "$buildDir" "$examples_path" \ - "$quiet_run" | tee $buildDir"/compile.log" || exit $? + "$quiet_run" &> $buildDir"/compile.log" fi testFlagNr=$(( testFlagNr + 1 )) diff --git a/bin/pic-configure b/bin/pic-configure index ae8a33497c..00d14b0174 100755 --- a/bin/pic-configure +++ b/bin/pic-configure @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. 
# @@ -39,8 +39,8 @@ help() echo " (default is )" echo "-b | --backend - set compute backend and optionally the architecture" echo " syntax: backend[:architecture]" - echo " supported backends: cuda, omp2b, serial, tbb, threads" - echo " (e.g.: \"cuda:20;35;37;52;60\" or \"omp2b:native\" or \"omp2b\")" + echo " supported backends: cuda, hip, omp2b, serial, tbb, threads" + echo " (e.g.: \"cuda:35;37;52;60\" or \"omp2b:native\" or \"omp2b\")" echo " default: \"cuda\" if not set via environment variable PIC_BACKEND" echo " note: architecture names are compiler dependent" echo "-c | --cmake - overwrite options for cmake" @@ -64,25 +64,30 @@ get_backend_flags() result+=" -DALPAKA_CUDA_ARCH=\"${backend_cfg[1]}\"" fi elif [ "${backend_cfg[0]}" == "omp2b" ] ; then - result+=" -DALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=ON" + result+=" -DALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=ON -DCUPLA_STREAM_ASYNC_ENABLE=OFF" if [ $num_options -eq 2 ] ; then result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" fi elif [ "${backend_cfg[0]}" == "serial" ] ; then - result+=" -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON" + result+=" -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON -DCUPLA_STREAM_ASYNC_ENABLE=OFF" if [ $num_options -eq 2 ] ; then result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" fi elif [ "${backend_cfg[0]}" == "tbb" ] ; then - result+=" -DALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=ON" + result+=" -DALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=ON -DCUPLA_STREAM_ASYNC_ENABLE=OFF" if [ $num_options -eq 2 ] ; then result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" fi elif [ "${backend_cfg[0]}" == "threads" ] ; then - result+=" -DALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=ON" + result+=" -DALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=ON -DCUPLA_STREAM_ASYNC_ENABLE=OFF" if [ $num_options -eq 2 ] ; then result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" fi + elif [ "${backend_cfg[0]}" == "hip" ] ; then + result+=" -DALPAKA_ACC_GPU_HIP_ENABLE=ON -DALPAKA_ACC_GPU_HIP_ONLY_MODE=ON" + if [ $num_options -eq 2 ] ; then + result+=" 
-DALPAKA_HIP_ARCH=\"${backend_cfg[1]}\"" + fi else echo "unsupported backend given '$1'" >&2 exit 1 diff --git a/bin/pic-create b/bin/pic-create index f700107975..42cf13a0b5 100755 --- a/bin/pic-create +++ b/bin/pic-create @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -36,7 +36,7 @@ help() echo "" echo "usage: pic-create [OPTION] [src_dir] dest_dir" echo "If no src_dir is set picongpu a default case is cloned" - echo "If src_dir is not in the currrent directory, pic-create will" + echo "If src_dir is not in the current directory, pic-create will" echo 'look for it in $PIC_EXAMPLES' echo "" echo "-f | --force - merge data if destination already exists" @@ -73,7 +73,7 @@ done cmake_path="$*" if [ $# -eq 0 ] || [ $# -gt 2 ] ; then - echo "Missing destination directory or to many directories were given." >&2 + echo "Missing destination directory or too many directories were given." >&2 exit fi diff --git a/bin/pic-edit b/bin/pic-edit index 2eedc5b466..c35fe78dac 100755 --- a/bin/pic-edit +++ b/bin/pic-edit @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/bin/tbg b/bin/tbg index c4a51db88f..4c061f57d6 100755 --- a/bin/tbg +++ b/bin/tbg @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch # # This file is part of PIConGPU. # @@ -272,7 +272,12 @@ help() } #!/usr/bin/env bash -initCall="$0 $*" +#Check whether the provided path is relative or absolute and +#convert it to an absolute path +script=$0 + +absScriptPath=$(realpath $0) +initCall="cd $(pwd); $absScriptPath $*" projectPath="." 
pathToegetopt=$(which egetopt 2>/dev/null) diff --git a/buildsystem/CompileSuite/autoTests/config.sh b/buildsystem/CompileSuite/autoTests/config.sh index eb1e2025a1..9bd6cadad2 100755 --- a/buildsystem/CompileSuite/autoTests/config.sh +++ b/buildsystem/CompileSuite/autoTests/config.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/autoTests/get_work.sh b/buildsystem/CompileSuite/autoTests/get_work.sh index b0164fc2da..b4faa82ce4 100755 --- a/buildsystem/CompileSuite/autoTests/get_work.sh +++ b/buildsystem/CompileSuite/autoTests/get_work.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/autoTests/new_commits.sh b/buildsystem/CompileSuite/autoTests/new_commits.sh index ff429f7d48..5a846ac844 100755 --- a/buildsystem/CompileSuite/autoTests/new_commits.sh +++ b/buildsystem/CompileSuite/autoTests/new_commits.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # @@ -94,7 +94,7 @@ touch "$thisDir"runGuard #export PIC_COMPILE_SUITE_CMAKE="-DPIC_ENABLE_PNG=OFF -DALPAKA_CUDA_ARCH=35" export PIC_BACKEND="cuda" . 
/etc/profile - module load gcc/5.1.0 boost/1.65.1 cmake/3.11.4 cuda/9.2.148 openmpi/3.0.4 + module load gcc/5.5.0 boost/1.65.1 cmake/3.15.0 cuda/9.2.148 openmpi/3.0.4 module load libSplash/1.7.0 adios/1.13.1 module load pngwriter/0.7.0 zlib/1.2.11 module load libjpeg-turbo/1.5.1 icet/2.1.1 jansson/2.9 isaac/1.4.0 diff --git a/buildsystem/CompileSuite/autoTests/report.sh b/buildsystem/CompileSuite/autoTests/report.sh index 6f67c91026..018d946096 100755 --- a/buildsystem/CompileSuite/autoTests/report.sh +++ b/buildsystem/CompileSuite/autoTests/report.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/color.sh b/buildsystem/CompileSuite/color.sh index 86650a4641..52038ad590 100755 --- a/buildsystem/CompileSuite/color.sh +++ b/buildsystem/CompileSuite/color.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/compileSet.sh b/buildsystem/CompileSuite/compileSet.sh index 4c13e365d9..94bee74ce7 100755 --- a/buildsystem/CompileSuite/compileSet.sh +++ b/buildsystem/CompileSuite/compileSet.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/exec_helper.sh b/buildsystem/CompileSuite/exec_helper.sh index 905609a83d..f731c8982c 100755 --- a/buildsystem/CompileSuite/exec_helper.sh +++ b/buildsystem/CompileSuite/exec_helper.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. 
# diff --git a/buildsystem/CompileSuite/help.sh b/buildsystem/CompileSuite/help.sh index 582c11b6e6..4fe8cb19ff 100755 --- a/buildsystem/CompileSuite/help.sh +++ b/buildsystem/CompileSuite/help.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/options.sh b/buildsystem/CompileSuite/options.sh index 9a67952cd6..f847e3eb0e 100755 --- a/buildsystem/CompileSuite/options.sh +++ b/buildsystem/CompileSuite/options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/buildsystem/CompileSuite/path.sh b/buildsystem/CompileSuite/path.sh index 9e107302fd..507e86ab39 100755 --- a/buildsystem/CompileSuite/path.sh +++ b/buildsystem/CompileSuite/path.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. 
# diff --git a/docs/COMMIT.md b/docs/COMMIT.md index 7e14e74a9a..6b4490b929 100644 --- a/docs/COMMIT.md +++ b/docs/COMMIT.md @@ -6,7 +6,30 @@ We agree on the following simple rules to make our lives easier :) - Stick to the **style** below for **commit messages** - **Commit compiling patches** for the *main* branches (`master` and `dev`), you can be less strict for (unshared) *topic branches* - +- Commits should be formated with clang-format-11 + +Format Code +----------- + +- Install *ClangFormat 11* +- To format all files in your working copy, you can run this command in bash from the root folder of PIConGPU: + ```bash + find include/ share/picongpu/ share/pmacc -iname "*.def" \ + -o -iname "*.h" -o -iname "*.cpp" -o -iname "*.cu" \ + -o -iname "*.hpp" -o -iname "*.tpp" -o -iname "*.kernel" \ + -o -iname "*.loader" -o -iname "*.param" -o -iname "*.unitless" \ + | xargs clang-format-11 -i + ``` + +Instead of using the bash command above you can use *Git* together with *ClangFormat* to format your patched code only. +Before applying this command, you must extend your local git configuration **once** with all file endings used in *PIConGPU*: + +``` +git config --local clangFormat.extensions def,h,cpp,cu,hpp,tpp,kernel,loader,param,unitless +``` + +For only formatting lines you added using `git add`, call `git clang-format-11` before you create a commit. +Please be aware that un-staged changes will not be formatted. Commit Messages --------------- diff --git a/docs/TBG_macros.cfg b/docs/TBG_macros.cfg index 7b985f3d44..d830c1a593 100644 --- a/docs/TBG_macros.cfg +++ b/docs/TBG_macros.cfg @@ -1,4 +1,5 @@ -# Copyright 2014-2020 Felix Schmitt, Axel Huebl, Richard Pausch, Heiko Burau +# Copyright 2014-2021 Felix Schmitt, Axel Huebl, Richard Pausch, Heiko Burau, +# Franz Poeschel # # This file is part of PIConGPU. # @@ -22,6 +23,7 @@ ## These variables basically wrap PIConGPU command line flags. 
## To see all flags available for your PIConGPU binary, run ## picongpu --help. The avalable flags depend on your configuration flags. +## Note that this is not meant to be a complete and functioning .cfg file. ## ## Flags that target a specific species e.g. electrons (--e_png) or ions ## (--i_png) must only be used if the respective species is activated (configure flags). @@ -123,7 +125,8 @@ TBG_stopWindow="--stopWindow 1337" #--_radiation.end Time step to stop calculating the radiation #--_radiation.radPerGPU If flag is set, each GPU stores its own spectra without summing the entire simulation area #--_radiation.folderRadPerGPU Folder where the GPU specific spectras are stored -#--e__radiation.compression If flag is set, the hdf5 output will be compressed. +#--_radiation.compression If flag is set, the hdf5 output will be compressed. +#--_radiation.numJobs Number of independent jobs used for the radiation calculation. TBG_radiation="--_radiation.period 1 --_radiation.dump 2 --_radiation.totalRadiation \ --_radiation.lastRadiation --_radiation.start 2800 --_radiation.end 3000" @@ -132,6 +135,26 @@ TBG_radiation="--_radiation.period 1 --_radiation.dump 2 --_transRad.period Gives the number of time steps between which the radiation should be calculated. TBG_transRad="--_transRad.period 1000" +# The following flags are available for the xrayScattering plugin. +# For a full description, see the plugins section in the online documentation. +#--_xrayScattering.period Period at which the plugin is enabled. +#--_xrayScattering.outputPeriod Period at which the accumulated amplitude is written to the output file. +#--_xrayScattering.qx_max Upper bound of reciprocal space range in qx direction. +#--_xrayScattering.qy_max Upper bound of reciprocal space range in qy direction. +#--_xrayScattering.qx_min Lower bound of reciprocal space range in qx direction. +#--_xrayScattering.qy_min Lower bound of reciprocal space range in qy direction.
+#--_xrayScattering.n_qx Number of scattering vectors needed to be calculated in qx direction. +#--_xrayScattering.n_qy Number of scattering vectors needed to be calculated in qy direction. +#--_xrayScattering.file Output file name. Default is `_xrayScatteringOutput`. +#--_xrayScattering.ext `openPMD` filename extension. This controls the backend picked by the `openPMD` API. Default is `bp` for adios backend. +#--_xrayScattering.compression Backend-specific `openPMD` compression method (e.g.) zlib. +#--_xrayScattering.memoryLayout Possible values: `mirror` and `split`. Output can be mirrored on all Host+Device pairs or uniformly split, in chunks, over all nodes. +TBG__xrayScattering="--_xrayScattering.period 1 --e_xrayScattering.outputPeriod 10 \ + --e_xrayScattering.n_qx 512 --e_xrayScattering.n_qy 512 \ + --e_xrayScattering.qx_min 0 --e_xrayScattering.qx_max 1 \ + --e_xrayScattering.qy_min -1 --e_xrayScattering.qy_max 1 \ + --e_xrayScattering.memoryLayout split" + # Create 2D images in PNG format every .period steps. # The slice plane is defined using .axis [yx,yz] and .slicePoint (offset from origin # as a float within [0.0,1.0]. 
@@ -143,6 +166,10 @@ TBG__pngYX="--_png.period 10 --_png.axis yx --_merger="--_merger.period 100 --_merger.minParticlesToMerge 8 --_merger.posSpreadThreshold 0.2 --_merger.absMomSpreadThreshold 0.01" +# Enable probabilistic version of particle merging +TBG__randomizedMerger="--_randomizedMerger.period 100 --_randomizedMerger.maxParticlesToMerge 8 \ + --_randomizedMerger.ratioDeletedParticles 0.9 --_randomizedMerger.posSpreadThreshold 0.01 \ + --_randomizedMerger.momSpreadThreshold 0.0005" # Notification period of position plugin (single-particle debugging) TBG__pos_dbg="--_position.period 1" @@ -155,7 +182,6 @@ TBG__histogram="--_energyHistogram.period 500 --_ener # Calculate a 2D phase space -# - requires parallel libSplash for HDF5 output # - momentum range in m_ c TBG__PSxpx="--_phaseSpace.period 10 --_phaseSpace.filter all --_phaseSpace.space x --_phaseSpace.momentum px --_phaseSpace.min -1.0 --_phaseSpace.max 1.0" TBG__PSxpz="--_phaseSpace.period 10 --_phaseSpace.filter all --_phaseSpace.space x --_phaseSpace.momentum pz --_phaseSpace.min -1.0 --_phaseSpace.max 1.0" @@ -181,12 +207,11 @@ TBG_macroCount="--_macroParticlesCount.period 100" # Count makro particles of a species per super cell TBG_countPerSuper="--_macroParticlesPerSuperCell.period 100 --_macroParticlesPerSuperCell.period 100" -# Dump simulation data (fields and particles) to HDF5 files using libSplash. -# Data selected in .source is dumped every .period steps to the fileset .file. -TBG_hdf5="--hdf5.period 100 --hdf5.file simData --hdf5.source 'species_all,fields_all'" # Dump simulation data (fields and particles) to ADIOS files. # Data is dumped every .period steps to the fileset .file. +# Warning: we do not recommend using the ADIOS plugin for output, +# but the openPMD plugin with ADIOS (2) backend instead, see TBG_openPMD below. 
TBG_adios="--adios.period 100 --adios.file simData --adios.source 'species_all,fields_all'" # see 'adios_config -m', e.g., for on-the-fly zlib compression # (compile ADIOS with --with-zlib=) @@ -205,13 +230,29 @@ TBG_adios="--adios.period 100 --adios.file simData --adios.source 'species_all,f # select data sources for the dump # --adios.source +# Dump simulation data (fields and particles) via the openPMD API. +# Data is dumped every .period steps to the fileset .file. +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simOutput \ + --openPMD.ext bp \ + --openPMD.json '{ \"adios2\": { \"engine\": { \"type\": \"file\", \"parameters\": { \"BufferGrowthFactor\": \"1.2\", \"InitialBufferSize\": \"2GB\" } } } }'" +# Further control over the backends used in the openPMD plugins is available +# through the mechanisms exposed by the openPMD API: +# * environment variables +# * JSON-formatted configuration string +# Further information on both is retrieved from the official documentation +# https://openpmd-api.readthedocs.io +# Notice that specifying compression settings via --openPMD.compression +# is considered legacy and backend-specific settings via the JSON string are +# preferred if available for a backend. 
+ # Create a checkpoint that is restartable every --checkpoint.period steps # http://git.io/PToFYg TBG_checkpoint="--checkpoint.period 1000" -# Select the backend for the checkpoint, available are hdf5 and adios -# --checkpoint.backend adios -# hdf5 -# Available backend options are exactly as in --adios.* and --hdf5.* and can be set +# Select the backend for the checkpoint, available are openPMD and adios +# --checkpoint.backend openPMD +# adios +# Available backend options are exactly as in --openPMD.* and --adios.* and can be set # via: # --checkpoint..* # e.g.: @@ -223,8 +264,8 @@ TBG_checkpoint="--checkpoint.period 1000" # Restart the simulation from checkpoint created using TBG_checkpoint TBG_restart="--checkpoint.restart" # Select the backend for the restart (must fit the created checkpoint) -# --checkpoint.restart.backend adios -# hdf5 +# --checkpoint.restart.backend openPMD +# adios # By default, the last checkpoint is restarted if not specified via # --checkpoint.restart.step 1000 # To restart in a new run directory point to the old run where to start from diff --git a/docs/propose_changelog.py b/docs/propose_changelog.py index dc2861abb2..47f466bfd8 100755 --- a/docs/propose_changelog.py +++ b/docs/propose_changelog.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # License: GPLv3+ # diff --git a/docs/source/conf.py b/docs/source/conf.py index 533349d9da..dc0ba8723a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -105,9 +105,9 @@ # built documents. # # The short X.Y version. -version = u'0.5.0' +version = u'0.6.0' # The full version, including alpha/beta/rc tags. -release = u'0.5.0' +release = u'0.6.0-dev' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/docs/source/dev/picongpu.rst b/docs/source/dev/picongpu.rst index 37e281fb8d..f058abe670 100644 --- a/docs/source/dev/picongpu.rst +++ b/docs/source/dev/picongpu.rst @@ -3,10 +3,10 @@ Important PIConGPU Classes This is very, very small selection of classes of interest to get you started. -MySimulation ------------- +Simulation +---------- -.. doxygenclass:: picongpu::MySimulation +.. doxygenclass:: picongpu::Simulation :project: PIConGPU :members: :undoc-members: diff --git a/docs/source/dev/styleguide.rst b/docs/source/dev/styleguide.rst index a5e5126a44..a53d2ca6e4 100644 --- a/docs/source/dev/styleguide.rst +++ b/docs/source/dev/styleguide.rst @@ -15,12 +15,77 @@ Source Style For contributions, *an ideal patch blends in the existing coding style around it* without being noticed as an addition when applied. Nevertheless, please make sure *new files* follow the styles linked above as strict as possible from the beginning. -Unfortunately, we currently do not have tools available to auto-format all aspects of our style guidelines. -Since we want to focus on the content of your contribution, we try to cover as much as possible by automated tests which you always have to pass. -Nevertheless, we will not enforce the still uncovered, *non-semantic aspects* of style in a *pedantic* way until we find a way to automate it fully. +clang-format-11 should be used to format the code. +There are different ways to format the code. -(That also means that we do not encourage manual style-only changes of our existing code base, since both you and us have better things to do than adding newlines and spaces manually. -Doxygen and documentation additions are always welcome!) +Format All Files +^^^^^^^^^^^^^^^^ + +To format all files in your working copy, you can run this command in bash from the root folder of PIConGPU: + +.. 
code-block:: bash + + find include/ share/picongpu/ share/pmacc -iname "*.def" \ + -o -iname "*.h" -o -iname "*.cpp" -o -iname "*.cu" \ + -o -iname "*.hpp" -o -iname "*.tpp" -o -iname "*.kernel" \ + -o -iname "*.loader" -o -iname "*.param" -o -iname "*.unitless" \ + | xargs clang-format-11 -i + +Format Only Changes, Using Git +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Instead of using the bash command above you can use *Git* together with *ClangFormat* to format your patched code only. + + *ClangFormat* is an external tool for code formatting that can be called by *Git* on changed files only and + is part of clang tools. + +Before applying this command, you must extend your local git configuration **once** with all file endings used in *PIConGPU*: + +.. code-block:: bash + + git config --local clangFormat.extensions def,h,cpp,cu,hpp,tpp,kernel,loader,param,unitless + +After installing, or on a cluster loading the module (see introduction), clangFormat can be called by git on all **staged files** using the command: + +.. code-block:: bash + + git clangFormat + +.. warning:: + + The binary for *ClangFormat* is called `clang-format` on some operating systems. + If *clangFormat* is not recognized, try *clang-format* instead, in addition please check that `clang-format --version` returns version `11.X.X` in this case. + +The typical workflow using git clangFormat is the following: + +1. make your patch + +2. stage the changed files in git + +.. code-block:: bash + + git add / -A + +3. format them according to guidelines + +.. code-block:: bash + + git clangFormat + +4. stage the now changed (formatted) files again + +.. code-block:: bash + + git add + +5. commit changed files + +.. code-block:: bash + + git commit -m + +Please be aware that un-staged changes will not be formatted. +Formatting all changes of the previous commit can be achieved by executing the command `git clang-format-11 HEAD~1`.
License Header -------------- diff --git a/docs/source/index.rst b/docs/source/index.rst index e9b35d10f0..e52983fdfb 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -80,6 +80,8 @@ Models :hidden: models/pic + models/AOFDTD + models/shapes models/LL_RR models/field_ionization models/collisional_ionization diff --git a/docs/source/install/libraryDependencies.dot b/docs/source/install/libraryDependencies.dot index 1b81068feb..10faa26002 100644 --- a/docs/source/install/libraryDependencies.dot +++ b/docs/source/install/libraryDependencies.dot @@ -35,6 +35,19 @@ digraph PIConGPU { hdf5 -> adios [style=dashed label="optional"]; fileSystem -> adios [style=dashed label="extra hints"]; + adios2 [label="ADIOS2"]; + mpi -> adios2; + zlib -> adios2; + hdf5 -> adios2 [style=dashed label="optional"]; + compiler -> libfabric; + libfabric -> adios2[style=dashed label="RDMA staging"]; + + openpmd [label="openPMD API"]; + adios2 -> openpmd; + hdf5 -> openpmd; + nlohmann_json [label="nlohmann_json\n(header-only)\n(internally shipped\nby default)"]; + nlohmann_json -> openpmd[style=dashed]; + libpng -> PNGwriter; trace [label="VampirTrace / Score-P"]; diff --git a/docs/source/install/profile.rst b/docs/source/install/profile.rst index f48bc50ce7..77b2b9baa8 100644 --- a/docs/source/install/profile.rst +++ b/docs/source/install/profile.rst @@ -59,37 +59,6 @@ Queue: k80 (8x NVIDIA K80 12GB) .. literalinclude:: profiles/hemera-hzdr/k80_picongpu.profile.example :language: bash -Hypnos (HZDR) -------------- - -**System overview:** `link (internal) `_ - -**User guide:** `link (internal) `_ - -**Production directory:** ``/bigdata/hplsim/`` with ``external/``, ``scratch/``, ``development/`` and ``production/`` - -For these profiles to work, you need to download the :ref:`PIConGPU source code ` manually. - -Queue: laser (AMD Opteron 6276 CPUs) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -..
literalinclude:: profiles/hypnos-hzdr/laser_picongpu.profile.example - :language: bash - -Hydra (HZDR) -------------- - -**System overview:** `link (internal) `_ - -**User guide:** `link (internal) `_ - -**Production directory:** ``/bigdata/hplsim/`` with ``external/``, ``scratch/``, ``development/`` and ``production/`` - -For this profile to work, you need to download the :ref:`PIConGPU source code ` manually. - -.. literalinclude:: profiles/hydra-hzdr/default_picongpu.profile.example - :language: bash - Summit (ORNL) ------------- diff --git a/docs/source/models/AOFDTD.rst b/docs/source/models/AOFDTD.rst new file mode 100644 index 0000000000..19bc977d70 --- /dev/null +++ b/docs/source/models/AOFDTD.rst @@ -0,0 +1,376 @@ +.. _model-AOFDTD: + +Finite-Difference Time-Domain Method +==================================== + +.. sectionauthor:: Klaus Steiniger + + +For the discretization of Maxwell's equations on a mesh in PIConGPU, only the equations + +.. math:: + + \frac{1}{c^2}\frac{\partial}{\partial t}\vec E &= \nabla \times \vec B - \mu_0 \vec J + + \frac{\partial}{\partial t}\vec B &= - \nabla \times \vec E + +are solved. +This becomes possible, first, by correctly solving Gauss's law +:math:`\nabla \cdot \vec{E} = \frac{1}{\varepsilon_0}\sum_s \rho_s` using +Esirkepov's current deposition method [Esirkepov2001]_ (or variants thereof) which solve the discretized continuity +equation exactly. +Second, by assuming that the initially given electric and magnetic field satisfy Gauss' laws. +Starting simulations in an initially charge free and magnetic-divergence-free space, i.e. + +.. math:: + + \nabla \cdot \vec E &= 0 + + \nabla \cdot \vec B &= 0 + +is standard. + + +Discretization on a staggered mesh +---------------------------------- +In the Finite-Difference Time-Domain method, above Maxwell's equations are discretized by replacing the partial space and +time derivatives with centered finite differences. 
+For example, the partial space derivative along :math:`x` of a scalar field :math:`u` at position +:math:`(i,j,k)` and time step :math:`n` becomes + +.. math:: + + \partial_x u(i\Delta x,j\Delta y,k\Delta z,n\Delta t) = \frac{u_{i+1/2,j,k}^n - u_{i-1/2,j,k}^n}{\Delta x} + +and the temporal derivative becomes + +.. math:: + + \partial_t u(i\Delta x,j\Delta y,k\Delta z,n\Delta t) = \frac{u_{i,j,k}^{n+1/2} - u_{i,j,k}^{n-1/2}}{\Delta t}, + +when replacing with the lowest order central differences. +Note, with this leapfrog discretization or staggering, derivatives of field quantities are calculated at positions +between positions where the field quantities are known. + +The above discretization uses one neighbor to each side from the point where the derivative is calculated yielding a +second order accurate approximation of the derivative. +Using more neighbors for the approximation of the spatial derivative is possible in PIConGPU and reduces the +discretization error. +Which is to say that the order of the method is increased. +The error order scales with twice the number of neighbors :math:`M` used to approximate the derivative. +The arbitrary order finite difference of order :math:`2M` reads + +.. math:: + + \partial_x u(i\Delta x,j\Delta y,k\Delta z,n\Delta t) &= \sum\limits_{l=1/2}^{M-1/2} + \left[ g^{2M}_l \frac{u_{i + l, j, k}^n - u_{i - l, j, k}^n}{\Delta x} \right]\,\mathrm{, where} + + g^{2M}_l &= \frac{(-1)^{l-1/2}}{2l^2} \frac{((2M-1)!!)^2}{(2M -1 - 2l)!! (2M -1 + 2l)!!} + +with :math:`l=-M+1/2, -M+1+1/2, ..., -1/2, 1/2, ..., M-1/2` [Ghrist2000]_. +A recurrence relation for the weights exists, + +.. 
math:: + + g^{2M}_l &= (-1)\frac{(l-1)^2}{l^2} \frac{(2M+1-2l)}{(2M-1+2l)} g^{2M}_{l-1} + + g^{2M}_\frac{1}{2} &= \frac{16^{1-M}}{M} \left( \frac{(2M-1)!}{\left[(M-1)!\right]^2} \right)^2 + + + +Maxwell's equations on the mesh +------------------------------- +When discretizing on the mesh with centered finite differences, the spatial positions of field components need to be +chosen such that a field component, whose **temporal derivative** is +calculated on the left hand side of a Maxwell equation, is spatially positioned between the two field components whose +**spatial derivative** is evaluated on the right hand side of the respective Maxwell equation. +In this way, the spatial point where a left hand side temporal derivative of a field is evaluated lies exactly at the +position where the spatial derivative of the right hand side fields is calculated. +The following image visualizes the arrangement of field components in PIConGPU. + +.. image:: media/Yee-cell.png + :width: 400 + :alt: Yee cell in PIConGPU + +Component-wise and using second order finite differences for the derivative approximation, Maxwell's equations read in +PIConGPU + +..
math:: + + \frac{E_x\rvert_{i+1/2, j, k}^{n+1} - E_x\rvert_{i+1/2, j, k}^{n}}{c^2 \Delta t} =& + \frac{B_z\rvert_{i+1/2, j+1/2, k}^{n+1/2} - B_z\rvert_{i+1/2, j-1/2, k}^{n+1/2}}{\Delta y} + + & - \frac{B_y\rvert_{i+1/2, j, k+1/2}^{n+1/2} - B_y\rvert_{i+1/2, j, k-1/2}^{n+1/2}}{\Delta z} + - \mu_0 J_x\rvert_{i+1/2, j, k}^{n+1/2} + + \frac{E_y\rvert_{i, j+1/2, k}^{n+1} - E_y\rvert_{i, j+1/2, k}^{n}}{c^2 \Delta t} =& + \frac{B_x\rvert_{i, j+1/2, k+1/2}^{n+1/2} - B_x\rvert_{i, j+1/2, k-1/2}^{n+1/2}}{\Delta z} + + & - \frac{B_z\rvert_{i+1/2, j+1/2, k}^{n+1/2} - B_z\rvert_{i-1/2, j+1/2, k}^{n+1/2}}{\Delta x} + - \mu_0 J_y\rvert_{i, j+1/2, k}^{n+1/2} + + \frac{E_z\rvert_{i, j, k+1/2}^{n+1} - E_z\rvert_{i, j, k+1/2}^{n}}{c^2 \Delta t} =& + \frac{B_y\rvert_{i+1/2, j, k+1/2}^{n+1/2} - B_y\rvert_{i-1/2, j, k+1/2}^{n+1/2}}{\Delta x} + + & - \frac{B_x\rvert_{i, j+1/2, k+1/2}^{n+1/2} - B_x\rvert_{i, j-1/2, k+1/2}^{n+1/2}}{\Delta y} + - \mu_0 J_z\rvert_{i, j, k+1/2}^{n+1/2} + + \frac{B_x\rvert_{i, j+1/2, k+1/2}^{n+3/2} - B_x\rvert_{i, j+1/2, k+1/2}^{n+1/2}}{\Delta t} =& + \frac{E_y\rvert_{i, j+1/2, k+1}^{n+1} - E_y\rvert_{i, j+1/2, k}^{n+1}}{\Delta z} + - \frac{E_z\rvert_{i, j+1, k+1/2}^{n+1} - E_z\rvert_{i, j, k+1/2}^{n+1}}{\Delta y} + + \frac{B_y\rvert_{i+1/2, j, k+1/2}^{n+3/2} - B_y\rvert_{i+1/2, j, k+1/2}^{n+1/2}}{\Delta t} =& + \frac{E_z\rvert_{i+1, j, k+1/2}^{n+1} - E_z\rvert_{i, j, k+1/2}^{n+1}}{\Delta x} + - \frac{E_x\rvert_{i+1/2, j, k+1}^{n+1} - E_x\rvert_{i+1/2, j, k}^{n+1}}{\Delta z} + + \frac{B_z\rvert_{i+1/2, j+1/2, k}^{n+3/2} - B_z\rvert_{i+1/2, j+1/2, k}^{n+1/2}}{\Delta t} =& + \frac{E_x\rvert_{i+1/2, j+1, k}^{n+1} - E_x\rvert_{i+1/2, j, k}^{n+1}}{\Delta y} + - \frac{E_y\rvert_{i+1, j+1/2, k}^{n+1} - E_y\rvert_{i, j+1/2, k}^{n+1}}{\Delta x} + +As can be seen from these equations, the components of the source current are located at the respective components of +the electric field. +Following Gauss's law, the charge density is located at the cell corner.
+ +Using Esirkepov's notation for the discretized differential operators, + +.. math:: + + \nabla^+ u_{i,j,k} &= \left( \frac{u_{i+1,j,k} - u_{i,j,k}}{\Delta x}, + \frac{u_{i,j+1,k} - u_{i,j,k}}{\Delta y}, + \frac{u_{i,j,k+1} - u_{i,j,k}}{\Delta z} + \right) + + \nabla^- u_{i,j,k} &= \left( \frac{u_{i,j,k} - u_{i-1,j,k}}{\Delta x}, + \frac{u_{i,j,k} - u_{i,j-1,k}}{\Delta y}, + \frac{u_{i,j,k} - u_{i,j,k-1}}{\Delta z} + \right)\,, + +the shorthand notation for the discretized Maxwell equations in PIConGPU reads + +.. math:: + + \frac{\vec E\rvert^{n+1} - \vec E\rvert^{n}}{c^2 \Delta t} &= + \nabla^- \times \vec B\rvert^{n+1/2} - \mu_0 \vec J\rvert^{n+1/2} + + \frac{\vec B\rvert^{n+3/2} - \vec B\rvert^{n+1/2}}{\Delta t} &= - \nabla^+ \times \vec E\rvert^{n+1} + + \nabla^- \cdot \vec E\rvert^{n+1} &= \frac{1}{\varepsilon_0} \rho\rvert^{n+1} + + \nabla^+ \cdot \vec B\rvert^{n+3/2} &= 0\,, + +with initial conditions + +.. math:: + + \nabla^- \cdot \vec E &= 0 + + \nabla^+ \cdot \vec B &= 0\,. + +The components :math:`E_x\rvert_{1/2, 0, 0}=E_y\rvert_{0, 1/2, 0}=E_z\rvert_{0, 0, 1/2} +=B_x\rvert_{I, J+1/2, K+1/2}=B_y\rvert_{I+1/2, J, K+1/2}=B_z\rvert_{I+1/2, J+1/2, K}=0` for all times when using +absorbing boundary conditions. +Here, :math:`I,J,K` are the maximum values of :math:`i,j,k` defining the total mesh size. + +Note, in PIConGPU the :math:`\vec B`-field update is split in two updates of half the time step, e.g. + +.. math:: + + \frac{B_x\rvert_{i, j+1/2, k+1/2}^{n+1} - B_x\rvert_{i, j+1/2, k+1/2}^{n+1/2}}{\Delta t / 2} = + \frac{E_y\rvert_{i, j+1/2, k+1}^{n+1} - E_y\rvert_{i, j+1/2, k}^{n+1}}{\Delta z} + - \frac{E_z\rvert_{i, j+1, k+1/2}^{n+1} - E_z\rvert_{i, j, k+1/2}^{n+1}}{\Delta y} + +and + +..
math:: + + \frac{B_x\rvert_{i, j+1/2, k+1/2}^{n+3/2} - B_x\rvert_{i, j+1/2, k+1/2}^{n+1}}{\Delta t / 2} = + \frac{E_y\rvert_{i, j+1/2, k+1}^{n+1} - E_y\rvert_{i, j+1/2, k}^{n+1}}{\Delta z} + - \frac{E_z\rvert_{i, j+1, k+1/2}^{n+1} - E_z\rvert_{i, j, k+1/2}^{n+1}}{\Delta y} + +for the :math:`B_x` component, where the second half of the update is performed at the beginning of the next time step +such that the electric and magnetic field are known at equal time in the particle pusher and at the end of a time step. + + +Dispersion relation of light waves on a mesh +-------------------------------------------- +The dispersion relation of a wave relates its oscillation period in time :math:`T` to its oscillation wavelength +:math:`\lambda`, i.e. its angular frequency :math:`\omega = \frac{2\pi}{T}` to wave vector +:math:`\vec k = \frac{2\pi}{\lambda} \vec e_k`. +For an electromagnetic wave in vacuum, + +.. math:: + + \left[ \frac{\omega}{c} \right]^2 = k_x^2 + k_y^2 + k_z^2\,. + +However, on a 2D mesh, with arbitrary order finite differences for the spatial derivatives, the dispersion relation +becomes + +.. math:: + + \left[ \frac{1}{c\Delta t} \sin\left(\frac{\omega \Delta t}{2} \right) \right]^2 = + \sum\limits_{l=1/2}^{M - 1/2} \sum\limits_{p=1/2}^{M - 1/2} g_l^{2M} g_p^{2M} + \left\lbrace + \frac{\sin(\tilde k_x l \Delta x)\sin(\tilde k_x p \Delta x)}{\Delta x^2} + + \frac{\sin(\tilde k_y l \Delta y)\sin(\tilde k_y p \Delta y)}{\Delta y^2} + \right\rbrace\,, + + +where :math:`\tilde k_x` and :math:`\tilde k_y` are the wave vector components on the mesh in :math:`x` and :math:`y` +direction, respectively. +As is obvious from the relation, the numerical wave vector will be different from the real world wave vector for a given +frequency :math:`\omega` due to discretization. + + +Dispersion Relation for Yee's Method +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Yee's Method [Yee1966]_ uses second order finite differences for the approximation of spatial derivatives. 
+The corresponding dispersion relation reads + +.. math:: + + \left[ \frac{1}{c\Delta t} \sin\left(\frac{\omega \Delta t}{2}\right) \right]^2 = + \left[ + \frac{1}{\Delta x} \sin\left(\frac{\tilde k_x \Delta x}{2}\right) + \right]^2 + + \left[ + \frac{1}{\Delta y} \sin\left(\frac{\tilde k_y \Delta y}{2}\right) + \right]^2\,. + +Solving for a wave's numerical frequency :math:`\omega` in dependence on its wave vector +:math:`\vec{\tilde k} = (\tilde k\cos\phi, \tilde k\sin\phi)`, where the angle :math:`\phi` is enclosed by the mesh's +:math:`x`-axis and the wave's propagation direction, + +.. math:: + + \omega = \frac{2}{\Delta t} \arcsin \xi\,\text{, where } \xi_\mathrm{max} = c\Delta t + \sqrt{ \frac{1}{\Delta x^2} + \frac{1}{\Delta y^2} + \frac{1}{\Delta z^2}} \quad \text{(in 3D)} + +reveals two important properties of the field solver. +(The 2D version is obtained by letting :math:`\Delta z \rightarrow \infty`.) + +First, only within the range :math:`\xi_\mathrm{max} \leq 1` the field solver operates stably. +This gives the *Courant-Friedrichs-Lewy* stability condition relating time step to mesh spacing + +.. math:: + + c\Delta t < \frac{1}{\sqrt{ \frac{1}{\Delta x^2} + \frac{1}{\Delta y^2} + \frac{1}{\Delta z^2} }} \quad \text{(in 3D)} + +Typically, :math:`\xi_\mathrm{max} = 0.995` is chosen. +Outside this stability region, the frequency :math:`\omega` corresponding to a certain wave vector becomes imaginary, +meaning that wave amplitudes can be nonphysically exponentially amplified [Taflove2005]_. + +Second, there exists a purely numerical anisotropy in a wave's phase velocity :math:`\tilde v_p = \omega / \tilde k` +(speed of electromagnetic wave propagation) depending on its propagation direction :math:`\phi`, as depicted in the following figure + +..
image:: media/dispersion-relation_Yee.png + :width: 400 + :alt: Velocity anisotropy for Yee + +assuming square cells :math:`\Delta x = \Delta y = \Delta` and where :math:`S=c\Delta t / \Delta`, +:math:`N_\lambda=\lambda/\Delta`. +That is, for the chosen sampling of three samples per wavelength :math:`\lambda`, the phase velocities along a cell +edge and a cell diagonal differ by approximately 20%. +The velocity error is largest for propagation along the edge. +The phase velocity error can be significantly reduced by increasing the sampling, as visualized in the following figure +by the scaling of the velocity error with wavelength sampling for propagation along the cell edge + +.. image:: media/dispersion-relation_Yee_sampling.png + :width: 400 + :alt: Dispersion for Yee + +Another conclusion from this figure is, that a short-pulse laser with a large bandwidth will suffer from severe +dispersion if the sampling is bad. +In the extreme case where a wavelength is not even sampled twice on the mesh, its field is exponentially damped +[Taflove2005]_. + +Given that most simulations employ short-pulse lasers propagating along the :math:`y`-axis and featuring a large bandwidth, +the resolution of the laser wavelength should be a lot better than in the example, e.g. :math:`N_\lambda=24`, to reduce +errors due to numerical dispersion. + +Note, the reduced phase velocity of light can further cause the emission of numerical Cherenkov radiation by fast charged +particles in the simulation [Lehe2012]_. +The largest emitted wavelength equals the wavelength whose phase velocity is as slow as the particle's velocity, provided +it is resolved at least twice on the mesh. + + +Dispersion Relation for Arbitrary Order Finite Differences +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Using higher order finite differences for the approximation of spatial derivatives significantly improves the +dispersion properties of the solver. 
+Most notably, the velocity anisotropy reduces and the dependence of phase velocity on sampling reduces, too. +Yet higher order solvers still feature dispersion. +As shown in the following picture, its effect is, however, not reduction of phase velocity but increase of phase velocity +beyond the physical vacuum speed of light. +But this can be tweaked by reducing the time step relative to the limit set by the stability criterion. + +.. image:: media/dispersion-relation_AOFDTD_3.png + :width: 400 + :alt: Velocity anisotropy for AOFDTD + +.. image:: media/dispersion-relation_AOFDTD_sampling.png + :width: 400 + :alt: Dispersion for AOFDTD + +Note, it is generally not a good idea to reduce the time step in Yee's method significantly below the stability +criterion as this increases the absolute phase velocity error. +See the following figure, + +.. image:: media/dispersion-relation_AOFDTD_Courant-factor.png + :width: 400 + :alt: Scaling of velocity error with Courant factor for diagonal propagation + +from which the optimum Courant factor :math:`S=c\Delta t / \Delta` can be read off for a 2D, square mesh, too. + +An important conclusion from the above figures showing velocity error over sampling is, that +a higher order solver, with a larger mesh spacing and a smaller time step than given by the above stability limit, +produces physically more accurate results than the standard Yee solver operating with smaller mesh spacing and a +time step close to the stability limit. + +That is, it can be beneficial not only in terms of **physical accuracy**, but also in terms of **memory complexity** +and **time to solution**, to choose a higher order solver with lower spatial resolution and increased time sampling +relative to the stability limit. +Memory complexity scales with number of cells :math:`N_\mathrm{cells}` required to sample a given volume +:math:`N_\mathrm{cells}^d`, where :math:`d=2,3` is the dimension of the simulation domain, +which decreases for larger cells.
+Time to solution scales with the time step and this can be larger with solvers of higher order compared to the Yee solver +with comparable dispersion properties (which requires a smaller cell size than the arbitrary order solver) +since the time step is limited by the stability condition which scales with cell size. +Since the cell size can be larger for arbitrary order solvers, the respective time step limit given by the stability +condition is larger and operating with a time step ten times smaller than the limit might still result in a larger +step than those of the comparable Yee solver. +Finally, physical accuracy is increased by the reduction of the impact of dispersion effects. + + +Usage +----- +The field solver can be chosen and configured in :ref:`fieldSolver.param `. + + +References +---------- +.. [Esirkepov2001] + T.Zh. Esirkepov, + *Exact charge conservation scheme for particle-in-cell simulation with an arbitrary form-factor*, + Computer Physics Communications 135.2 (2001): 144-153, + https://doi.org/10.1016/S0010-4655(00)00228-9 + +.. [Ghrist2000] + M. Ghrist, + *High-Order Finite Difference Methods for Wave Equations*, + PhD thesis (2000), + Department of Applied Mathematics, University of Colorado + +.. [Lehe2012] + R. Lehe et al. + *Numerical growth of emittance in simulations of laser-wakefield acceleration*, + Physical Review Special Topics-Accelerators and Beams 16.2 (2013): 021301. + +.. [Taflove2005] + A. Taflove + *Computational electrodynamics: the finite-difference time-domain method* + Artech house (2005) + +.. [Yee1966] + K.S. Yee, + *Numerical solution of initial boundary value problems involving Maxwell's equations in isotropic media*, + IEEE Trans. Antennas Propagat. 
14, 302-307 (1966) diff --git a/docs/source/models/field_ionization.rst b/docs/source/models/field_ionization.rst index 74657e5ba1..ac52a93eb5 100644 --- a/docs/source/models/field_ionization.rst +++ b/docs/source/models/field_ionization.rst @@ -56,6 +56,24 @@ Overview: Implemented Models Models marked with "(R&D)" are under *research and development* and should be used with care. +Ionization Current +------------------ + +In order to conserve energy, PIConGPU supports an ionization current to decrease the electric field according to the amount of energy lost to field ionization processes. +The current for a single ion is + +.. math:: + + \mathbf{J}_\mathrm{ion} = E_\mathrm{ion} \frac{\mathbf{E}}{|\mathbf{E}|^2 \Delta t V_\mathrm{cell}} + +It is assigned to the grid according to the macroparticle shape. +:math:`E_\mathrm{ion}` is the energy required to ionize the atom/ion, :math:`\mathbf{E}` is the electric field at the particle position and :math:`V_\mathrm{cell}` is the cell volume. +This formula makes the assumption that the ejection energy of the electron is zero. +See [Mulser]_. +The ionization current is accessible in :ref:`speciesDefinition.param `. To activate ionization current, set the second template of the ionization model to particles::ionization::current::EnergyConservation. +By default the ionization current is deactivated. + + Usage ----- @@ -200,3 +218,9 @@ References *Atomic Screening Constant from SCF Functions. II. Atoms with 37 to 86 Electrons*, The Journal of Chemical Physics 47, 1300-1307 (1967) https://dx.doi.org/10.1063/1.1712084 + +.. [Mulser] + P. Mulser et al. 
+ *Modeling field ionization in an energy conserving form and resulting nonstandard fluid dynamics*, + Physics of Plasmas 5, 4466 (1998) + https://doi.org/10.1063/1.873184 diff --git a/docs/source/models/field_ionization_charge_state_prediction.py b/docs/source/models/field_ionization_charge_state_prediction.py index fb32207e9a..7e572948e6 100644 --- a/docs/source/models/field_ionization_charge_state_prediction.py +++ b/docs/source/models/field_ionization_charge_state_prediction.py @@ -1,7 +1,7 @@ """Ionization prediction module and example. This file is part of the PIConGPU. -Copyright 2019-2020 PIConGPU contributors +Copyright 2019-2021 PIConGPU contributors Authors: Marco Garten License: GPLv3+ """ diff --git a/docs/source/models/media/Yee-cell.png b/docs/source/models/media/Yee-cell.png new file mode 100644 index 0000000000..fc63f4a9c8 Binary files /dev/null and b/docs/source/models/media/Yee-cell.png differ diff --git a/docs/source/models/media/Yee-cell.svg b/docs/source/models/media/Yee-cell.svg new file mode 100644 index 0000000000..9e5fb4d8fd --- /dev/null +++ b/docs/source/models/media/Yee-cell.svg @@ -0,0 +1,922 @@ + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + Ex + + + + (i,j,k+1) + + + + + Ey + + + + + + + (i,j,k) + + + + + + + + x + y + z + + + + + + + + + + + + Bz + + + + By + + + + + + + + Ey + + + Ex + + + + + + + + Ez + + + + Bx + + (i,j+1,k) + + + + + + + + + + + + + + + Ey + + + + Ey + + + + + + + + + + + + + + + + Ez + + + + Ez + + + + Ez + + + + + + + + + + + + + + + Ex + + + Ex + + + + + + + Bx + + + + + + + + + + + + By + + + + + + + + Bz + + + + + + (i+1,j,k) + + diff --git a/docs/source/models/media/dispersion-relation_AOFDTD_3.png b/docs/source/models/media/dispersion-relation_AOFDTD_3.png new file mode 100644 index 0000000000..4eaf9793f0 Binary files /dev/null and b/docs/source/models/media/dispersion-relation_AOFDTD_3.png differ diff --git a/docs/source/models/media/dispersion-relation_AOFDTD_Courant-factor.png 
b/docs/source/models/media/dispersion-relation_AOFDTD_Courant-factor.png new file mode 100644 index 0000000000..53ab333781 Binary files /dev/null and b/docs/source/models/media/dispersion-relation_AOFDTD_Courant-factor.png differ diff --git a/docs/source/models/media/dispersion-relation_AOFDTD_sampling.png b/docs/source/models/media/dispersion-relation_AOFDTD_sampling.png new file mode 100644 index 0000000000..663e9508a0 Binary files /dev/null and b/docs/source/models/media/dispersion-relation_AOFDTD_sampling.png differ diff --git a/docs/source/models/media/dispersion-relation_Yee.png b/docs/source/models/media/dispersion-relation_Yee.png new file mode 100755 index 0000000000..f66ac4b09f Binary files /dev/null and b/docs/source/models/media/dispersion-relation_Yee.png differ diff --git a/docs/source/models/media/dispersion-relation_Yee_sampling.png b/docs/source/models/media/dispersion-relation_Yee_sampling.png new file mode 100644 index 0000000000..72db9c3e91 Binary files /dev/null and b/docs/source/models/media/dispersion-relation_Yee_sampling.png differ diff --git a/docs/source/models/pic.rst b/docs/source/models/pic.rst index 6b94b986d1..7792d6fdbe 100644 --- a/docs/source/models/pic.rst +++ b/docs/source/models/pic.rst @@ -3,7 +3,7 @@ The Particle-in-Cell Algorithm ============================== -.. sectionauthor:: Axel Huebl +.. sectionauthor:: Axel Huebl, Klaus Steiniger Please also refer to the textbooks [BirdsallLangdon]_, [HockneyEastwood]_, our :ref:`latest paper on PIConGPU ` and the works in [Huebl2014]_ and [Huebl2019]_ . @@ -13,15 +13,15 @@ System of Equations .. math:: \nabla \cdot \mathbf{E} &= \frac{1}{\varepsilon_0}\sum_s \rho_s - + \nabla \cdot \mathbf{B} &= 0 - + \nabla \times \mathbf{E} &= -\frac{\partial \mathbf{B}} {\partial t} - + \nabla \times \mathbf{B} &= \mu_0\left(\sum_s \mathbf{J}_s + \varepsilon_0 \frac{\partial \mathbf{E}} {\partial t} \right) - + for multiple particle species :math:`s`. 
-:math:`\mathbf{E}(t)` represents the electic, :math:`\mathbf{B}(t)` the magnetic, :math:`\rho_s` the charge density and :math:`\mathbf{J}_s(t)` the current density field. +:math:`\mathbf{E}(t)` represents the electric, :math:`\mathbf{B}(t)` the magnetic, :math:`\rho_s` the charge density and :math:`\mathbf{J}_s(t)` the current density field. Except for normalization of constants, PIConGPU implements the governing equations in SI units. @@ -61,22 +61,15 @@ Electro-Magnetic PIC Method --------------------------- **Fields** such as :math:`\mathbf{E}(t), \mathbf{B}(t)` and :math:`\mathbf{J}(t)` are discretized on a regular mesh in Eulerian frame of reference (see [EulerLagrangeFrameOfReference]_). +See :ref:`section Finite-Difference Time-Domain Method ` describing how Maxwell's equations are discretized on a mesh in PIConGPU. The distribution of **Particles** is described by the distribution function :math:`f_s(\mathbf{x},\mathbf{v},t)`. -This distribution function is sampled by *markers* (commonly referred to as *macro-particles*). +This distribution function is sampled by *markers*, which are commonly referred to as *macroparticles*. +These markers represent blobs of incompressible phase fluid moving in phase space. The temporal evolution of the distribution function is simulated by advancing the markers over time according to the Vlasov--Maxwell--Equation in Lagrangian frame (see eq. :eq:`VlasovMaxwell` and [EulerLagrangeFrameOfReference]_). - -Markers carry a spatial shape of order :math:`n` and a delta-distribution in momentum space. -In most cases, these shapes are implemented as B-splines and are pre-integrated to *assignment functions* :math:`S` of the form: - -.. math:: - - S^0(x) = \big\{ \substack{1 \qquad \text{if}~0 \le x \lt 1\\ 0 \qquad \text{else}} - - S^n(x) = \left(S^{n-1} * S^0\right)(x) = \int_{x-1}^x S^{n-1}(\xi) d\xi - -PIConGPU implements these up to order :math:`n=4`. 
-The three dimensional marker shape is a multiplicative union of B-splines :math:`S^n(x,y,z) = S^n(x) S^n(y) S^n(z)`. +A marker has a finite-size and a velocity, such that it can be regarded as a cloud of particles, whose center of mass is the marker's position and whose mean velocity is the marker's velocity. +The cloud shape :math:`S^n(x)` of order :math:`n` of a marker describes its charge density distribution. +See :ref:`section Hierarchy of Charge Assignment Schemes ` for a list of available marker shapes in PIConGPU. References ---------- diff --git a/docs/source/models/shapes.rst b/docs/source/models/shapes.rst new file mode 100644 index 0000000000..8210020f0e --- /dev/null +++ b/docs/source/models/shapes.rst @@ -0,0 +1,63 @@ +.. _model-shapes: + +Hierarchy of Charge Assignment Schemes +====================================== + +.. sectionauthor:: Klaus Steiniger + +In PIConGPU, the cloud shapes :math:`S^n(x)` are pre-integrated to *assignment functions* :math:`W^n(x)`. + +.. math:: + W^n(x) = \Pi(x) \ast S^n(x) = \int\limits_{-\infty}^{+\infty} \Pi(x^\prime) S^n(x^\prime - x) dx^\prime\,, \text{ where } + \Pi(x) = \left\{\begin{array}{ll} + 0 & |x| \gt \frac{1}{2} \\ + \frac{1}{2} & |x| = \frac{1}{2} \\ + 1 & |x| \lt \frac{1}{2} + \end{array}\right. + +is the top-hat function and :math:`\ast` the convolution. + +Evaluating the assignment functions at mesh points directly provides the fraction of charge from the marker assigned to that point. + +The assignment functions are implemented as B-splines. +The zeroth order assignment function :math:`W^0` is the top-hat function :math:`\Pi`. +It represents charge assignment to the nearest mesh point only, resulting in a stepwise charge density distribution. +Therefore, it should not be used. +The assignment function of order :math:`n` is generated by convolution of the assignment function of order :math:`n-1` with the top-hat function + +.. 
math:: + W^n(x) = W^{n-1}(x) \ast \Pi(x) = \int\limits_{-\infty}^{+\infty} W^{n-1}(x^\prime) \Pi(x^\prime - x) dx^\prime\,. + +The three dimensional assignment function is a multiplicative union of B-splines :math:`W^n(x,y,z) = W^n(x) W^n(y) W^n(z)`. + +PIConGPU implements these up to order :math:`n=4`. +The naming scheme follows [HockneyEastwood]_, tab. 5-1, p. 144, where the name of a scheme +is defined by the visual form of its cloud shape :math:`S`. + + +.. table:: + :widths: auto + :name: assignment_schemes_hierarchy + + +---------------------------------------+-------+----------------------------+ + | Scheme | Order | Assignment function | + +=======================================+=======+============================+ + | NGP (nearest-grid-point) | 0 | stepwise | + +---------------------------------------+-------+----------------------------+ + | CIC (cloud-in-cell) | 1 | piecewise linear spline | + +---------------------------------------+-------+----------------------------+ + | TSC (triangular shaped cloud) | 2 | piecewise quadratic spline | + +---------------------------------------+-------+----------------------------+ + | PQS (piecewise quadratic cloud shape) | 3 | piecewise cubic spline | + +---------------------------------------+-------+----------------------------+ + | PCS (piecewise cubic cloud shape) | 4 | piecewise quartic spline | + +---------------------------------------+-------+----------------------------+ + +References +---------- + +.. [HockneyEastwood] + R.W. Hockney, J.W. Eastwood. 
+ *Computer Simulation Using Particles*, + CRC Press (1988), + ISBN 0-85274-392-0 diff --git a/docs/source/prgpatterns/lockstep.rst b/docs/source/prgpatterns/lockstep.rst index 8b97a818fd..d1f4ed4473 100644 --- a/docs/source/prgpatterns/lockstep.rst +++ b/docs/source/prgpatterns/lockstep.rst @@ -45,7 +45,7 @@ Collective Loop // `frame` is a list which must be traversed collectively while( frame.isValid() ) { - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx( acc ).x; using ParticleDomCfg = IdxConfig< frameSize, numWorker @@ -67,7 +67,7 @@ Non-Collective Loop .. code-block:: cpp - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx( acc ).x; using ParticleDomCfg = IdxConfig< frameSize, numWorkers @@ -91,7 +91,7 @@ Create a Context Variable .. code-block:: cpp - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx( acc ).x; using ParticleDomCfg = IdxConfig< frameSize, numWorkers @@ -128,7 +128,7 @@ Using a Master Worker bool ); - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx( acc ).x; ForEachIdx< IdxConfig< 1, @@ -150,4 +150,4 @@ Using a Master Worker /* important: synchronize now, in case upcoming operations (with * other workers) access that manipulated shared memory section */ - __syncthreads(); + cupla::__syncthreads( acc ); diff --git a/docs/source/usage/param/core.rst b/docs/source/usage/param/core.rst index 126fde70f0..97d57ef8db 100644 --- a/docs/source/usage/param/core.rst +++ b/docs/source/usage/param/core.rst @@ -106,6 +106,8 @@ species.param :path: include/picongpu/param/species.param :no-link: +:ref:`Current solver details `. 
+ speciesDefinition.param ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/usage/param/particles/current.rst b/docs/source/usage/param/particles/current.rst new file mode 100644 index 0000000000..c957dc143a --- /dev/null +++ b/docs/source/usage/param/particles/current.rst @@ -0,0 +1,62 @@ +.. _usage-params-core-currentdeposition: + +Current Deposition +"""""""""""""""""" + +The current solver can be set in :ref:`species.param ` or directly per species in :ref:`speciesDefinition.param `. + +.. _usage-params-core-particles-currentsolver: + +Current Solver +'''''''''''''' + +Esirkepov +~~~~~~~~~ + +.. doxygenstruct:: picongpu::currentSolver::Esirkepov + :project: PIConGPU + +EmZ +~~~ + +.. doxygenstruct:: picongpu::currentSolver::EmZ + :project: PIConGPU + +VillaBune +~~~~~~~~~ + +.. doxygenstruct:: picongpu::currentSolver::VillaBune + :project: PIConGPU + +EsirkepovNative +~~~~~~~~~~~~~~~ + +.. doxygenstruct:: picongpu::currentSolver::EsirkepovNative + :project: PIConGPU + + +.. _usage-params-core-particles-depositionstrategy: + +Deposition Strategy +''''''''''''''''''' + +A current solver supports a strategy to change how the algorithm behaves on different compute architectures. +The strategy is optional and may affect performance. + +StridedCachedSupercells +~~~~~~~~~~~~~~~~~~~~~~~ + +.. doxygenstruct:: picongpu::currentSolver::strategy::StridedCachedSupercells + :project: PIConGPU + +CachedSupercells +~~~~~~~~~~~~~~~~ + +.. doxygenstruct:: picongpu::currentSolver::strategy::CachedSupercells + :project: PIConGPU + +NonCachedSupercells +~~~~~~~~~~~~~~~~~~~ + +.. 
doxygenstruct:: picongpu::currentSolver::strategy::NonCachedSupercells + :project: PIConGPU diff --git a/docs/source/usage/param/plugins.rst b/docs/source/usage/param/plugins.rst index 289f5b9960..0eb0365a64 100644 --- a/docs/source/usage/param/plugins.rst +++ b/docs/source/usage/param/plugins.rst @@ -68,7 +68,7 @@ pngColorScales.param :no-link: transitionRadiation.param -^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^ .. doxygenfile:: transitionRadiation.param :project: PIConGPU diff --git a/docs/source/usage/plugins.rst b/docs/source/usage/plugins.rst index 86c1be2a66..2ed481b84d 100644 --- a/docs/source/usage/plugins.rst +++ b/docs/source/usage/plugins.rst @@ -7,6 +7,7 @@ Plugins Plugin name short description ==================================================================================== ================================================================================= :ref:`ADIOS ` [#f2]_ [#f7]_ stores simulation data as openPMD flavoured ADIOS files [Huebl2017]_ +:ref:`openPMD ` [#f2]_ [#f7]_ outputs simulation data via the openPMD API :ref:`energy histogram ` [#f7]_ energy histograms for electrons and ions :ref:`charge conservation ` [#f6]_ maximum difference between electron charge density and div E :ref:`checkpoint ` [#f2]_ stores the primary data of the simulation for restarts. 
@@ -14,7 +15,6 @@ Plugin name :ref:`count per supercell ` [#f3]_ count macro particles *per supercell* :ref:`energy fields ` electromagnetic field energy per time step :ref:`energy particles ` [#f7]_ kinetic and total energies summed over all electrons and/or ions -:ref:`HDF5 ` [#f2]_ [#f7]_ stores simulation data as openPMD flavoured HDF5 files [Huebl2017]_ :ref:`ISAAC ` interactive 3D live visualization [Matthes2016]_ :ref:`intensity ` [#f1]_ [#f5]_ [#f6]_ maximum and integrated electric field along the y-direction :ref:`particle calorimeter ` [#f3]_ [#f4]_ [#f7]_ spatially resolved, particle energy detector in infinite distance @@ -28,6 +28,7 @@ Plugin name :ref:`slice field printer ` [#f5]_ print out a slice of the electric and/or magnetic and/or current field :ref:`sum currents ` compute the total current summed over all cells :ref:`transitionRadiation ` compute emitted electromagnetic spectra +:ref:`xrayScattering ` compute SAXS scattering amplitude ( based on `FieldTmp` species density ) ==================================================================================== ================================================================================= .. rubric:: Footnotes @@ -123,7 +124,7 @@ If you would like to help in developing those classes for a plugin of your choic `DOI:10.1016/j.nima.2013.10.073 `_ .. [Pausch2018] - R. Pausch, A. Debus, A. Huebl, U. Schramma, K. Steiniger, R. Widera, and M. Bussmann. + R. Pausch, A. Debus, A. Huebl, U. Schramm, K. Steiniger, R. Widera, and M. Bussmann. *Quantitatively consistent computation of coherent and incoherent radiation in particle-in-cell codes - a general form factor formalism for macro-particles*, Nuclear Instruments and Methods in Physics Research Section A: Accelerators, Spectrometers, Detectors and Associated Equipment 909, pp. 
419-422 (2018) `arXiv:1802.03972 `_, `DOI:10.1016/j.nima.2018.02.020 `_ diff --git a/docs/source/usage/plugins/chargeConservation.rst b/docs/source/usage/plugins/chargeConservation.rst index 4ba55db43b..cf5f4b698c 100644 --- a/docs/source/usage/plugins/chargeConservation.rst +++ b/docs/source/usage/plugins/chargeConservation.rst @@ -10,7 +10,6 @@ The maximum deviation value multiplied by the cell's volume is printed. .. attention:: This plugin assumes a Yee-like divergence E stencil! - Do not use it together with other field solvers like *directional splitting* (for the *Lehe* solver it is still correct). .cfg file ^^^^^^^^^ diff --git a/docs/source/usage/plugins/checkpoint.rst b/docs/source/usage/plugins/checkpoint.rst index b5567bd8c7..d5a4a1be07 100644 --- a/docs/source/usage/plugins/checkpoint.rst +++ b/docs/source/usage/plugins/checkpoint.rst @@ -21,18 +21,18 @@ What is the format of the created files? We write our fields and particles in an open markup called :ref:`openPMD `. -For further details, see the according sections in :ref:`HDF5 ` and :ref:`ADIOS `. +For further details, see the according sections in :ref:`the openPMD API ` and :ref:`ADIOS `. External Dependencies ^^^^^^^^^^^^^^^^^^^^^ -The plugin is available as soon as the :ref:`libSplash (HDF5) or ADIOS libraries ` are compiled in. +The plugin is available as soon as the :ref:`openPMD API or ADIOS libraries ` are compiled in. .cfg file ^^^^^^^^^ You can use ``--checkpoint.period`` to specify the output period of the created checkpoints. -Note that this plugin will only be available if libSplash (HDF5) or ADIOS is found during compile configuration. +Note that this plugin will only be available if the openPMD API, libSplash (HDF5) or ADIOS is found during compile configuration. 
============================================= ====================================================================================== PIConGPU command line option Description @@ -59,9 +59,9 @@ PIConGPU command line option Description ``--checkpoint..*`` Additional options to control the IO-backend ============================================= ====================================================================================== -Depending on the available external dependencies (see above), the options for the ```` are: +Depending on the available external dependencies (see above), the options for the `` are: -* :ref:`hdf5 ` +* :ref:`openPMD ` * :ref:`adios ` (keep in mind the :ref:`note on meta-files ` for restarts) Interacting Manually with Checkpoint Data diff --git a/docs/source/usage/plugins/energyHistogram.rst b/docs/source/usage/plugins/energyHistogram.rst index 5e1b56b60b..5d816250d4 100644 --- a/docs/source/usage/plugins/energyHistogram.rst +++ b/docs/source/usage/plugins/energyHistogram.rst @@ -104,13 +104,13 @@ You can quickly load and interact with the data in Python with: eh_data.get_times(species='e') # load data for a given iteration - counts, bins_keV = eh_data.get('e', species_filter='all', iteration=2000) - - # load data for a given time - counts, bins_keV = eh_data.get('e', species_filter='all', time=1.3900e-14) + counts, bins_keV, _, _ = eh_data.get(species='e', species_filter='all', iteration=2000) # get data for multiple iterations - d, bins, iteration, dt = eh_data.get(species='e', iteration=[200, 400, 8000]) + counts, bins_keV, iteration, dt = eh_data.get(species='e', iteration=[200, 400, 8000]) + + # load data for a given time + counts, bins_keV, iteration, dt = eh_data.get(species='e', species_filter='all', time=1.3900e-14) Matplotlib Visualizer diff --git a/docs/source/usage/plugins/hdf5.rst b/docs/source/usage/plugins/hdf5.rst deleted file mode 100644 index 2d06dc3af4..0000000000 --- a/docs/source/usage/plugins/hdf5.rst +++ /dev/null @@ 
-1,102 +0,0 @@ -.. _usage-plugins-HDF5: - -HDF5 ----- - -Stores simulation data such as fields and particles along with domain information, -conversion units etc. as `HDF5 `_ files [Huebl2017]_ . -It uses `libSplash `_ for writing HDF5 data. -It is used for post-simulation analysis and for **restarts** of the simulation after a crash or an intended stop. - -What is the format of the created HDF5 files? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -We write our fields and particles in an open markup called **openPMD**. -You can investigate your files via a large collection of `tools and frameworks `_ or use the native HDF5 bindings of your `favorite programming language `_. - -**Resources for a quick-start:** - -* `online tutorial `_ -* `example files `_ -* `written standard `_ of the openPMD standard -* `list of projects `_ supporting openPMD files - -External Dependencies -^^^^^^^^^^^^^^^^^^^^^ - -The plugin is available as soon as the :ref:`libSplash and HDF5 libraries ` are compiled in. - -.param file -^^^^^^^^^^^ - -The corresponding ``.param`` file is :ref:`fileOutput.param `. - -One can e.g. disable the output of particles by setting: - -.. code-block:: cpp - - /* output all species */ - using FileOutputParticles = VectorAllSpecies; - /* disable */ - using FileOutputParticles = MakeSeq_t< >; - -.cfg file -^^^^^^^^^ - -You can use ``--hdf5.period`` and ``--hdf5.file`` to specify the output period and path and name of the created fileset. -For example, ``--hdf5.period 128 --hdf5.file simData --hdf5.source 'species_all'`` will write only the particle species data to files of the form ``simData_0.h5``, ``simData_128.h5`` in the default simulation output directory every 128 steps. -Note that this plugin will only be available if libSplash and HDF5 is found during compile configuration. 
- -============================ ==================================================================== -PIConGPU command line option Description -============================ ==================================================================== -``--hdf5.period`` Period after which simulation data should be stored on disk. -``--hdf5.file`` Relative or absolute fileset prefix for simulation data. - If relative, files are stored under ``simOutput/``. -``--hdf5.source`` Select data sources to dump. Default is ``species_all,fields_all``, - which dumps all fields and particle species. -============================ ==================================================================== - -.. note:: - - This plugin is a multi plugin. - Command line parameter can be used multiple times to create e.g. dumps with different dumping period. - In the case where a optional parameter with a default value is explicitly defined the parameter will be always passed to the instance of the multi plugin where the parameter is not set. - e.g. - - .. code-block:: bash - - --hdf5.period 128 --hdf5.file simData1 - --hdf5.period 1000 --hdf5.file simData2 --hdf5.source 'species_all' - - creates two plugins: - - #. dump **all species data** each 128th time step. - #. dump **all fields and species data** (this is the default) data each 1000th time step. - -Memory Complexity -^^^^^^^^^^^^^^^^^ - -Accelerator -""""""""""" - -no extra allocations. - -Host -"""" - -During I/O, each complete particle species is allocated one after an other. - -Additional Tools -^^^^^^^^^^^^^^^^ - -See our :ref:`openPMD ` chapter. - -References -^^^^^^^^^^ - -.. [Huebl2017] - A. Huebl, R. Widera, F. Schmitt, A. Matthes, N. Podhorszki, J.Y. Choi, S. Klasky, and M. Bussmann. - *On the Scalability of Data Reduction Techniques in Current and Upcoming HPC Systems from an Application Perspective.* - ISC High Performance Workshops 2017, LNCS 10524, pp. 
15-29 (2017), - `arXiv:1706.00522 `_, `DOI:10.1007/978-3-319-67630-2_2 `_ diff --git a/docs/source/usage/plugins/openPMD.cfg b/docs/source/usage/plugins/openPMD.cfg new file mode 100644 index 0000000000..f584925c70 --- /dev/null +++ b/docs/source/usage/plugins/openPMD.cfg @@ -0,0 +1,21 @@ +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simOutput \ + --openPMD.ext bp \ + --openPMD.json '{ \ + \"adios2\": { \ + \"dataset\": { \ + \"operators\": [ \ + { \ + \"type\": \"bzip2\" \ + } \ + ] \ + }, \ + \"engine\": { \ + \"type\": \"file\", \ + \"parameters\": { \ + \"BufferGrowthFactor\": \"1.2\", \ + \"InitialBufferSize\": \"2GB\" \ + } \ + } \ + } \ + }'" \ No newline at end of file diff --git a/docs/source/usage/plugins/openPMD.rst b/docs/source/usage/plugins/openPMD.rst new file mode 100644 index 0000000000..45edf8f0ee --- /dev/null +++ b/docs/source/usage/plugins/openPMD.rst @@ -0,0 +1,152 @@ +.. _usage-plugins-openPMD: + +openPMD +------- + +Stores simulation data such as fields and particles according to the `openPMD standard `_ using the `openPMD API `_. + +External Dependencies +^^^^^^^^^^^^^^^^^^^^^ + +The plugin is available as soon as the :ref:`openPMD API ` is compiled in. +If the openPMD API is found in version 0.13.0 or greater, PIConGPU will support streaming IO via openPMD. + +.param file +^^^^^^^^^^^ + +The corresponding ``.param`` file is :ref:`fileOutput.param `. + +One can e.g. disable the output of particles by setting: + +.. code-block:: cpp + + /* output all species */ + using FileOutputParticles = VectorAllSpecies; + /* disable */ + using FileOutputParticles = MakeSeq_t< >; + +.cfg file +^^^^^^^^^ + +You can use ``--openPMD.period`` to specify the output period. +The base filename is specified via ``--openPMD.file``. +The openPMD API will parse the file name to decide the chosen backend and iteration layout: + +* The filename extension will determine the backend. 
+* The openPMD API will either create one file encompassing all iterations (group-based iteration layout) or one file per iteration (file-based iteration layout). + The filename will be searched for a pattern describing how to derive a concrete iteration's filename. + If no such pattern is found, the group-based iteration layout will be chosen. + Please refer to the documentation of the openPMD API for further information. + +In order to set defaults for these values, two further options control the filename: + +* ``--openPMD.ext`` sets the filename extension. + Possible extensions include ``bp`` for the ADIOS backends (default), ``h5`` for HDF5 and ``sst`` for Streaming via ADIOS2/SST. + If the openPMD API has been built with support for the ADIOS1 and ADIOS2 backends, ADIOS2 will take precedence over ADIOS1. + This behavior can be overridden by setting the environment variable ``OPENPMD_BP_BACKEND=ADIOS1``. +* ``--openPMD.infix`` sets the filename pattern that controls the iteration layout, default is "_06T" for a six-digit number specifying the iteration. + Leave empty to pick group-based iteration layout. + Since passing an empty string may be tricky in some workflows, specifying ``--openPMD.infix=NULL`` is also possible. + + Note that streaming IO requires group-based iteration layout in openPMD, i.e. ``--openPMD.infix=NULL`` is mandatory. + If PIConGPU detects a streaming backend (e.g. by ``--openPMD.ext=sst``), it will automatically set ``--openPMD.infix=NULL``, overriding the user's choice. + Note however that the ADIOS2 backend can also be selected via ``--openPMD.json`` and via environment variables which PIConGPU does not check. + It is hence recommended to set ``--openPMD.infix=NULL`` explicitly. + +For example, ``--openPMD.period 128 --openPMD.file simData --openPMD.source 'species_all'`` will write only the particle species data to files of the form ``simData_000000.bp``, ``simData_000128.bp`` in the default simulation output directory every 128 steps. 
+Note that this plugin will only be available if the openPMD API is found during compile configuration. + +openPMD backend-specific settings may be controlled via two mechanisms: + +* Environment variables. + Please refer to the backends' documentations for information on environment variables understood by the backends. +* Backend-specific runtime parameters may be set via JSON in the openPMD API. + PIConGPU exposes this via the command line option ``--openPMD.json``. + Please refer to the openPMD API's documentation for further information. + +The JSON parameter may be passed directly as a string, or by filename. +The latter case is distinguished by prepending the filename with an at-sign ``@``. +Specifying a JSON-formatted string from within a ``.cfg`` file can be tricky due to colliding escape mechanisms. +An example for a well-escaped JSON string as part of a ``.cfg`` file is found below. + +.. literalinclude:: openPMD.cfg + +PIConGPU further defines an **extended format for JSON options** that may alternatively be used in order to pass dataset-specific configurations. +For each backend ````, the backend-specific dataset configuration found under ``config[""]["dataset"]`` may take the form of a JSON list of patterns: ``[, , …]``. + +Each such pattern ```` is a JSON object with key ``cfg`` and optional key ``select``: ``{"select": , "cfg": }``. + +In here, ```` is a regex or a list of regexes, as used by POSIX ``grep -E``. +```` is a configuration that will be forwarded as-is to openPMD. + +The single patterns will be processed in top-down manner, selecting the first matching pattern found in the list. +The regexes will be matched against the openPMD dataset path within the iteration (e.g. ``E/x`` or ``particles/.*/position/.*``), considering full matches only. + +The **default configuration** is specified by omitting the ``select`` key. +Specifying more than one default is an error. 
+If no pattern matches a dataset, the default configuration is chosen if specified, or an empty JSON object ``{}`` otherwise. + +A full example: + +.. literalinclude:: openPMD_extended_config.json + +Two data preparation strategies are available for downloading particle data off compute devices. + +* Set ``--openPMD.dataPreparationStrategy doubleBuffer`` for use of the strategy that has been optimized for use with ADIOS-based backends. + The alias ``openPMD.dataPreparationStrategy adios`` may be used. + This strategy requires at least 2x the GPU main memory on the host side. + This is the default. +* Set ``--openPMD.dataPreparationStrategy mappedMemory`` for use of the strategy that has been optimized for use with HDF5-based backends. + This strategy has a small host-side memory footprint (<< GPU main memory). + The alias ``openPMD.dataPreparationStrategy hdf5`` may be used. + +===================================== ==================================================================================================================================================== +PIConGPU command line option description +===================================== ==================================================================================================================================================== +``--openPMD.period`` Period after which simulation data should be stored on disk. +``--openPMD.source`` Select data sources to dump. Default is ``species_all,fields_all``, which dumps all fields and particle species. +``--openPMD.compression`` Legacy parameter to set data transform compression method to be used for ADIOS1 backend until it implements setting compression from JSON config. +``--openPMD.file`` Relative or absolute openPMD file prefix for simulation data. If relative, files are stored under ``simOutput``. +``--openPMD.ext`` openPMD filename extension (this controls the backend picked by the openPMD API). 
+``--openPMD.infix`` openPMD filename infix (use to pick file- or group-based layout in openPMD). Set to NULL to keep empty (e.g. to pick group-based iteration layout). +``--openPMD.json`` Set backend-specific parameters for openPMD backends in JSON format. +``--openPMD.dataPreparationStrategy`` Strategy for preparation of particle data ('doubleBuffer' or 'mappedMemory'). Aliases 'adios' and 'hdf5' may be used respectively. +===================================== ==================================================================================================================================================== + +.. note:: + + This plugin is a multi plugin. + Command line parameter can be used multiple times to create e.g. dumps with different dumping period. + In the case where an optional parameter with a default value is explicitly defined, the parameter will always be passed to the instance of the multi plugin where the parameter is not set. + e.g. + + .. code-block:: bash + + --openPMD.period 128 --openPMD.file simData1 --openPMD.source 'species_all' + --openPMD.period 1000 --openPMD.file simData2 --openPMD.source 'fields_all' --openPMD.ext h5 + + creates two plugins: + + #. dump all species data each 128th time step, use HDF5 backend. + #. dump all field data each 1000th time step, use the default ADIOS backend. + +Memory Complexity +^^^^^^^^^^^^^^^^^ + +Accelerator +""""""""""" + +no extra allocations. + +Host +"""" + +As soon as the openPMD plugin is compiled in, one extra ``mallocMC`` heap for the particle buffer is permanently reserved. +During I/O, particle attributes are allocated one after another. +Using ``--openPMD.dataPreparationStrategy doubleBuffer`` (default) will require at least 2x the GPU memory on the host side. +For a smaller host side memory footprint (<< GPU main memory) pick ``--openPMD.dataPreparationStrategy mappedMemory``. + +Additional Tools +^^^^^^^^^^^^^^^^ + +See our :ref:`openPMD ` chapter. 
diff --git a/docs/source/usage/plugins/openPMD_extended_config.json b/docs/source/usage/plugins/openPMD_extended_config.json new file mode 100644 index 0000000000..e2dad6900b --- /dev/null +++ b/docs/source/usage/plugins/openPMD_extended_config.json @@ -0,0 +1,35 @@ +{ + "adios2": { + "engine": { + "usesteps": true, + "parameters": { + "InitialBufferSize": "2Gb", + "Profile": "On" + } + }, + "dataset": [ + { + "cfg": { + "operators": [ + { + "type": "blosc", + "parameters": { + "clevel": "1", + "doshuffle": "BLOSC_BITSHUFFLE" + } + } + ] + } + }, + { + "select": [ + ".*positionOffset.*", + ".*particlePatches.*" + ], + "cfg": { + "operators": [] + } + } + ] + } +} diff --git a/docs/source/usage/plugins/particleMergerProbabilistic.rst b/docs/source/usage/plugins/particleMergerProbabilistic.rst new file mode 100644 index 0000000000..e070e168e8 --- /dev/null +++ b/docs/source/usage/plugins/particleMergerProbabilistic.rst @@ -0,0 +1,69 @@ +.. _usage-plugins-particleMergerProbabilistic: + +Particle Merger Probabilistic Version +------------------------------------- + +Merges macro particles that are close in phase space to reduce computational load. +Voronoi-based probabilistic variant of the algorithm. The difference between the base Voronoi algorithm +and the probabilistic version lies in the parameters: instead of a threshold of spread in position and momentum, +it uses the ratio of deleted particles. + + +.param file +^^^^^^^^^^^ + +In :ref:`particleMerging.param ` is currently one compile-time parameter: + +===================== ==================================================================================== +Compile-Time Option Description +===================== ==================================================================================== +``MAX_VORONOI_CELLS`` Maximum number of active Voronoi cells per supercell. + If the number of active Voronoi cells reaches this limit merging events are dropped.
+===================== ==================================================================================== + +.cfg file +^^^^^^^^^ + +====================================================== ======================================================================================================================== +PIConGPU command line option Description +====================================================== ======================================================================================================================== +``--<species>_randomizedMerger.period`` The output periodicity of the plugin. A value of ``100`` would mean an output at simulation time step *0, 100, 200, ...*. + +``--<species>_randomizedMerger.ratioDeletedParticles`` The ratio of particles to delete. The parameter has to be in the range *[0:1]*. + +``--<species>_randomizedMerger.maxParticlesToMerge`` Maximum number of macroparticles that can be merged into a single macroparticle. + +``--<species>_randomizedMerger.posSpreadThreshold`` Below this threshold of spread in position macroparticles can be merged [unit: cell edge length]. + +``--<species>_randomizedMerger.momSpreadThreshold`` Below this absolute threshold of spread in momentum macroparticles can be merged [unit: :math:`m_{e-} \cdot c`]. +====================================================== ======================================================================================================================== + +Memory Complexity +^^^^^^^^^^^^^^^^^ + +Accelerator +""""""""""" + +no extra allocations, but requires an extra particle attribute per species, ``voronoiCellId``. + +Host +"""" + +no extra allocations. + +Known Limitations +^^^^^^^^^^^^^^^^^ + +- this plugin is only available with the CUDA backend +- this plugin might take a significant amount of time due to not being fully parallelized. + +Reference +^^^^^^^^^ + +The particle merger implements a macro particle merging algorithm based on: + +Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016).
+Voronoi particle merging algorithm for PIC codes. +Computer Physics Communications, 202, 165-174. + +There is a slight deviation from the paper in determining the next subdivision. The implementation always tries to subdivide a Voronoi cell by positions first; momentums are only checked in case the spreads in the positions satisfy the threshold. diff --git a/docs/source/usage/plugins/phaseSpace.rst b/docs/source/usage/plugins/phaseSpace.rst index 63a73c2a32..e3fc40f725 100644 --- a/docs/source/usage/plugins/phaseSpace.rst +++ b/docs/source/usage/plugins/phaseSpace.rst @@ -8,7 +8,7 @@ This plugin creates a 2D phase space image for a user-given spatial and momentum External Dependencies ^^^^^^^^^^^^^^^^^^^^^ -The plugin is available as soon as the :ref:`libSplash and HDF5 libraries ` are compiled in. +The plugin is available as soon as the :ref:`openPMD API ` is compiled in. .cfg file ^^^^^^^^^ @@ -19,13 +19,13 @@ Example for *y-pz* phase space for the *electron* species (``.cfg`` file macro): # Calculate a 2D phase space # - momentum range in m_e c - TGB_ePSypz="--e_phaseSpace.period 10 --e_phaseSpace.filter all --e_phaseSpace.space y --e_phaseSpace.momentum pz --e_phaseSpace.min -1.0 --e_phaseSpace.max 1.0" + TGB_ePSypz="--e_phaseSpace.period 10 --e_phaseSpace.filter all --e_phaseSpace.space y --e_phaseSpace.momentum pz --e_phaseSpace.min -1.0 --e_phaseSpace.max 1.0 --e_phaseSpace.ext h5" The distinct options are (assuming a species ``e`` for electrons): ====================================== ======================================================== ============================ -Option Usage Unit +Option Usage Unit ====================================== ======================================================== ============================ ``--e_phaseSpace.period `` calculate each N steps *none* ``--e_phaseSpace.filter`` Use filtered particles. 
Available filters are set up in *none* @@ -34,6 +34,7 @@ Option Usage ``--e_phaseSpace.momentum `` momentum coordinate of the 2D phase space *none* ``--e_phaseSpace.min `` minimum of the momentum range :math:`m_\mathrm{species} c` ``--e_phaseSpace.max `` maximum of the momentum range :math:`m_\mathrm{species} c` +``--e_phaseSpace.ext `` filename extension for openPMD backend *none* ====================================== ======================================================== ============================ Memory Complexity @@ -52,11 +53,23 @@ negligible. Output ^^^^^^ -The 2D histograms are stored in ``.hdf5`` files in the ``simOutput/phaseSpace/`` directory. +The 2D histograms are stored in the ``simOutput/phaseSpace/`` directory, by default in ``.h5`` files. A file is created per species, phasespace selection and time step. Values are given as *charge density* per phase space bin. -In order to scale to a simpler *charge of particles* per :math:`\mathrm{d}r_i` and :math:`\mathrm{d}p_i` -bin multiply by the cell volume ``dV``. +In order to scale to a simpler *charge of particles* per :math:`\mathrm{d}r_i` and :math:`\mathrm{d}p_i` -bin multiply by the cell volume ``dV`` (written as an attribute of the openPMD Mesh). + +The output writes a number of non-standard custom openPMD attributes: + +* ``p_min`` and ``p_max``: The lower and upper bounds for the momentum axis, respectively. +* ``dr``: The spacing of the spatial axis in PIConGPU units. +* ``dV``: The volume of a phase space cell. Relates to ``dr`` via ``dV = dp * dr`` where ``dp`` would be the grid spacing along the momentum axis. +* ``dr_unit``: The SI scaling for the spatial axis. Use this instead of ``gridUnitSI``. +* ``p_unit``: The SI scaling for the momentum axis. Use this instead of ``gridUnitSI``. +* ``globalDomainOffset``, ``globalDomainSize`` and ``globalDomainAxisLabels``: Information on the global domain. 
+* ``totalDomainOffset``, ``totalDomainSize`` and ``totalDomainAxisLabels``: Information on the total domain. + Please consult the `PIConGPU wiki `_ for explanations on the meaning of global and total domain. +* ``sim_unit``: SI scaling for the charge density values. Alias for ``unitSI``. Analysis Tools ^^^^^^^^^^^^^^ @@ -223,7 +236,8 @@ Known Limitations - charge deposition uses the counter shape for now (would need one more write to neighbors to evaluate it correctly according to the shape) - the user has to define the momentum range in advance - the resolution is fixed to ``1024 bins`` in momentum and the number of cells in the selected spatial dimension -- this plugin does not yet use :ref:`openPMD markup `. +- While the openPMD standard `has already been updated `_ to support phase space data, the openPMD API does not yet implement this part. + The openPMD attribute ``gridUnitSI`` and ``gridUnitDimension`` can hence not be correctly written yet and should be ignored in favor of the custom attributes written by this plugin. References ^^^^^^^^^^ diff --git a/docs/source/usage/plugins/radiation.rst b/docs/source/usage/plugins/radiation.rst index 002035a3b4..a47cc8580e 100644 --- a/docs/source/usage/plugins/radiation.rst +++ b/docs/source/usage/plugins/radiation.rst @@ -287,6 +287,11 @@ Command line option Description ``--_radiation.folderRadPerGPU`` Name of the folder, where the GPU specific spectra are stored. Default: ``radPerGPU`` ``--_radiation.compression`` If set, the hdf5 output is compressed. +``--_radiation.numJobs`` Number of independent jobs used for the radiation calculation. + This option is used to increase the utilization of the device by producing more independent work. 
+ This option enables accumulation of data in parallel into multiple temporary arrays, thereby increasing the utilization of + the device by increasing the memory footprint + Default: ``2`` ========================================= ============================================================================================================================== Memory Complexity @@ -295,7 +300,8 @@ Memory Complexity Accelerator """"""""""" -each energy bin times each coordinate bin allocates one counter (``float_X``) permanently and on each accelerator. +locally, ``numJobs`` times number of frequencies ``N_omega`` times number of directions ``N_theta`` is permanently allocated. +Each result element (amplitude) is a double precision complex number. Host """" diff --git a/docs/source/usage/plugins/transitionRadiation.rst b/docs/source/usage/plugins/transitionRadiation.rst index c092af03b6..f76c449571 100644 --- a/docs/source/usage/plugins/transitionRadiation.rst +++ b/docs/source/usage/plugins/transitionRadiation.rst @@ -1,7 +1,7 @@ .. _usage-plugins-transitionRadiation: Transition Radiation ---------- +-------------------- The spectrally resolved far field radiation created by electrons passing through a metal foil. diff --git a/docs/source/usage/plugins/xrayScattering.rst b/docs/source/usage/plugins/xrayScattering.rst new file mode 100644 index 0000000000..4806290aa6 --- /dev/null +++ b/docs/source/usage/plugins/xrayScattering.rst @@ -0,0 +1,142 @@ +.. _usage-plugins-xrayScattering: + +xrayScattering +-------------- + +This plugin calculates Small Angle X-ray Scattering (SAXS) patterns from electron density. +( Using a density `FieldTmp` as an intermediate step and not directly the macro particle distribution. ) +This is a species specific plugin and it has to be run separately for each scattering species. +Since the plugin output is the scattered complex amplitude, contributions from different species can be coherently summed later on. + +.. 
math:: + + \Phi({\vec q}) &= \frac{r_e}{d} \int_{t} \mathrm{d}t \int_{V} \mathrm{d}V \phi({\vec r}, t) n({\vec r}, t) \\ + I &= \left| \Phi \right|^2 + + +============================== ================================================================================ +Variable Meaning +============================== ================================================================================ +:math:`\Phi` Scattered amplitude +:math:`\vec q` Scattering vector with :math:`|{\vec q}| = \frac{4 \pi \sin \theta}{\lambda}` +:math:`\theta` Scattering angle. :math:`2\theta` is the angle between the incoming and the scattered k-vectors. +:math:`\lambda` Probing beam wavelength +:math:`n` Electron density +:math:`\phi` Incoming wave amplitude +:math:`I` Scattering intensity +:math:`d` Screen distance +:math:`r_e` Classical electron radius + +============================== ================================================================================ + + +For the free electrons, the density :math:`n` is just their number density, for ions it is the bound electrons density of the species. +This plugin will automatically switch to bound electrons density for species having the `boundElectrons` property. + +The volume integral is realized by a discrete sum over the simulation cells and the temporal integration reduces to accumulating the amplitude over simulation time steps. + +.. note:: + This calculation is based on the kinematic model of scattering. Multiple scattering CAN NOT be handled in this model. + +.param file +^^^^^^^^^^^ + +The `xrayScattering.param` file sets the x-ray beam alignment as well as its temporal and transverse envelope. + +.. note:: + At the moment the translation (to the side center + offset) is not working correctly. + For that reason, the envelopes and the offset can't be set in the ``.param`` file yet. + The probe is always a plane wave. + Beam rotation works. 
+ +The alignment settings define a beam coordinate system with :math:`\hat{z} = \hat{k}` and :math:`\hat{x}`, :math:`\hat{y}` perpendicular to the x-ray propagation direction. +It is always a right-hand system. It is oriented in such a way that for propagation parallel to the PIC x- or y-axis (`Side`: `X`, `XR`, `Y` or `YR`) :math:`\hat{x}_{\text{beam}} = - \hat{z}_{\text{PIC}}` holds and if :math:`{\vec k }` is parallel to the PIC z-axis (`Side`: `Z` or `ZR`), :math:`\hat{x}_{\text{beam}} = - \hat{y}_{\text{PIC}}` holds. +The orientation can then be fine-tuned with the `RotationParam` setting. +.. TODO: Figures showing the beam coordinate system orientation in the PIC system. + +.. TODO: Add other parameters after the coordinate transform has been fixed and the settings have been moved back to the .param file. + +================= =============================================================================================================================== + Setting Description +================= =============================================================================================================================== +``ProbingSide`` The side from which the x-ray is propagated. + Set `X`, `Y` or `Z` for propagation along one of the PIC coordinate system axes; + `XR`, `YR` or `ZR` for propagation in the opposite direction. + +``RotationParam`` Rotation of the beam axis, :math:`z_{\text{beam}}`, from the default orientation ( perpendicular to the simulation box side ). + Set the beam yaw and pitch angles in radians. +================= =============================================================================================================================== + +.. TODO: Add BEAM_OFFSET in between after the coordinate transform has been fixed. + +The coordinate transfer from the PIC system to the beam system is performed in the following order: +rotation to one of the default orientations (``ProbingSide`` setting), additional rotation (``RotationParam`` ).
This has to be taken into account when defining the experimental setup. + + +.cfg file +^^^^^^^^^ + +For a specific (charged) species ``<species>`` e.g. ``e``, the scattering can be computed by the following commands. + +============================================ ============================================================================================================================================ +Command line option Description +============================================ ============================================================================================================================================ +``--<species>_xrayScattering.period`` Period at which the plugin is enabled (PIC period syntax). Only the intensity from these steps is accumulated. + Default is `0`, which means that the scattering intensity is never calculated and the plugin is therefore off. + +``--<species>_xrayScattering.outputPeriod`` Period at which the accumulated amplitude is written to the output file (PIC period syntax). Usually set close to the x-ray coherence time. + +``--<species>_xrayScattering.qx_max`` Upper bound of reciprocal space range in qx direction. The unit is :math:`Å^{-1}`. Default is `5`. + +``--<species>_xrayScattering.qy_max`` Upper bound of reciprocal space range in qy direction. The unit is :math:`Å^{-1}`. Default is `5`. + +``--<species>_xrayScattering.qx_min`` Lower bound of reciprocal space range in qx direction. The unit is :math:`Å^{-1}`. Default is `-5`. + +``--<species>_xrayScattering.qy_min`` Lower bound of reciprocal space range in qy direction. The unit is :math:`Å^{-1}`. Default is `-5`. + +``--<species>_xrayScattering.n_qx`` Number of scattering vectors needed to be calculated in qx direction. Default is `100`. + +``--<species>_xrayScattering.n_qy`` Number of scattering vectors needed to be calculated in qy direction. Default is `100`. + +``--<species>_xrayScattering.file`` Output file name. Default is `_xrayScatteringOutput`. + +``--<species>_xrayScattering.ext`` `openPMD` filename extension. This controls the backend picked by the `openPMD` API.
Default is `bp` for adios backend. + +``--<species>_xrayScattering.compression`` Backend-specific `openPMD` compression method (e.g. zlib). + +``--<species>_xrayScattering.memoryLayout`` Possible values: `mirror` and `split`. Output can be mirrored on all Host+Device pairs or uniformly split, in chunks, over all nodes. + Use split when the output array is too big to store the complete computed q-space on one device. + For small output grids the `mirror` setting could turn out to be more efficient. +============================================ ============================================================================================================================================ + + +Output +^^^^^^ + +``<species>_xrayScatteringOutput.<backend-specific extension>`` + +Output file in the `openPMD` standard. An example on how to access your data with the python reader: + +.. code-block:: python + + from picongpu.plugins.data import XrayScatteringData + + simulation_path = '...' # dir containing simOutput, input, .., + # Read output from the 0th step, for electrons, hdf5 backend. + saxsData = XrayScatteringData( simulation_path, 'e', 'h5' ) + amplitude = saxsData.get(iteration=0) * saxsData.get_unit() + del saxsData + +When you don't want to use the python reader keep in mind that: + * All iterations are saved in a single file + * The mesh containing the output is called `'amplitude'` + * This mesh has 2 components, `'x'` is the real part and `'y'` is the imaginary part. + +.. note:: + The amplitude is not zeroed on ``outputPeriod`` so one has to subtract the output from the iteration one period before and then calculate :math:`\left|\Phi\right|^2` and sum it with the intensities from other coherence periods. + + +References +^^^^^^^^^^ + +- [1] Kluge, T., Rödel, C., Rödel, M., Pelka, A., McBride, E. E., Fletcher, L. B., … Cowan, T. E. (2017). Nanometer-scale characterization of laser-driven compression, shocks, and phase transitions, by x-ray scattering using free electron lasers. Physics of Plasmas, 24(10).
https://doi.org/10.1063/1.5008289 diff --git a/docs/source/usage/tbg.rst b/docs/source/usage/tbg.rst index 6578a0e59f..a4762ca6d2 100644 --- a/docs/source/usage/tbg.rst +++ b/docs/source/usage/tbg.rst @@ -58,15 +58,6 @@ Slurm is a modern batch system, e.g. installed on the Taurus cluster at TU Dresd .. include:: ../install/profiles/taurus-tud/Slurm_Tutorial.rst :start-line: 3 -PBS -""" - -PBS (for *Portable Batch System*) is a widely distributed batch system that comes in several implementations (open, professional, etc.). -It is used, e.g. on Hypnos at HZDR. - -.. include:: ../install/profiles/hypnos-hzdr/PBS_Tutorial.rst - :start-line: 3 - LSF """ diff --git a/docs/source/usage/workflows/memoryPerDevice.py b/docs/source/usage/workflows/memoryPerDevice.py index e4ffdc7c80..190fa3047f 100755 --- a/docs/source/usage/workflows/memoryPerDevice.py +++ b/docs/source/usage/workflows/memoryPerDevice.py @@ -3,8 +3,8 @@ """ This file is part of PIConGPU. -Copyright 2018-2020 PIConGPU contributors -Authors: Marco Garten, Paweł Ordyna +Copyright 2018-2021 PIConGPU contributors +Authors: Marco Garten, Pawel Ordyna License: GPLv3+ """ diff --git a/docs/source/usage/workflows/probeParticles.rst b/docs/source/usage/workflows/probeParticles.rst index 98a01d99c0..f771e36fb1 100644 --- a/docs/source/usage/workflows/probeParticles.rst +++ b/docs/source/usage/workflows/probeParticles.rst @@ -16,7 +16,7 @@ Self-consistently interacting particles are usually called :ref:`tracer particle Workflow """""""" -* ``speciesDefinition.param``: create a species specifically for probes and add ``fieldE`` and ``fieldB`` attributes to it for storing interpolated fields +* ``speciesDefinition.param``: create a species specifically for probes and add ``probeE`` and ``probeB`` attributes to it for storing interpolated fields .. 
code-block:: cpp diff --git a/etc/picongpu/aris-grnet/gpu.tpl b/etc/picongpu/aris-grnet/gpu.tpl index 440a2080b8..413dccbb0f 100644 --- a/etc/picongpu/aris-grnet/gpu.tpl +++ b/etc/picongpu/aris-grnet/gpu.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov, +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov, # Jian Fuh Ong # # This file is part of PIConGPU. @@ -102,7 +102,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq # Run CUDA memtest to check GPU's health srun -n !TBG_tasks !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/bash/mpiexec.tpl b/etc/picongpu/bash/mpiexec.tpl index 04f509cb57..8c8e774c77 100644 --- a/etc/picongpu/bash/mpiexec.tpl +++ b/etc/picongpu/bash/mpiexec.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera +# Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera # # This file is part of PIConGPU. # @@ -53,7 +53,7 @@ export OMPI_MCA_io=^ompio if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then mpiexec -am !TBG_dstPath/tbg/openib.conf --mca mpi_leave_pinned 0 -npernode !TBG_gpusPerNode -n !TBG_tasks !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/bash/mpirun.tpl b/etc/picongpu/bash/mpirun.tpl index fb6e760cd1..ef21800811 100644 --- a/etc/picongpu/bash/mpirun.tpl +++ b/etc/picongpu/bash/mpirun.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera +# Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera # # This file is part of PIConGPU. # @@ -53,7 +53,7 @@ export OMPI_MCA_io=^ompio if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then mpirun -am !TBG_dstPath/tbg/openib.conf --mca mpi_leave_pinned 0 -npernode !TBG_gpusPerNode -n !TBG_tasks !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/cori-nersc/knl.tpl b/etc/picongpu/cori-nersc/knl.tpl index 9d8c908836..47a975d8b4 100644 --- a/etc/picongpu/cori-nersc/knl.tpl +++ b/etc/picongpu/cori-nersc/knl.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Alexander Matthes +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Alexander Matthes # # This file is part of PIConGPU. 
# diff --git a/etc/picongpu/cori-nersc/knl_picongpu.profile.example b/etc/picongpu/cori-nersc/knl_picongpu.profile.example index 5cf1d64aae..b2fb1c0e95 100644 --- a/etc/picongpu/cori-nersc/knl_picongpu.profile.example +++ b/etc/picongpu/cori-nersc/knl_picongpu.profile.example @@ -21,7 +21,7 @@ export proj="" # module swap craype-haswell craype-mic-knl module swap PrgEnv-intel PrgEnv-gnu # GCC 8.2.0 -module load cmake/3.14.4 +module load cmake/3.15.0 module load boost/1.70.0 # Other Software ############################################################## diff --git a/etc/picongpu/cpuNumaStarter.sh b/etc/picongpu/cpuNumaStarter.sh index 62504ebf52..aecaa01160 100755 --- a/etc/picongpu/cpuNumaStarter.sh +++ b/etc/picongpu/cpuNumaStarter.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2017-2020 Rene Widera, Alexander Matthes +# Copyright 2017-2021 Rene Widera, Alexander Matthes # # This file is part of PIConGPU. # diff --git a/etc/picongpu/cuda.filter b/etc/picongpu/cuda.filter index ed8c25e0ec..0663ae5f77 100644 --- a/etc/picongpu/cuda.filter +++ b/etc/picongpu/cuda.filter @@ -1,7 +1,7 @@ std::* -- 0 *boost::* -- 0 pmacc::Environment* -- 0 -pmacc::algorithms::* -- 0 +pmacc::* -- 0 *Event* -- 0 *MPI_Test* -- 0 *new* -- 0 diff --git a/etc/picongpu/davide-cineca/gpu.tpl b/etc/picongpu/davide-cineca/gpu.tpl index f4c0f50ee6..0d01e1954f 100644 --- a/etc/picongpu/davide-cineca/gpu.tpl +++ b/etc/picongpu/davide-cineca/gpu.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera # # This file is part of PIConGPU. 
# @@ -104,7 +104,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq # Run CUDA memtest to check GPU's health srun --cpu-bind=sockets !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/davide-cineca/gpu_picongpu.profile.example b/etc/picongpu/davide-cineca/gpu_picongpu.profile.example index 21b041a260..e926cc2979 100644 --- a/etc/picongpu/davide-cineca/gpu_picongpu.profile.example +++ b/etc/picongpu/davide-cineca/gpu_picongpu.profile.example @@ -21,7 +21,7 @@ export proj=$(groups | awk '{print $2}') # module purge module load gnu/6.4.0 -module load cmake/3.11.4 +module load cmake/3.15.0 module load cuda/9.2.88 module load openmpi/3.1.0--gnu--6.4.0 module load boost/1.68.0--openmpi--3.1.0--gnu--6.4.0 diff --git a/etc/picongpu/davinci-rice/picongpu.tpl b/etc/picongpu/davinci-rice/picongpu.tpl index b4e316b2bd..045dc3b70b 100644 --- a/etc/picongpu/davinci-rice/picongpu.tpl +++ b/etc/picongpu/davinci-rice/picongpu.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch # # This file is part of PIConGPU. # @@ -81,7 +81,7 @@ export OMPI_MCA_io=^ompio if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then mpirun -n TBG_tasks --display-map -am tbg/openib.conf --mca mpi_leave_pinned 0 !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/draco-mpcdf/general.tpl b/etc/picongpu/draco-mpcdf/general.tpl index a29d4e16bf..a6bbf6a52d 100644 --- a/etc/picongpu/draco-mpcdf/general.tpl +++ b/etc/picongpu/draco-mpcdf/general.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera # # This file is part of PIConGPU. # diff --git a/etc/picongpu/draco-mpcdf/picongpu.profile.example b/etc/picongpu/draco-mpcdf/picongpu.profile.example index 5c02c7fcf2..7717c94f5d 100644 --- a/etc/picongpu/draco-mpcdf/picongpu.profile.example +++ b/etc/picongpu/draco-mpcdf/picongpu.profile.example @@ -19,7 +19,7 @@ module purge module load git/2.14 module load gcc/6.3 -module load cmake/3.11.4 +module load cmake/3.15.0 module load boost/gcc/1.64 module load impi/2017.3 module load hdf5-mpi/gcc/1.8.18 diff --git a/etc/picongpu/hemera-hzdr/defq.tpl b/etc/picongpu/hemera-hzdr/defq.tpl index 238c720050..9a7ddbbf14 100644 --- a/etc/picongpu/hemera-hzdr/defq.tpl +++ b/etc/picongpu/hemera-hzdr/defq.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera # # This file is part of PIConGPU. 
# diff --git a/etc/picongpu/hemera-hzdr/defq_picongpu.profile.example b/etc/picongpu/hemera-hzdr/defq_picongpu.profile.example index 42a6d4bc70..acb0a4f702 100644 --- a/etc/picongpu/hemera-hzdr/defq_picongpu.profile.example +++ b/etc/picongpu/hemera-hzdr/defq_picongpu.profile.example @@ -16,6 +16,7 @@ export MY_NAME="$(whoami) <$MY_MAIL>" # General modules ############################################################# # module purge +module load git module load gcc/7.3.0 module load cmake/3.15.2 module load openmpi/2.1.2 @@ -29,6 +30,7 @@ module load c-blosc/1.14.4 module load adios/1.13.1 module load hdf5-parallel/1.8.20 module load libsplash/1.7.0 +module load python/3.6.5 module load libpng/1.6.35 module load pngwriter/0.7.0 diff --git a/etc/picongpu/hemera-hzdr/fwkt_v100.tpl b/etc/picongpu/hemera-hzdr/fwkt_v100.tpl index 5bc8341d9b..413442fa4e 100644 --- a/etc/picongpu/hemera-hzdr/fwkt_v100.tpl +++ b/etc/picongpu/hemera-hzdr/fwkt_v100.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, # Marco Garten, Alexander Debus # # This file is part of PIConGPU. @@ -104,7 +104,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq # Run CUDA memtest to check GPU's health mpiexec !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/hemera-hzdr/fwkt_v100_picongpu.profile.example b/etc/picongpu/hemera-hzdr/fwkt_v100_picongpu.profile.example index 12f0932eca..da4dae273f 100644 --- a/etc/picongpu/hemera-hzdr/fwkt_v100_picongpu.profile.example +++ b/etc/picongpu/hemera-hzdr/fwkt_v100_picongpu.profile.example @@ -16,10 +16,11 @@ export MY_NAME="$(whoami) <$MY_MAIL>" # General modules ############################################################# # module purge +module load git module load gcc/7.3.0 module load cmake/3.15.2 -module load cuda/10.0 -module load openmpi/2.1.2-cuda100 +module load cuda/10.2 +module load openmpi/2.1.2-cuda102 module load boost/1.68.0 # Other Software ############################################################## @@ -27,9 +28,12 @@ module load boost/1.68.0 module load zlib/1.2.11 module load c-blosc/1.14.4 -module load adios/1.13.1-cuda100 -module load hdf5-parallel/1.8.20-cuda100 -module load libsplash/1.7.0-cuda100 +module load hdf5-parallel/1.8.20-cuda102 +module load libsplash/1.7.0-cuda102 +module load python/3.6.5 +module load adios/1.13.1-cuda102 +module load adios2/2.6.0-cuda102 +module load openpmd/0.12.0-cuda102 module load libpng/1.6.35 module load pngwriter/0.7.0 diff --git a/etc/picongpu/hemera-hzdr/gpu.tpl b/etc/picongpu/hemera-hzdr/gpu.tpl index 90c9b2d12c..ff7487d18b 100644 --- a/etc/picongpu/hemera-hzdr/gpu.tpl +++ b/etc/picongpu/hemera-hzdr/gpu.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Marco Garten +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Marco Garten # # This file is part of PIConGPU. 
# @@ -101,7 +101,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq # Run CUDA memtest to check GPU's health mpiexec !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/hemera-hzdr/gpu_picongpu.profile.example b/etc/picongpu/hemera-hzdr/gpu_picongpu.profile.example index 42a7de084d..2e5cb5d869 100644 --- a/etc/picongpu/hemera-hzdr/gpu_picongpu.profile.example +++ b/etc/picongpu/hemera-hzdr/gpu_picongpu.profile.example @@ -16,10 +16,11 @@ export MY_NAME="$(whoami) <$MY_MAIL>" # General modules ############################################################# # module purge +module load git module load gcc/7.3.0 module load cmake/3.15.2 -module load cuda/10.0 -module load openmpi/2.1.2-cuda100 +module load cuda/10.2 +module load openmpi/2.1.2-cuda102 module load boost/1.68.0 # Other Software ############################################################## @@ -27,9 +28,12 @@ module load boost/1.68.0 module load zlib/1.2.11 module load c-blosc/1.14.4 -module load adios/1.13.1-cuda100 -module load hdf5-parallel/1.8.20-cuda100 -module load libsplash/1.7.0-cuda100 +module load hdf5-parallel/1.8.20-cuda102 +module load libsplash/1.7.0-cuda102 +module load python/3.6.5 +module load adios/1.13.1-cuda102 +module load adios2/2.6.0-cuda102 +module load openpmd/0.12.0-cuda102 module load libpng/1.6.35 module load pngwriter/0.7.0 diff --git a/etc/picongpu/hemera-hzdr/k20.tpl b/etc/picongpu/hemera-hzdr/k20.tpl index b8992555f0..44dc0a6970 100644 --- a/etc/picongpu/hemera-hzdr/k20.tpl +++ b/etc/picongpu/hemera-hzdr/k20.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Richard Pausch, 
Rene Widera, +# Copyright 2013-2021 Axel Huebl, Anton Helm, Richard Pausch, Rene Widera, # Marco Garten # # This file is part of PIConGPU. @@ -104,7 +104,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq # Run CUDA memtest to check GPU's health mpiexec !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/hemera-hzdr/k20_picongpu.profile.example b/etc/picongpu/hemera-hzdr/k20_picongpu.profile.example index 25afced6be..2162dad95c 100644 --- a/etc/picongpu/hemera-hzdr/k20_picongpu.profile.example +++ b/etc/picongpu/hemera-hzdr/k20_picongpu.profile.example @@ -16,10 +16,11 @@ export MY_NAME="$(whoami) <$MY_MAIL>" # General modules ############################################################# # module purge +module load git module load gcc/7.3.0 module load cmake/3.15.2 -module load cuda/10.0 -module load openmpi/2.1.2-cuda100 +module load cuda/10.2 +module load openmpi/2.1.2-cuda102 module load boost/1.68.0 # Other Software ############################################################## @@ -27,9 +28,12 @@ module load boost/1.68.0 module load zlib/1.2.11 module load c-blosc/1.14.4 -module load adios/1.13.1-cuda100 -module load hdf5-parallel/1.8.20-cuda100 -module load libsplash/1.7.0-cuda100 +module load hdf5-parallel/1.8.20-cuda102 +module load libsplash/1.7.0-cuda102 +module load python/3.6.5 +module load adios/1.13.1-cuda102 +module load adios2/2.6.0-cuda102 +module load openpmd/0.12.0-cuda102 module load libpng/1.6.35 module load pngwriter/0.7.0 diff --git a/etc/picongpu/hemera-hzdr/k20_restart.tpl b/etc/picongpu/hemera-hzdr/k20_restart.tpl index 52b9701b07..48e4ae642b 100644 --- 
a/etc/picongpu/hemera-hzdr/k20_restart.tpl +++ b/etc/picongpu/hemera-hzdr/k20_restart.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, +# Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, # Bifeng Lei, Marco Garten # # This file is part of PIConGPU. @@ -167,7 +167,7 @@ export OMPI_MCA_io=^ompio if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq !TBG_gpusPerNode ] ; then mpiexec !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/hemera-hzdr/k80.tpl b/etc/picongpu/hemera-hzdr/k80.tpl index 3cfc81ea4d..b20d60192e 100644 --- a/etc/picongpu/hemera-hzdr/k80.tpl +++ b/etc/picongpu/hemera-hzdr/k80.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Richard Pausch, Rene Widera, +# Copyright 2013-2021 Axel Huebl, Anton Helm, Richard Pausch, Rene Widera, # Marco Garten # # This file is part of PIConGPU. @@ -104,7 +104,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq # Run CUDA memtest to check GPU's health mpiexec !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/hemera-hzdr/k80_picongpu.profile.example b/etc/picongpu/hemera-hzdr/k80_picongpu.profile.example index 87262ff55a..03e08aa848 100644 --- a/etc/picongpu/hemera-hzdr/k80_picongpu.profile.example +++ b/etc/picongpu/hemera-hzdr/k80_picongpu.profile.example @@ -16,10 +16,11 @@ export MY_NAME="$(whoami) <$MY_MAIL>" # General modules ############################################################# # module purge +module load git module load gcc/7.3.0 module load cmake/3.15.2 -module load cuda/10.0 -module load openmpi/2.1.2-cuda100 +module load cuda/10.2 +module load openmpi/2.1.2-cuda102 module load boost/1.68.0 # Other Software ############################################################## @@ -27,9 +28,12 @@ module load boost/1.68.0 module load zlib/1.2.11 module load c-blosc/1.14.4 -module load adios/1.13.1-cuda100 -module load hdf5-parallel/1.8.20-cuda100 -module load libsplash/1.7.0-cuda100 +module load hdf5-parallel/1.8.20-cuda102 +module load libsplash/1.7.0-cuda102 +module load python/3.6.5 +module load adios/1.13.1-cuda102 +module load adios2/2.6.0-cuda102 +module load openpmd/0.12.0-cuda102 module load libpng/1.6.35 module load pngwriter/0.7.0 diff --git a/etc/picongpu/hemera-hzdr/k80_restart.tpl b/etc/picongpu/hemera-hzdr/k80_restart.tpl index d65f9e9730..8ed316c572 100644 --- a/etc/picongpu/hemera-hzdr/k80_restart.tpl +++ b/etc/picongpu/hemera-hzdr/k80_restart.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, +# Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, # Bifeng Lei, Marco Garten # # This file is part of PIConGPU. 
@@ -167,7 +167,7 @@ export OMPI_MCA_io=^ompio if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq !TBG_gpusPerNode ] ; then mpiexec !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/hydra-hzdr/default.tpl b/etc/picongpu/hydra-hzdr/default.tpl deleted file mode 100644 index 9f9105f844..0000000000 --- a/etc/picongpu/hydra-hzdr/default.tpl +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera -# -# This file is part of PIConGPU. -# -# PIConGPU is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# PIConGPU is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with PIConGPU. -# If not, see . 
-# - - -# PIConGPU batch script for hydra PBS batch system - -#PBS -q !TBG_queue -#PBS -l walltime=!TBG_wallTime -# Sets batch job's name -#PBS -N !TBG_jobName -#PBS -l nodes=!TBG_nodes:ppn=!TBG_coresPerNode -#PBS -l mem=!TBG_globalMainMemStr -#PBS -m !TBG_mailSettings -M !TBG_mailAddress -#PBS -d !TBG_dstPath -#PBS -n - -#PBS -o stdout -#PBS -e stderr - - -## calculation are done by tbg ## -.TBG_queue="default" - -# settings that can be controlled by environment variables before submit -.TBG_mailSettings=${MY_MAILNOTIFY:-"n"} -.TBG_mailAddress=${MY_MAIL:-"someone@example.com"} -.TBG_author=${MY_NAME:+--author \"${MY_NAME}\"} -.TBG_profile=${PIC_PROFILE:-"~/picongpu.profile"} - -# 2 packages per node if we need more than 2 ranks else same count as TBG_tasks -.TBG_gpusPerNode=`if [ $TBG_tasks -gt 2 ] ; then echo 2; else echo $TBG_tasks; fi` - -#number of cores per parallel node / default is 2 cores per gpu on 'default' queue -.TBG_coresPerNode="$(( TBG_gpusPerNode * 16 ))" - -# use ceil to caculate nodes -.TBG_nodes="$(( ( TBG_tasks + TBG_gpusPerNode -1 ) / TBG_gpusPerNode))" -# main memory used for the job -.TBG_globalMainMem=$(( TBG_nodes * 256 )) -.TBG_globalMainMemStr="!TBG_globalMainMem"GB -## end calculations ## - -echo 'Running program...' - -cd !TBG_dstPath - -export MODULES_NO_OUTPUT=1 -source !TBG_profile -if [ $? -ne 0 ] ; then - echo "Error: PIConGPU environment profile under \"!TBG_profile\" not found!" - exit 1 -fi -unset MODULES_NO_OUTPUT - -#set user rights to u=rwx;g=r-x;o=--- -umask 0027 - -mkdir simOutput 2> /dev/null -cd simOutput - -#wait that all nodes see ouput folder -sleep 1 - -# The OMPIO backend in OpenMPI up to 3.1.3 and 4.0.0 is broken, use the -# fallback ROMIO backend instead. -# see bug https://github.com/open-mpi/ompi/issues/6285 -export OMPI_MCA_io=^ompio - -if [ $? 
-eq 0 ] ; then - mpiexec --prefix $MPIHOME -x LIBRARY_PATH -tag-output --bind-to none --display-map -am !TBG_dstPath/tbg/openib.conf --mca mpi_leave_pinned 0 -npernode !TBG_gpusPerNode -n !TBG_tasks !TBG_dstPath/tbg/cpuNumaStarter.sh !TBG_dstPath/input/bin/picongpu !TBG_author !TBG_programParams | tee output -fi - -mpiexec --prefix $MPIHOME -x LIBRARY_PATH -npernode !TBG_gpusPerNode -n !TBG_tasks /usr/bin/env bash -c "killall -9 picongpu 2>/dev/null || true" diff --git a/etc/picongpu/hydra-hzdr/default_picongpu.profile.example b/etc/picongpu/hydra-hzdr/default_picongpu.profile.example deleted file mode 100644 index 9e10352310..0000000000 --- a/etc/picongpu/hydra-hzdr/default_picongpu.profile.example +++ /dev/null @@ -1,71 +0,0 @@ -# Name and Path of this Script ############################### (DO NOT change!) -export PIC_PROFILE=$(cd $(dirname $BASH_SOURCE) && pwd)"/"$(basename $BASH_SOURCE) - -# User Information ################################# (edit the following lines) -# - automatically add your name and contact to output file meta data -# - send me mails on batch system job (b)egin, (e)nd, (a)bortion or (n)o mail -export MY_MAILNOTIFY="n" -export MY_MAIL="someone@example.com" -export MY_NAME="$(whoami) <$MY_MAIL>" - -# Text Editor for Tools ###################################### (edit this line) -# - examples: "nano", "vim", "emacs -nw", "vi" or without terminal: "gedit" -#export EDITOR="nano" - -# Modules ##################################################################### -# -if [ -f /etc/profile.modules ] -then - . 
/etc/profile.modules - module purge -# export MODULES_NO_OUTPUT=1 - - # Core Dependencies - module load gcc/5.3.0 - module load cmake/3.13.4 - module load boost/1.65.1 - module load openmpi/1.8.6 - module load numactl - - # Plugins (optional) - module load pngwriter/0.7.0 - module load hdf5-parallel/1.8.15 libsplash/1.7.0 - - # either use libSplash or ADIOS for file I/O - #module load adios/1.13.1 - - # Debug Tools - #module load gdb - #module load valgrind/3.8.1 - -# unset MODULES_NO_OUTPUT -fi - -# Environment ################################################################# -# -alias getNode='qsub -I -q default -lwalltime=00:30:00 -lnodes=1:ppn=32' - -export PICSRC=/home/$(whoami)/src/picongpu -export PIC_EXAMPLES=$PICSRC/share/picongpu/examples -export PIC_BACKEND="omp2b:ivybridge" - -export PATH=$PATH:$PICSRC -export PATH=$PATH:$PICSRC/bin -export PATH=$PATH:$PICSRC/src/splash2txt/build -export PATH=$PATH:$PICSRC/src/tools/bin - -export PYTHONPATH=$PICSRC/src/tools/lib/python:$PYTHONPATH - -# "tbg" default options ####################################################### -# - PBS/Torque (qsub) -# - "default" queue -export TBG_SUBMIT="qsub" -export TBG_TPLFILE="etc/picongpu/hydra-hzdr/default.tpl" - -# Load autocompletion for PIConGPU commands -BASH_COMP_FILE=$PICSRC/bin/picongpu-completion.bash -if [ -f $BASH_COMP_FILE ] ; then - source $BASH_COMP_FILE -else - echo "bash completion file '$BASH_COMP_FILE' not found." 
>&2 -fi diff --git a/etc/picongpu/hypnos-hzdr/PBS_Tutorial.rst b/etc/picongpu/hypnos-hzdr/PBS_Tutorial.rst deleted file mode 100644 index 837a50e406..0000000000 --- a/etc/picongpu/hypnos-hzdr/PBS_Tutorial.rst +++ /dev/null @@ -1,45 +0,0 @@ -PBS examples -============ - -Job Submission -'''''''''''''' - -PIConGPU job submission on the *Hypnos* cluster at *HZDR*: - -* ``tbg -s qsub -c etc/picongpu/0008gpus.cfg -t etc/picongpu/hypnos-hzdr/k20.tpl /bigdata/hplsim/<...>/test-001`` - -Where ``<...>`` is one of: - -* ``external/$(whoami)`` -* internal: - - * ``scratch/$(whoami)`` - * ``development/$(whoami)`` - * ``production/`` - -Job Control -''''''''''' - -* interactive job: - - * ``qsub -I -q k20 -lwalltime=12:00:00 -lnodes=1:ppn=8`` - -* details for my jobs: - - * ``qstat -f 12345`` all details for job with ``12345`` - * ``qstat -u $(whoami)`` all jobs under my user name - -* details for queues: - - * ``qstat -a queueName`` show all jobs in a queue - * ``pbs_free -l`` compact view on free and busy nodes - * ``pbsnodes`` list all nodes and their detailed state (free, busy/job-exclusive, offline) - -* communicate with job: - - * ``qdel `` abort job - * ``qsig -s `` send signal or signal name to job - * ``qalter -lwalltime=12:00:00 `` change the walltime of a job - * ``qalter -Wdepend=afterany:54321 12345`` only start job ``12345`` after job with id ``54321`` has finished - * ``qhold `` prevent the job from starting - * ``qrls `` release the job to be eligible for run (after it was set on hold) diff --git a/etc/picongpu/hypnos-hzdr/fermi.tpl b/etc/picongpu/hypnos-hzdr/fermi.tpl deleted file mode 100644 index 107a30678d..0000000000 --- a/etc/picongpu/hypnos-hzdr/fermi.tpl +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera -# -# This file is part of PIConGPU. 
-# -# PIConGPU is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# PIConGPU is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with PIConGPU. -# If not, see . -# - - -# PIConGPU batch script for hypnos PBS batch system - -#PBS -q !TBG_queue -#PBS -l walltime=!TBG_wallTime -# Sets batch job's name -#PBS -N !TBG_jobName -#PBS -l nodes=!TBG_nodes:ppn=!TBG_coresPerNode -#PBS -m !TBG_mailSettings -M !TBG_mailAddress -#PBS -d !TBG_dstPath - -#PBS -o stdout -#PBS -e stderr - - -## calculation are done by tbg ## -# Tesla C2070 queue on kepler018 & kepler019 -.TBG_queue="k20f" - -# settings that can be controlled by environment variables before submit -.TBG_mailSettings=${MY_MAILNOTIFY:-"n"} -.TBG_mailAddress=${MY_MAIL:-"someone@example.com"} -.TBG_author=${MY_NAME:+--author \"${MY_NAME}\"} -.TBG_profile=${PIC_PROFILE:-"~/picongpu.profile"} - -# number of available/hosted GPUs per node in the system -.TBG_numHostedGPUPerNode=4 - -# required GPUs per node for the current job -.TBG_gpusPerNode=`if [ $TBG_tasks -gt $TBG_numHostedGPUPerNode ] ; then echo $TBG_numHostedGPUPerNode; else echo $TBG_tasks; fi` - -#number of cores per parallel node / default is 2 cores per gpu on k20 queue -.TBG_coresPerNode="$(( TBG_gpusPerNode * 2 ))" - -# use ceil to caculate nodes -.TBG_nodes="$(( ( TBG_tasks + TBG_gpusPerNode -1 ) / TBG_gpusPerNode))" -## end calculations ## - -echo 'Running program...' - -cd !TBG_dstPath - -export MODULES_NO_OUTPUT=1 -source !TBG_profile -if [ $? 
-ne 0 ] ; then - echo "Error: PIConGPU environment profile under \"!TBG_profile\" not found!" - exit 1 -fi -unset MODULES_NO_OUTPUT - -#set user rights to u=rwx;g=r-x;o=--- -umask 0027 - -mkdir simOutput 2> /dev/null -cd simOutput - -#wait that all nodes see ouput folder -sleep 1 - -# The OMPIO backend in OpenMPI up to 3.1.3 and 4.0.0 is broken, use the -# fallback ROMIO backend instead. -# see bug https://github.com/open-mpi/ompi/issues/6285 -export OMPI_MCA_io=^ompio - -# test if cuda_memtest binary is available and we have the node exclusive -if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedGPUPerNode -eq !TBG_gpusPerNode ] ; then - mpiexec --prefix $MPIHOME -tag-output --display-map -x LIBRARY_PATH -am !TBG_dstPath/tbg/openib.conf --mca mpi_leave_pinned 0 -npernode !TBG_gpusPerNode -n !TBG_tasks !TBG_dstPath/input/bin/cuda_memtest.sh -else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 -fi - -if [ $? -eq 0 ] ; then - mpiexec --prefix $MPIHOME -x LIBRARY_PATH -tag-output --display-map -am !TBG_dstPath/tbg/openib.conf --mca mpi_leave_pinned 0 -npernode !TBG_gpusPerNode -n !TBG_tasks !TBG_dstPath/input/bin/picongpu !TBG_author !TBG_programParams | tee output -fi - -mpiexec --prefix $MPIHOME -x LIBRARY_PATH -npernode !TBG_gpusPerNode -n !TBG_tasks /usr/bin/env bash -c "killall -9 picongpu 2>/dev/null || true" diff --git a/etc/picongpu/hypnos-hzdr/laser.tpl b/etc/picongpu/hypnos-hzdr/laser.tpl deleted file mode 100644 index 7c1c312a72..0000000000 --- a/etc/picongpu/hypnos-hzdr/laser.tpl +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera -# -# This file is part of PIConGPU. -# -# PIConGPU is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# PIConGPU is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with PIConGPU. -# If not, see . -# - - -# PIConGPU batch script for hypnos PBS batch system - -#PBS -q !TBG_queue -#PBS -l walltime=!TBG_wallTime -# Sets batch job's name -#PBS -N !TBG_jobName -#PBS -l nodes=!TBG_nodes:ppn=!TBG_coresPerNode -#PBS -m !TBG_mailSettings -M !TBG_mailAddress -#PBS -d !TBG_dstPath - -#PBS -o stdout -#PBS -e stderr - - -## calculation are done by tbg ## -.TBG_queue="laser" - -# settings that can be controlled by environment variables before submit -.TBG_mailSettings=${MY_MAILNOTIFY:-"n"} -.TBG_mailAddress=${MY_MAIL:-"someone@example.com"} -.TBG_author=${MY_NAME:+--author \"${MY_NAME}\"} -.TBG_profile=${PIC_PROFILE:-"~/picongpu.profile"} - -# 8 packages per node if we need more than 8 ranks else same count as TBG_tasks -.TBG_gpusPerNode=`if [ $TBG_tasks -gt 8 ] ; then echo 8; else echo $TBG_tasks; fi` - -#number of cores per parallel node / default is 2 cores per gpu on k20 queue -.TBG_coresPerNode="$(( TBG_gpusPerNode * 8 ))" - -# use ceil to caculate nodes -.TBG_nodes="$(( ( TBG_tasks + TBG_gpusPerNode -1 ) / TBG_gpusPerNode))" -## end calculations ## - -echo 'Running program...' - -cd !TBG_dstPath - -export MODULES_NO_OUTPUT=1 -source !TBG_profile -if [ $? -ne 0 ] ; then - echo "Error: PIConGPU environment profile under \"!TBG_profile\" not found!" - exit 1 -fi -unset MODULES_NO_OUTPUT - -#set user rights to u=rwx;g=r-x;o=--- -umask 0027 - -mkdir simOutput 2> /dev/null -cd simOutput - -#wait that all nodes see ouput folder -sleep 1 - -# The OMPIO backend in OpenMPI up to 3.1.3 and 4.0.0 is broken, use the -# fallback ROMIO backend instead. 
-# see bug https://github.com/open-mpi/ompi/issues/6285 -export OMPI_MCA_io=^ompio - -if [ $? -eq 0 ] ; then - mpiexec --prefix $MPIHOME -x LIBRARY_PATH -tag-output --display-map -am !TBG_dstPath/tbg/openib.conf --mca mpi_leave_pinned 0 -npernode !TBG_gpusPerNode -n !TBG_tasks !TBG_dstPath/tbg/cpuNumaStarter.sh !TBG_dstPath/input/bin/picongpu !TBG_author !TBG_programParams | tee output -fi - -mpiexec --prefix $MPIHOME -x LIBRARY_PATH -npernode !TBG_gpusPerNode -n !TBG_tasks /usr/bin/env bash -c "killall -9 picongpu 2>/dev/null || true" diff --git a/etc/picongpu/hypnos-hzdr/laser_picongpu.profile.example b/etc/picongpu/hypnos-hzdr/laser_picongpu.profile.example deleted file mode 100644 index 3752a60e84..0000000000 --- a/etc/picongpu/hypnos-hzdr/laser_picongpu.profile.example +++ /dev/null @@ -1,72 +0,0 @@ -# Name and Path of this Script ############################### (DO NOT change!) -export PIC_PROFILE=$(cd $(dirname $BASH_SOURCE) && pwd)"/"$(basename $BASH_SOURCE) - -# User Information ################################# (edit the following lines) -# - automatically add your name and contact to output file meta data -# - send me mails on batch system job (b)egin, (e)nd, (a)bortion or (n)o mail -export MY_MAILNOTIFY="n" -export MY_MAIL="someone@example.com" -export MY_NAME="$(whoami) <$MY_MAIL>" - -# Text Editor for Tools ###################################### (edit this line) -# - examples: "nano", "vim", "emacs -nw", "vi" or without terminal: "gedit" -#export EDITOR="nano" - -# Modules ##################################################################### -# -if [ -f /etc/profile.modules ] -then - . 
/etc/profile.modules - module purge -# export MODULES_NO_OUTPUT=1 - - # Core Dependencies - module load gcc/5.3.0 - module load cmake/3.13.4 - module load boost/1.65.1 - module load openmpi/1.8.6 - module load numactl - - # Plugins (optional) - module load zlib/1.2.8 - module load pngwriter/0.7.0 - module load hdf5-parallel/1.8.15 libsplash/1.7.0 - - # either use libSplash or ADIOS for file I/O - #module load adios/1.13.1 - - # Debug Tools - #module load gdb - #module load valgrind/3.8.1 - -# unset MODULES_NO_OUTPUT -fi - -# Environment ################################################################# -# -alias getNode='qsub -I -q laser -lwalltime=00:30:00 -lnodes=1:ppn=64' - -export PICSRC=/home/$(whoami)/src/picongpu -export PIC_EXAMPLES=$PICSRC/share/picongpu/examples -export PIC_BACKEND="omp2b:bdver1" - -export PATH=$PATH:$PICSRC -export PATH=$PATH:$PICSRC/bin -export PATH=$PATH:$PICSRC/src/splash2txt/build -export PATH=$PATH:$PICSRC/src/tools/bin - -export PYTHONPATH=$PICSRC/lib/python:$PYTHONPATH - -# "tbg" default options ####################################################### -# - PBS/Torque (qsub) -# - "laser" queue -export TBG_SUBMIT="qsub" -export TBG_TPLFILE="etc/picongpu/hypnos-hzdr/laser.tpl" - -# Load autocompletion for PIConGPU commands -BASH_COMP_FILE=$PICSRC/bin/picongpu-completion.bash -if [ -f $BASH_COMP_FILE ] ; then - source $BASH_COMP_FILE -else - echo "bash completion file '$BASH_COMP_FILE' not found." >&2 -fi diff --git a/etc/picongpu/jureca-jsc/batch.tpl b/etc/picongpu/jureca-jsc/batch.tpl index e9e111fa5c..e7b9daadcf 100644 --- a/etc/picongpu/jureca-jsc/batch.tpl +++ b/etc/picongpu/jureca-jsc/batch.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov # # This file is part of PIConGPU. 
# diff --git a/etc/picongpu/jureca-jsc/batch_picongpu.profile.example b/etc/picongpu/jureca-jsc/batch_picongpu.profile.example index 9d46ab1898..5ea730e87e 100644 --- a/etc/picongpu/jureca-jsc/batch_picongpu.profile.example +++ b/etc/picongpu/jureca-jsc/batch_picongpu.profile.example @@ -24,7 +24,7 @@ jutil env activate -p $proj # module purge module load Intel/2019.0.117-GCC-7.3.0 -module load CMake/3.13.0 +module load CMake/3.15.0 module load IntelMPI/2018.4.274 module load Python/3.6.6 module load Boost/1.68.0-Python-3.6.6 diff --git a/etc/picongpu/jureca-jsc/booster.tpl b/etc/picongpu/jureca-jsc/booster.tpl index 5261491b6f..dc4fa6255e 100644 --- a/etc/picongpu/jureca-jsc/booster.tpl +++ b/etc/picongpu/jureca-jsc/booster.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov # # This file is part of PIConGPU. # diff --git a/etc/picongpu/jureca-jsc/booster_picongpu.profile.example b/etc/picongpu/jureca-jsc/booster_picongpu.profile.example index 20a5b0b51b..806c8fe123 100644 --- a/etc/picongpu/jureca-jsc/booster_picongpu.profile.example +++ b/etc/picongpu/jureca-jsc/booster_picongpu.profile.example @@ -25,7 +25,7 @@ jutil env activate -p $proj module purge module load Architecture/KNL module load Intel/2019.0.117-GCC-7.3.0 -module load CMake/3.12.3 +module load CMake/3.15.0 module load IntelMPI/2018.4.274 module load Python/3.6.6 module load Boost/1.68.0-Python-3.6.6 diff --git a/etc/picongpu/jureca-jsc/gpus.tpl b/etc/picongpu/jureca-jsc/gpus.tpl index a6bda9a5e2..8f3550bdbe 100644 --- a/etc/picongpu/jureca-jsc/gpus.tpl +++ b/etc/picongpu/jureca-jsc/gpus.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov # # This file is part of PIConGPU. 
# @@ -93,7 +93,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedDevicesPerNode # Run CUDA memtest to check GPU's health srun --cpu_bind=sockets !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/jureca-jsc/gpus_picongpu.profile.example b/etc/picongpu/jureca-jsc/gpus_picongpu.profile.example index 387fded36b..25b30b7f5d 100644 --- a/etc/picongpu/jureca-jsc/gpus_picongpu.profile.example +++ b/etc/picongpu/jureca-jsc/gpus_picongpu.profile.example @@ -25,7 +25,7 @@ jutil env activate -p $proj module purge module load GCC/7.3.0 module load CUDA/9.2.88 -module load CMake/3.13.0 +module load CMake/3.15.0 module load MVAPICH2/2.3-GDR module load Python/3.6.6 diff --git a/etc/picongpu/juwels-jsc/batch.tpl b/etc/picongpu/juwels-jsc/batch.tpl index 2840946edc..64dd07439f 100644 --- a/etc/picongpu/juwels-jsc/batch.tpl +++ b/etc/picongpu/juwels-jsc/batch.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov # # This file is part of PIConGPU. 
# @@ -32,7 +32,7 @@ #SBATCH --mem=!TBG_memPerNode #SBATCH --mail-type=!TBG_mailSettings #SBATCH --mail-user=!TBG_mailAddress -#SBATCH --workdir=!TBG_dstPath +#SBATCH --chdir=!TBG_dstPath #SBATCH -o stdout #SBATCH -e stderr diff --git a/etc/picongpu/juwels-jsc/batch_picongpu.profile.example b/etc/picongpu/juwels-jsc/batch_picongpu.profile.example index a6b0260093..87331071d6 100644 --- a/etc/picongpu/juwels-jsc/batch_picongpu.profile.example +++ b/etc/picongpu/juwels-jsc/batch_picongpu.profile.example @@ -10,31 +10,41 @@ export MY_MAIL="someone@example.com" export MY_NAME="$(whoami) <$MY_MAIL>" # Project Information ######################################## (edit this line) -# - project account for computing time -export proj=$(groups | awk '{print $4}') - +# - project and account for allocation +# +# `jutil user projects` will return a table of project associations. +# Each row contains: project,unixgroup,PI-uid,project-type,budget-accounts +# We need the first and last entry. +# Here: select the last available project. +# Alternative: Set proj, account manually +export proj=$( jutil user projects --noheader | awk '{print $1}' | tail -n 1 ) +export account=$(jutil user projects -n | awk '{print $NF}' | tail -n 1) # Text Editor for Tools ###################################### (edit this line) # - examples: "nano", "vim", "emacs -nw", "vi" or without terminal: "gedit" #export EDITOR="nano" - # Set up environment, including $SCRATCH and $PROJECT -jutil env activate -p $proj +# Handle a case where the budgeting account is not set. 
+if [ "$account" = "-" ]; then + jutil env activate --project $proj; +else + jutil env activate --project $proj --budget $account +fi + # General modules ############################################################# # module purge -module load Intel/2019.0.117-GCC-7.3.0 -module load CMake/3.13.0 -module load IntelMPI/2018.4.274 -module load Python/3.6.6 -module load Boost/1.68.0-Python-3.6.6 +module load Intel/2020.2.254-GCC-9.3.0 +module load CMake/3.18.0 +module load IntelMPI/2019.8.254 +module load Python/3.8.5 + +module load Boost/1.73.0 # Other Software ############################################################## # -module load zlib/.1.2.11 -module load HDF5/1.10.1 -module load libpng/.1.6.35 -export CMAKE_PREFIX_PATH=$EBROOTZLIB:$EBROOTLIBPNG:$CMAKE_PREFIX_PATH +module load HDF5/1.10.6 +#export CMAKE_PREFIX_PATH=$EBROOTZLIB:$EBROOTLIBPNG:$CMAKE_PREFIX_PATH PARTITION_LIB=$PROJECT/lib_batch LIBSPLASH_ROOT=$PARTITION_LIB/libSplash @@ -86,7 +96,7 @@ function getNode() { fi echo "Hint: please use 'srun --cpu_bind=sockets ' for launching multiple processes in the interactive mode" export OMP_NUM_THREADS=48 - salloc --time=1:00:00 --nodes=$numNodes --ntasks-per-node=2 --mem=94000 -A $proj -p batch bash + salloc --time=1:00:00 --nodes=$numNodes --ntasks-per-node=2 --mem=94000 -A $account -p batch bash } # allocate an interactive shell for one hour @@ -104,7 +114,7 @@ function getDevice() { fi echo "Hint: please use 'srun --cpu_bind=sockets ' for launching multiple processes in the interactive mode" export OMP_NUM_THREADS=48 - salloc --time=1:00:00 --ntasks-per-node=$(($numDevices)) --mem=94000 -A $proj -p batch bash + salloc --time=1:00:00 --ntasks-per-node=$(($numDevices)) --mem=94000 -A $account -p batch bash } # Load autocompletion for PIConGPU commands diff --git a/etc/picongpu/juwels-jsc/gpus.tpl b/etc/picongpu/juwels-jsc/gpus.tpl index 6d8c717931..34a69e7414 100644 --- a/etc/picongpu/juwels-jsc/gpus.tpl +++ b/etc/picongpu/juwels-jsc/gpus.tpl @@ -1,5 +1,5 @@ 
#!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera, Sergei Bastrakov # # This file is part of PIConGPU. # @@ -34,7 +34,7 @@ #SBATCH --gres=gpu:!TBG_devicesPerNode #SBATCH --mail-type=!TBG_mailSettings #SBATCH --mail-user=!TBG_mailAddress -#SBATCH --workdir=!TBG_dstPath +#SBATCH --chdir=!TBG_dstPath #SBATCH -o stdout #SBATCH -e stderr @@ -93,7 +93,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] && [ !TBG_numHostedDevicesPerNode # Run CUDA memtest to check GPU's health srun --cpu_bind=sockets !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available or compute node is not exclusively allocated, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available or compute node is not exclusively allocated. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/juwels-jsc/gpus_picongpu.profile.example b/etc/picongpu/juwels-jsc/gpus_picongpu.profile.example index dd9470b2d8..508ba55ab9 100644 --- a/etc/picongpu/juwels-jsc/gpus_picongpu.profile.example +++ b/etc/picongpu/juwels-jsc/gpus_picongpu.profile.example @@ -10,44 +10,46 @@ export MY_MAIL="someone@example.com" export MY_NAME="$(whoami) <$MY_MAIL>" # Project Information ######################################## (edit this line) -# - project account for computing time -export proj=$(groups | awk '{print $4}') +# - project and account for allocation +# jutil user projects will return a table of project associations. +# Each row contains: project,unixgroup,PI-uid,project-type,budget-accounts +# We need the first and last entry. +# Here: select the last available project. 
+export proj=$( jutil user projects --noheader | awk '{print $1}' | tail -n 1 ) +export account=$(jutil user projects -n | awk '{print $NF}' | tail -n 1) # Text Editor for Tools ###################################### (edit this line) # - examples: "nano", "vim", "emacs -nw", "vi" or without terminal: "gedit" #export EDITOR="nano" # Set up environment, including $SCRATCH and $PROJECT -jutil env activate -p $proj +# Handle a case where the budgeting account is not set. +if [ "$account" = "-" ]; then + jutil env activate --project $proj; +else + jutil env activate --project $proj --budget $account +fi # General modules ############################################################# # module purge -module load GCC/7.3.0 -module load CUDA/9.2.88 -module load CMake/3.13.0 -module load MVAPICH2/2.3-GDR -module load Python/3.6.6 +module load GCC/9.3.0 +module load CUDA/11.0 +module load CMake/3.18.0 +module load ParaStationMPI/5.4.7-1 +module load mpi-settings/CUDA +module load Python/3.8.5 + +module load Boost/1.74.0 +module load HDF5/1.10.6 +# necessary for evaluations (NumPy, SciPy, Matplotlib, SymPy, Pandas, IPython) +module load SciPy-Stack/2020-Python-3.8.5 # Other Software ############################################################## # -module load zlib/.1.2.11 -module load libpng/.1.6.35 -export CMAKE_PREFIX_PATH=$EBROOTZLIB:$EBROOTLIBPNG:$CMAKE_PREFIX_PATH - -# This is required for Boost to have correct dynamic library dependencies -module load ICU/61.1 -export LD_LIBRARY_PATH=$EBROOTICU/lib:$LD_LIBRARY_PATH - +# Manually installed libraries are stored in PARTITION_LIB PARTITION_LIB=$PROJECT/lib_gpus -BOOST_ROOT=$PARTITION_LIB/boost -export CMAKE_PREFIX_PATH=$BOOST_ROOT:$CMAKE_PREFIX_PATH -export LD_LIBRARY_PATH=$BOOST_ROOT/lib:$LD_LIBRARY_PATH -HDF5_ROOT=$PARTITION_LIB/hdf5 -export PATH=$HDF5_ROOT/bin:$PATH -export CMAKE_PREFIX_PATH=$HDF5_ROOT:$CMAKE_PREFIX_PATH -export LD_LIBRARY_PATH=$HDF5_ROOT/lib:$LD_LIBRARY_PATH LIBSPLASH_ROOT=$PARTITION_LIB/libSplash 
PNGWRITER_ROOT=$PARTITION_LIB/pngwriter @@ -62,11 +64,8 @@ export PATH=$ADIOS_ROOT/bin:$PATH export CMAKE_PREFIX_PATH=$ADIOS_ROOT:$CMAKE_PREFIX_PATH -export LD_LIBRARY_PATH=$EBROOTICU/lib:$LD_LIBRARY_PATH - # Environment ################################################################# # -#export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$BOOST_LIB export PICSRC=$HOME/src/picongpu export PIC_EXAMPLES=$PICSRC/share/picongpu/examples @@ -97,7 +96,7 @@ function getNode() { return 1 fi echo "Hint: please use 'srun --cpu_bind=sockets ' for launching multiple processes in the interactive mode" - salloc --time=1:00:00 --nodes=$numNodes --ntasks-per-node=4 --gres=gpu:4 --mem=180000 -A $proj -p gpus bash + salloc --time=1:00:00 --nodes=$numNodes --ntasks-per-node=4 --gres=gpu:4 --mem=180000 -A $account -p gpus bash } # allocate an interactive shell for one hour @@ -114,7 +113,7 @@ function getDevice() { fi fi echo "Hint: please use 'srun --cpu_bind=sockets ' for launching multiple processes in the interactive mode" - salloc --time=1:00:00 --ntasks-per-node=$(($numDevices)) --gres=gpu:4 --mem=180000 -A $proj -p gpus bash + salloc --time=1:00:00 --ntasks-per-node=$(($numDevices)) --gres=gpu:4 --mem=180000 -A $account -p gpus bash } # Load autocompletion for PIConGPU commands diff --git a/etc/picongpu/lawrencium-lbnl/fermi.tpl b/etc/picongpu/lawrencium-lbnl/fermi.tpl index 796562c1b2..d7be1d54d4 100644 --- a/etc/picongpu/lawrencium-lbnl/fermi.tpl +++ b/etc/picongpu/lawrencium-lbnl/fermi.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # @@ -106,7 +106,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then # Run CUDA memtest to check GPU's health mpirun !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. 
This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/lawrencium-lbnl/k20.tpl b/etc/picongpu/lawrencium-lbnl/k20.tpl index 76518fbd1a..c2e7737ef5 100644 --- a/etc/picongpu/lawrencium-lbnl/k20.tpl +++ b/etc/picongpu/lawrencium-lbnl/k20.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # @@ -104,7 +104,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then # Run CUDA memtest to check GPU's health mpirun !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/pizdaint-cscs/large.tpl b/etc/picongpu/pizdaint-cscs/large.tpl index 41c87ffa7b..3bf5a81d26 100644 --- a/etc/picongpu/pizdaint-cscs/large.tpl +++ b/etc/picongpu/pizdaint-cscs/large.tpl @@ -1,5 +1,5 @@ #!/bin/bash -l -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera # # This file is part of PIConGPU. # @@ -82,7 +82,7 @@ ln -s ../stdout output if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then srun -n !TBG_tasks !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/pizdaint-cscs/normal.tpl b/etc/picongpu/pizdaint-cscs/normal.tpl index 5de12231bf..2743f2b3c5 100644 --- a/etc/picongpu/pizdaint-cscs/normal.tpl +++ b/etc/picongpu/pizdaint-cscs/normal.tpl @@ -1,5 +1,5 @@ #!/bin/bash -l -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Rene Widera +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Rene Widera # # This file is part of PIConGPU. # @@ -87,7 +87,7 @@ export PMI_NO_PREINITIALIZE=1 if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then srun -n !TBG_tasks !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/pizdaint-cscs/picongpu.profile.example b/etc/picongpu/pizdaint-cscs/picongpu.profile.example index 09a10ca0de..42def63ec6 100644 --- a/etc/picongpu/pizdaint-cscs/picongpu.profile.example +++ b/etc/picongpu/pizdaint-cscs/picongpu.profile.example @@ -40,7 +40,7 @@ export CXX=$(which CC) export CRAY_CPU_TARGET=x86-64 # Libraries ################################################################### -module load CMake/3.11.4 +module load CMake/3.15.0 module load cray-mpich/7.6.0 module load cray-hdf5-parallel/1.10.0.3 diff --git a/etc/picongpu/submitAction.sh b/etc/picongpu/submitAction.sh index 064e343abc..fc2fa46bd9 100755 --- a/etc/picongpu/submitAction.sh +++ b/etc/picongpu/submitAction.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. 
# diff --git a/etc/picongpu/summit-ornl/gpu_batch.tpl b/etc/picongpu/summit-ornl/gpu_batch.tpl index 95bc165ca6..7558e058b4 100644 --- a/etc/picongpu/summit-ornl/gpu_batch.tpl +++ b/etc/picongpu/summit-ornl/gpu_batch.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2019-2020 Axel Huebl, Rene Widera +# Copyright 2019-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -86,6 +86,6 @@ cd simOutput #if [ $? -eq 0 ] ; then export OMP_NUM_THREADS=!TBG_coresPerGPU -jsrun --nrs !TBG_tasks --tasks_per_rs 1 --cpu_per_rs !TBG_coresPerGPU --gpu_per_rs 1 --latency_priority GPU-CPU --bind rs --smpiargs="-gpu" !TBG_dstPath/input/bin/picongpu !TBG_author !TBG_programParams | tee output +jsrun --nrs !TBG_tasks --tasks_per_rs 1 --cpu_per_rs !TBG_coresPerGPU --gpu_per_rs 1 --latency_priority GPU-CPU --bind rs --smpiargs="-gpu" !TBG_dstPath/input/bin/picongpu --mpiDirect !TBG_author !TBG_programParams | tee output # note: instead of the PIConGPU binary, one can also debug starting "js_task_info | sort" #fi diff --git a/etc/picongpu/summit-ornl/gpu_picongpu.profile.example b/etc/picongpu/summit-ornl/gpu_picongpu.profile.example index 6661b76b12..c3284054f1 100644 --- a/etc/picongpu/summit-ornl/gpu_picongpu.profile.example +++ b/etc/picongpu/summit-ornl/gpu_picongpu.profile.example @@ -18,20 +18,23 @@ export proj= #export EDITOR="nano" # basic environment ########################################################### -module load gcc/6.4.0 +module load gcc/8.1.1 export CC=$(which gcc) export CXX=$(which g++) # required tools and libs module load git -module load cmake/3.14.2 -module load cuda/10.1.168 +module load cmake/3.18.2 +module load cuda/10.1.243 module load boost/1.66.0 # plugins (optional) ########################################################## -module load hdf5/1.10.3 -module load adios/1.13.1-py2 c-blosc zfp sz lz4 +module load ums +module load ums-aph114 +module load hdf5/1.10.4 +module load adios/1.13.1-py2 c-blosc zfp/0.5.5 sz lz4 +module load 
openpmd-api/0.12.0 # optionally download libSplash and compile it yourself from # https://github.com/ComputationalRadiationPhysics/libSplash/ diff --git a/etc/picongpu/taurus-tud/V100.tpl b/etc/picongpu/taurus-tud/V100.tpl index 8f6dbbf922..821e3b60ca 100644 --- a/etc/picongpu/taurus-tud/V100.tpl +++ b/etc/picongpu/taurus-tud/V100.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Alexander Debus, Klaus Steiniger +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Alexander Debus, Klaus Steiniger # # This file is part of PIConGPU. # @@ -107,7 +107,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then # Run CUDA memtest to check GPU's health srun -K1 !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/taurus-tud/V100_picongpu.profile.example b/etc/picongpu/taurus-tud/V100_picongpu.profile.example index c7f71d5df2..2175755b40 100644 --- a/etc/picongpu/taurus-tud/V100_picongpu.profile.example +++ b/etc/picongpu/taurus-tud/V100_picongpu.profile.example @@ -19,7 +19,7 @@ module switch modenv/ml # load CUDA/9.2.88-GCC-7.3.0-2.30, also loads GCC/7.3.0-2.30, zlib, OpenMPI and others module load fosscuda/2018b -module load CMake/3.11.4-GCCcore-7.3.0 +module load CMake/3.15.0-GCCcore-7.3.0 module load libpng/1.6.34-GCCcore-7.3.0 printf "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n" diff --git a/etc/picongpu/taurus-tud/V100_restart.tpl b/etc/picongpu/taurus-tud/V100_restart.tpl index 8e34ff0d1b..168932b76d 100644 --- a/etc/picongpu/taurus-tud/V100_restart.tpl +++ b/etc/picongpu/taurus-tud/V100_restart.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Alexander Debus, Klaus Steiniger +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Alexander 
Debus, Klaus Steiniger # # This file is part of PIConGPU. # @@ -175,7 +175,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then # Run CUDA memtest to check GPU's health mpiexec -hostfile ../machinefile.txt !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? -eq 0 ] ; then diff --git a/etc/picongpu/taurus-tud/k20x.tpl b/etc/picongpu/taurus-tud/k20x.tpl index d7be22efe9..79dee88d89 100644 --- a/etc/picongpu/taurus-tud/k20x.tpl +++ b/etc/picongpu/taurus-tud/k20x.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Richard Pausch # # This file is part of PIConGPU. # @@ -97,7 +97,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then # Run CUDA memtest to check GPU's health srun -K1 !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/taurus-tud/k20x_picongpu.profile.example b/etc/picongpu/taurus-tud/k20x_picongpu.profile.example index 37437867a9..ab3c69a9ba 100644 --- a/etc/picongpu/taurus-tud/k20x_picongpu.profile.example +++ b/etc/picongpu/taurus-tud/k20x_picongpu.profile.example @@ -22,7 +22,7 @@ export proj=$(groups | awk '{print $1}') module load modenv/scs5 module load foss/2018a module load GCC/6.4.0-2.28 -module load CMake/3.11.4-GCCcore-6.4.0 +module load CMake/3.15.0-GCCcore-6.4.0 module load CUDA/9.2.88 # gcc <= 7, intel 15-17 module load OpenMPI/2.1.2-GCC-6.4.0-2.28 diff --git a/etc/picongpu/taurus-tud/k80.tpl b/etc/picongpu/taurus-tud/k80.tpl index 279bdc9e43..777b920c23 100644 --- a/etc/picongpu/taurus-tud/k80.tpl +++ b/etc/picongpu/taurus-tud/k80.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Richard Pausch # # This file is part of PIConGPU. # @@ -97,7 +97,7 @@ if [ -f !TBG_dstPath/input/bin/cuda_memtest ] ; then # Run CUDA memtest to check GPU's health srun -K1 !TBG_dstPath/input/bin/cuda_memtest.sh else - echo "no binary 'cuda_memtest' available, skip GPU memory test" >&2 + echo "Note: GPU memory test was skipped as no binary 'cuda_memtest' available. This does not affect PIConGPU, starting it now" >&2 fi if [ $? 
-eq 0 ] ; then diff --git a/etc/picongpu/taurus-tud/k80_picongpu.profile.example b/etc/picongpu/taurus-tud/k80_picongpu.profile.example index b7fc7d1c63..11dc9799aa 100644 --- a/etc/picongpu/taurus-tud/k80_picongpu.profile.example +++ b/etc/picongpu/taurus-tud/k80_picongpu.profile.example @@ -22,7 +22,7 @@ export proj=$(groups | awk '{print $1}') module load modenv/scs5 module load foss/2018a module load GCC/6.4.0-2.28 -module load CMake/3.11.4-GCCcore-6.4.0 +module load CMake/3.16.0-GCCcore-6.4.0 module load CUDA/9.2.88 # gcc <= 7, intel 15-17 module load OpenMPI/2.1.2-GCC-6.4.0-2.28 diff --git a/etc/picongpu/taurus-tud/knl.tpl b/etc/picongpu/taurus-tud/knl.tpl index a49a6742d1..7bc35023e8 100644 --- a/etc/picongpu/taurus-tud/knl.tpl +++ b/etc/picongpu/taurus-tud/knl.tpl @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2013-2020 Axel Huebl, Richard Pausch, Alexander Matthes +# Copyright 2013-2021 Axel Huebl, Richard Pausch, Alexander Matthes # # This file is part of PIConGPU. # diff --git a/etc/picongpu/taurus-tud/knl_picongpu.profile.example b/etc/picongpu/taurus-tud/knl_picongpu.profile.example index 4ff09580c2..53ca4ff717 100644 --- a/etc/picongpu/taurus-tud/knl_picongpu.profile.example +++ b/etc/picongpu/taurus-tud/knl_picongpu.profile.example @@ -22,7 +22,7 @@ export proj=$(groups | awk '{print $1}') module load modenv/scs5 module load iimpi/2018a module load git/2.18.0-GCCcore-6.4.0 -module load CMake/3.11.4-GCCcore-7.3.0 +module load CMake/3.15.0-GCCcore-7.3.0 module load Boost/1.66.0-intel-2018a module load HDF5/1.10.1-intel-2018a module load libpng/1.6.34-GCCcore-7.3.0 diff --git a/include/mpiInfo/CMakeLists.txt b/include/mpiInfo/CMakeLists.txt index 43478f36a3..d77db5915a 100644 --- a/include/mpiInfo/CMakeLists.txt +++ b/include/mpiInfo/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of mpiInfo. 
# @@ -22,7 +22,7 @@ # Required cmake version ################################################################################ -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) ################################################################################ @@ -61,10 +61,10 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) ################################################################################ diff --git a/include/mpiInfo/mpiInfo.cpp b/include/mpiInfo/mpiInfo.cpp index e2836a3b38..53a6bff6c6 100644 --- a/include/mpiInfo/mpiInfo.cpp +++ b/include/mpiInfo/mpiInfo.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of mpiInfo. * @@ -27,7 +27,15 @@ #include -#define MPI_CHECK(cmd) {int error = cmd; if(error!=MPI_SUCCESS){printf("<%s>:%i ",__FILE__,__LINE__); throw std::runtime_error(std::string("[MPI] Error"));}} +#define MPI_CHECK(cmd) \ + { \ + int error = cmd; \ + if(error != MPI_SUCCESS) \ + { \ + printf("<%s>:%i ", __FILE__, __LINE__); \ + throw std::runtime_error(std::string("[MPI] Error")); \ + } \ + } namespace po = boost::program_options; @@ -44,15 +52,12 @@ enum * name like p1223(Pid=1233) is than p1223 * in some MPI implementation /mpich) the hostname is unique */ -void cleanHostname( char* name ) +void cleanHostname(char* name) { - for ( int i = 0; i < MPI_MAX_PROCESSOR_NAME; ++i ) + for(int i = 0; i < MPI_MAX_PROCESSOR_NAME; ++i) { - if ( !( name[i] >= 'A' && name[i] <= 'Z' ) && - !( name[i] >= 'a' && name[i] <= 'z' ) && - !( name[i] >= '0' && name[i] <= '9' ) && - !( name[i] == '_' ) && - !( name[i] == '-' ) ) + if(!(name[i] >= 'A' && name[i] <= 'Z') && !(name[i] >= 'a' && name[i] <= 'z') + && !(name[i] >= '0' && name[i] <= '9') && !(name[i] == 
'_') && !(name[i] == '-')) { name[i] = 0; return; @@ -68,7 +73,7 @@ void cleanHostname( char* name ) * from the master. * */ -int getHostRank( ) +int getHostRank() { char hostname[MPI_MAX_PROCESSOR_NAME]; int length; @@ -77,99 +82,102 @@ int getHostRank( ) int totalnodes; int myrank; - MPI_CHECK( MPI_Get_processor_name( hostname, &length ) ); - cleanHostname( hostname ); + MPI_CHECK(MPI_Get_processor_name(hostname, &length)); + cleanHostname(hostname); hostname[length++] = '\0'; - //int totalnodes; + // int totalnodes; - MPI_CHECK( MPI_Comm_size( MPI_COMM_WORLD, &totalnodes ) ); - MPI_CHECK( MPI_Comm_rank( MPI_COMM_WORLD, &myrank ) ); + MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &totalnodes)); + MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &myrank)); - if ( myrank == 0 ) + if(myrank == 0) { - std::map hosts; hosts[hostname] = 0; hostRank = 0; - for ( int rank = 1; rank < totalnodes; ++rank ) + for(int rank = 1; rank < totalnodes; ++rank) { - - MPI_CHECK( MPI_Recv( hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, rank, gridHostnameTag, MPI_COMM_WORLD, MPI_STATUS_IGNORE ) ); - - //printf("Hostname: %s\n", hostname); + MPI_CHECK(MPI_Recv( + hostname, + MPI_MAX_PROCESSOR_NAME, + MPI_CHAR, + rank, + gridHostnameTag, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE)); + + // printf("Hostname: %s\n", hostname); int hostrank = 0; - if ( hosts.count( hostname ) > 0 ) hostrank = hosts[hostname] + 1; + if(hosts.count(hostname) > 0) + hostrank = hosts[hostname] + 1; - MPI_CHECK( MPI_Send( &hostrank, 1, MPI_INT, rank, gridHostRankTag, MPI_COMM_WORLD ) ); + MPI_CHECK(MPI_Send(&hostrank, 1, MPI_INT, rank, gridHostRankTag, MPI_COMM_WORLD)); hosts[hostname] = hostrank; - - } - } else { - MPI_CHECK( MPI_Send( hostname, length, MPI_CHAR, 0, gridHostnameTag, MPI_COMM_WORLD ) ); + MPI_CHECK(MPI_Send(hostname, length, MPI_CHAR, 0, gridHostnameTag, MPI_COMM_WORLD)); - MPI_CHECK( MPI_Recv( &hostRank, 1, MPI_INT, 0, gridHostRankTag, MPI_COMM_WORLD, MPI_STATUS_IGNORE ) ); + MPI_CHECK(MPI_Recv(&hostRank, 1, 
MPI_INT, 0, gridHostRankTag, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); // if(hostRank!=0) hostRank--; //!\todo fix mpi hostrank start with 1 } return hostRank; } -int getMyRank( ) +int getMyRank() { int myrank; - MPI_CHECK( MPI_Comm_rank( MPI_COMM_WORLD, &myrank ) ); + MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &myrank)); return myrank; } -int getTotalRanks( ) +int getTotalRanks() { int totalnodes; - MPI_CHECK( MPI_Comm_size( MPI_COMM_WORLD, &totalnodes ) ); + MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &totalnodes)); return totalnodes; } -int main( int argc, char** argv ) +int main(int argc, char** argv) { bool localRank = false; bool myRank = false; bool totalRank = false; - po::options_description desc( "Allowed options" ); - desc.add_options( ) - ( "help,h", "produce help message" ) - ( "mpi_host_rank", po::value ( &localRank )->zero_tokens( ), "get local mpi rank" ) - ( "mpi_rank", po::value ( &myRank )->zero_tokens( ), "get mpi rank" ) - ( "mpi_size", po::value ( &totalRank )->zero_tokens( ), "get count of mpi ranks" ); + po::options_description desc("Allowed options"); + desc.add_options()( + "help,h", + "produce help message")("mpi_host_rank", po::value(&localRank)->zero_tokens(), "get local mpi rank")( + "mpi_rank", + po::value(&myRank)->zero_tokens(), + "get mpi rank")("mpi_size", po::value(&totalRank)->zero_tokens(), "get count of mpi ranks"); // parse command line options and config file and store values in vm po::variables_map vm; - po::store( boost::program_options::parse_command_line( argc, argv, desc ), vm ); - po::notify( vm ); + po::store(boost::program_options::parse_command_line(argc, argv, desc), vm); + po::notify(vm); // print help message and quit simulation - if ( vm.count( "help" ) ) + if(vm.count("help")) { std::cerr << desc << "\n"; - return false; + return 0; } - MPI_CHECK( MPI_Init( &argc, &argv ) ); - if ( localRank ) - std::cout << "mpi_host_rank: " << getHostRank( ) << std::endl; - if ( myRank ) - std::cout << "mpi_rank: " << getMyRank( ) << 
std::endl; - if ( totalRank ) - std::cout << "mpi_size: " << getTotalRanks( ) << std::endl; + MPI_CHECK(MPI_Init(&argc, &argv)); + if(localRank) + std::cout << "mpi_host_rank: " << getHostRank() << std::endl; + if(myRank) + std::cout << "mpi_rank: " << getMyRank() << std::endl; + if(totalRank) + std::cout << "mpi_size: " << getTotalRanks() << std::endl; - MPI_CHECK( MPI_Finalize( ) ); + MPI_CHECK(MPI_Finalize()); return 0; } - diff --git a/include/picongpu/ArgsParser.cpp b/include/picongpu/ArgsParser.cpp index 766c7290cd..a497b08aba 100644 --- a/include/picongpu/ArgsParser.cpp +++ b/include/picongpu/ArgsParser.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, * Benjamin Worpitz * * This file is part of PIConGPU. @@ -34,74 +34,66 @@ namespace picongpu { - -namespace -{ - - /** Report deprecated parameters - * - * This function is meant to handle cases when some parameters are changed - * but the old versions temporarily kept for backward compatibility and - * deprecated. Notably, this applies to compile-time parameters getting a - * run-time version. Hence it deliberately ignores incapsulation and code - * duplication and simply has a hardcoded set of cases. - */ - void reportDeprecated( boost::program_options::variables_map const & vm ) + namespace { - using pmacc::log; - using Level = PIConGPUVerbose::PHYSICS; - - /* Moving window: a new run-time parameter 'windowMovePoint' to replace - * compile-time 'movePoint' variable + /** Report deprecated parameters + * + * This function is meant to handle cases when some parameters are changed + * but the old versions temporarily kept for backward compatibility and + * deprecated. Notably, this applies to compile-time parameters getting a + * run-time version. Hence it deliberately ignores incapsulation and code + * duplication and simply has a hardcoded set of cases. 
*/ - bool isMovingWindowEnabled = !vm[ "moving" ].empty(); - if( isMovingWindowEnabled ) + void reportDeprecated(boost::program_options::variables_map const& vm) { - bool isWindowMovePointSet = !vm[ "windowMovePoint" ].defaulted( ); - if( !isWindowMovePointSet ) - log< Level >( - "Warning: Compile-time variable 'movePoint' in grid.param " - "is deprecated. It is currently still required for " - "building purposes. Please keep the variable in your " - "grid.param, but for future compatibility set this value " - "using the 'windowMovePoint' parameter in your .cfg file. " - "The value of movePoint is the default for windowMovePoint, " - "setting the latter explicitly will override this." - ); + using pmacc::log; + using Level = PIConGPUVerbose::PHYSICS; + + /* Moving window: a new run-time parameter 'windowMovePoint' to replace + * compile-time 'movePoint' variable + */ + bool isMovingWindowEnabled = !vm["moving"].empty(); + if(isMovingWindowEnabled) + { + bool isWindowMovePointSet = !vm["windowMovePoint"].defaulted(); + if(!isWindowMovePointSet) + log("Warning: Compile-time variable 'movePoint' in grid.param " + "is deprecated. It is currently still required for " + "building purposes. Please keep the variable in your " + "grid.param, but for future compatibility set this value " + "using the 'windowMovePoint' parameter in your .cfg file. 
" + "The value of movePoint is the default for windowMovePoint, " + "setting the latter explicitly will override this."); + } } - } -} // anonymous namespace + } // anonymous namespace - ArgsParser::ArgsParser( ) + ArgsParser::ArgsParser() { - } - ArgsParser::ArgsParser( ArgsParser& ) + ArgsParser::ArgsParser(ArgsParser&) { - } - template - bool from_string( T& t, - const std::string& s, - std::ios_base& ( *f )( std::ios_base& ) ) + template + bool from_string(T& t, const std::string& s, std::ios_base& (*f)(std::ios_base&) ) { - std::istringstream iss( s ); - if ( ( iss >> f >> t ).fail( ) ) - throw std::invalid_argument( "convertion invalid!" ); + std::istringstream iss(s); + if((iss >> f >> t).fail()) + throw std::invalid_argument("convertion invalid!"); return true; } - ArgsParser& ArgsParser::getInstance( ) + ArgsParser& ArgsParser::getInstance() { static ArgsParser instance; return instance; } - ArgsParser::Status ArgsParser::parse( int argc, char** argv ) + ArgsParser::Status ArgsParser::parse(int argc, char** argv) { namespace po = boost::program_options; @@ -111,67 +103,66 @@ namespace std::stringstream desc_stream; desc_stream << "Usage picongpu [-d dx=1 dy=1 dz=1] -g width height depth [options]" << std::endl; - po::options_description desc( desc_stream.str( ) ); + po::options_description desc(desc_stream.str()); std::vector config_files; // add possible options - desc.add_options() - ( "help,h", "print help message and exit" ) - ( "validate", "validate command line parameters and exit" ) - ( "version,v", "print version information and exit" ) - ( "config,c", po::value > ( &config_files )->multitoken( ), "Config file(s)" ) - ; + desc.add_options()("help,h", "print help message and exit")( + "validate", + "validate command line parameters and exit")("version,v", "print version information and exit")( + "config,c", + po::value>(&config_files)->multitoken(), + "Config file(s)"); // add all options from plugins - for ( std::list::iterator iter = 
options.begin( ); - iter != options.end( ); ++iter ) - desc.add( *iter ); + for(std::list::iterator iter = options.begin(); iter != options.end(); ++iter) + desc.add(*iter); // parse command line options and config file and store values in vm po::variables_map vm; - //log ("parsing command line"); - po::store( po::parse_command_line( argc, argv, desc ), vm ); + // log ("parsing command line"); + po::store(po::parse_command_line(argc, argv, desc), vm); - if ( vm.count( "config" ) ) + if(vm.count("config")) { - std::vector conf_files = vm["config"].as >( ); + std::vector conf_files = vm["config"].as>(); - for ( std::vector::const_iterator iter = conf_files.begin( ); - iter != conf_files.end( ); ++iter ) + for(std::vector::const_iterator iter = conf_files.begin(); iter != conf_files.end(); + ++iter) { - //log ("parsing config file '%1%'") % (*iter); - std::ifstream config_file_stream( iter->c_str( ) ); - po::store( po::parse_config_file( config_file_stream, desc ), vm ); + // log ("parsing config file '%1%'") % (*iter); + std::ifstream config_file_stream(iter->c_str()); + po::store(po::parse_config_file(config_file_stream, desc), vm); } } - po::notify( vm ); + po::notify(vm); // print help message and quit simulation - if ( vm.count( "help" ) ) + if(vm.count("help")) { std::cout << desc << "\n"; return Status::successExit; } // print versions of dependent software - if ( vm.count( "version" ) ) + if(vm.count("version")) { - void( getSoftwareVersions( std::cout ) ); + void(getSoftwareVersions(std::cout)); return Status::successExit; } // no parameters set: required parameters (e.g., -g) will be missing // -> obvious wrong usage // -> print help and exit with error code - if ( argc == 1 ) // argc[0] is always the program name + if(argc == 1) // argc[0] is always the program name { std::cerr << desc << "\n"; return Status::error; } - reportDeprecated( vm ); + reportDeprecated(vm); - if ( vm.count( "validate" ) ) + if(vm.count("validate")) { /* if we reach this part of 
code the parameters are valid * and the option `validate` is set. @@ -179,7 +170,7 @@ namespace return Status::successExit; } } - catch ( const po::error& e ) + catch(const po::error& e) { std::cerr << e.what() << std::endl; return Status::error; @@ -188,4 +179,4 @@ namespace return Status::success; } -} +} // namespace picongpu diff --git a/include/picongpu/ArgsParser.hpp b/include/picongpu/ArgsParser.hpp index 4f39455e59..9f4808069f 100644 --- a/include/picongpu/ArgsParser.hpp +++ b/include/picongpu/ArgsParser.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, * Benjamin Worpitz * * This file is part of PIConGPU. @@ -41,7 +41,6 @@ namespace picongpu class ArgsParser { public: - //! Parsing status enum Status { @@ -69,7 +68,7 @@ namespace picongpu * @param argv command line arguments * @return parsing status */ - Status parse(int argc, char **argv); + Status parse(int argc, char** argv); private: /** @@ -82,4 +81,4 @@ namespace picongpu std::list options; }; -} +} // namespace picongpu diff --git a/include/picongpu/CMakeLists.txt b/include/picongpu/CMakeLists.txt index 618658b450..f0147788a8 100644 --- a/include/picongpu/CMakeLists.txt +++ b/include/picongpu/CMakeLists.txt @@ -1,5 +1,6 @@ -# Copyright 2013-2020 Axel Huebl, Benjamin Schneider, Felix Schmitt, Heiko Burau, -# Rene Widera, Alexander Grund, Alexander Matthes +# Copyright 2013-2021 Axel Huebl, Benjamin Schneider, Felix Schmitt, Heiko Burau, +# Rene Widera, Alexander Grund, Alexander Matthes, +# Franz Poeschel, Richard Pausch # # This file is part of PIConGPU. 
# @@ -22,7 +23,7 @@ # Required cmake version ################################################################################ -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) ################################################################################ @@ -42,6 +43,7 @@ list(APPEND CMAKE_PREFIX_PATH "$ENV{CUDA_ROOT}") list(APPEND CMAKE_PREFIX_PATH "$ENV{BOOST_ROOT}") list(APPEND CMAKE_PREFIX_PATH "$ENV{HDF5_ROOT}") list(APPEND CMAKE_PREFIX_PATH "$ENV{ADIOS_ROOT}") +list(APPEND CMAKE_PREFIX_PATH "$ENV{OPENPMD_ROOT}") # Add from environment after specific env vars list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}") @@ -62,10 +64,10 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) ################################################################################ @@ -118,27 +120,13 @@ set(LIBS ${LIBS} ${CMAKE_THREAD_LIBS_INIT}) # Find OpenMP ################################################################################ -find_package(OpenMP) -if(OPENMP_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") -endif() - - -################################################################################ -# Find mallocMC -################################################################################ -if(ALPAKA_ACC_GPU_CUDA_ENABLE) - find_package(mallocMC 2.3.0 QUIET) - - if(NOT mallocMC_FOUND) - message(STATUS "Using mallocMC from thirdParty/ directory") - set(MALLOCMC_ROOT "${PIConGPUapp_SOURCE_DIR}/../../thirdParty/mallocMC") - find_package(mallocMC 2.3.0 REQUIRED) - endif(NOT mallocMC_FOUND) - - include_directories(SYSTEM ${mallocMC_INCLUDE_DIRS}) - add_definitions(${mallocMC_DEFINITIONS}) - set(LIBS ${LIBS} ${mallocMC_LIBRARIES}) +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND 
ALPAKA_ACC_GPU_CUDA_ENABLE AND ALPAKA_CUDA_COMPILER MATCHES "clang") + message(WARNING "OpenMP host side acceleration is disabled: CUDA compilation with clang is not supporting OpenMP.") +else() + find_package(OpenMP) + if(OPENMP_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + endif() endif() @@ -233,21 +221,6 @@ set(PIC_VERBOSE "1" CACHE STRING add_definitions(-DPIC_VERBOSE_LVL=${PIC_VERBOSE}) -################################################################################ -# ADIOS -################################################################################ - -# find adios installation -# set(ADIOS_USE_STATIC_LIBS ON) # force static linking -find_package(ADIOS 1.13.1) - -if(ADIOS_FOUND) - add_definitions(-DENABLE_ADIOS=1) - include_directories(SYSTEM ${ADIOS_INCLUDE_DIRS}) - set(LIBS ${LIBS} ${ADIOS_LIBRARIES}) -endif(ADIOS_FOUND) - - ################################################################################ # Additional defines for PIConGPU outputs ################################################################################ @@ -293,6 +266,66 @@ elseif(MSVC) endif() +################################################################################ +# openPMD +################################################################################ + +# find openPMD installation +find_package(openPMD 0.12.0 CONFIG COMPONENTS MPI) + +if(openPMD_FOUND) + if(openPMD_HAVE_ADIOS2 OR openPMD_HAVE_HDF5) + message(STATUS "Found openPMD: ${openPMD_DIR}") + add_definitions(-DENABLE_OPENPMD=1) + + # non of these should appear in cmake-gui, so make them internal + set(JSON_BuildTests OFF CACHE INTERNAL "") + set(JSON_MultipleHeaders OFF CACHE INTERNAL "") + set(JSON_ImplicitConversions OFF CACHE INTERNAL "") + set(JSON_Install OFF CACHE INTERNAL "") # only used PRIVATE + + # allow to use externally installed nlohmann_json + set( + PIC_nlohmann_json_PROVIDER "intern" CACHE + STRING "Use internally shipped or external nlohmann_json library.") + 
set_property( + CACHE PIC_nlohmann_json_PROVIDER + PROPERTY STRINGS "intern;extern") + mark_as_advanced(PIC_nlohmann_json_PROVIDER) + if(${PIC_nlohmann_json_PROVIDER} STREQUAL "intern") + add_subdirectory( + "${PIConGPUapp_SOURCE_DIR}/../../thirdParty/nlohmann_json" + "${CMAKE_CURRENT_BINARY_DIR}/build_nlohmann_json") + else() + find_package(nlohmann_json 3.9.1 CONFIG REQUIRED) + message(STATUS "nlohmann-json: Found version '${nlohmann_json_VERSION}'") + endif() + set(LIBS ${LIBS} openPMD::openPMD) + else() + message(STATUS "Found openPMD at ${openPMD_DIR}, but PIConGPU requires" + " availability of either its ADIOS2 or HDF5 backend - " + "NOT BUILDING the openPMD plugin") + endif() +else(openPMD_FOUND) + message(STATUS "Could NOT find openPMD - set openPMD_DIR or check your CMAKE_PREFIX_PATH") +endif(openPMD_FOUND) + + +################################################################################ +# ADIOS +################################################################################ + +# find adios installation +# set(ADIOS_USE_STATIC_LIBS ON) # force static linking +find_package(ADIOS 1.13.1) + +if(ADIOS_FOUND) + add_definitions(-DENABLE_ADIOS=1) + include_directories(SYSTEM ${ADIOS_INCLUDE_DIRS}) + set(LIBS ${LIBS} ${ADIOS_LIBRARIES}) +endif(ADIOS_FOUND) + + ################################################################################ # libSplash (+ hdf5 due to required headers) ################################################################################ @@ -368,6 +401,21 @@ else(ISAAC_FOUND) endif() endif(ISAAC_FOUND) +################################################################################ +# PIConGPU Workarounds +################################################################################ + +set(PIC_COMPUTE_CURRENT_THREAD_LIMITER_DEFAULT OFF) +if(ALPAKA_ACC_GPU_HIP_ENABLE) + set(PIC_COMPUTE_CURRENT_THREAD_LIMITER_DEFAULT ON) +endif() +option(PIC_COMPUTE_CURRENT_THREAD_LIMITER "Compute current results with HIP alpaka backend are wrong 
when more threads than particles in a frame will be used (possible compiler BUG).\ + ON means the number of threads will be limited to number of particles in a frame." ${PIC_COMPUTE_CURRENT_THREAD_LIMITER_DEFAULT}) + +if(PIC_COMPUTE_CURRENT_THREAD_LIMITER) + add_definitions(-DPIC_COMPUTE_CURRENT_THREAD_LIMITER=1) +endif() + ################################################################################ # Check if PIC_EXTENSION_PATH is relative or absolute ################################################################################ @@ -416,6 +464,16 @@ cupla_add_executable(picongpu target_link_libraries(picongpu PUBLIC ${LIBS} picongpu-hostonly) +if(openPMD_FOUND) + # Including will throw loads of warnings. Quiet them. + # (Doesn't work for nvcc??) + target_include_directories( + picongpu-hostonly + SYSTEM PRIVATE + $) + target_link_libraries(picongpu-hostonly PRIVATE nlohmann_json::nlohmann_json) +endif() + ################################################################################ # Clang-Tidy (3.9+) Target for CI @@ -456,7 +514,7 @@ if(${CLANG_TIDY_RETURN} EQUAL 0) # -checks='-*,modernize-use-using' # -fix # -fix-errors -- - -std=c++11 + -std=c++14 ${OpenMP_CXX_FLAGS} ${ALL_INCLUDES_STR} ${ALL_DEFINES_STR} diff --git a/include/picongpu/_defaultParam.loader b/include/picongpu/_defaultParam.loader index 0f6362d47f..ab50dd3769 100644 --- a/include/picongpu/_defaultParam.loader +++ b/include/picongpu/_defaultParam.loader @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, Finn-Ole Carstens +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -26,12 +26,12 @@ #pragma once #include "picongpu/param/dimension.param" -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/param/mallocMC.param" +#include "picongpu/param/precision.param" +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) +# include "picongpu/param/mallocMC.param" #endif #include "picongpu/param/memory.param" #include "picongpu/param/random.param" -#include "picongpu/param/precision.param" #include "picongpu/param/physicalConstants.param" #include "picongpu/param/flylite.param" #include "picongpu/param/speciesConstants.param" @@ -46,8 +46,8 @@ #include "picongpu/param/pml.param" #include "picongpu/param/unit.param" #include "picongpu/param/particleFilters.param" -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/param/bremsstrahlung.param" +#if(PMACC_CUDA_ENABLED == 1) +# include "picongpu/param/bremsstrahlung.param" #endif #include "picongpu/param/radiation.param" #include "picongpu/param/transitionRadiation.param" @@ -65,3 +65,6 @@ #include "picongpu/param/isaac.param" #include "picongpu/param/radiationObserver.param" #include "picongpu/param/particleMerger.param" +#if(ENABLE_OPENPMD == 1) +# include "picongpu/param/xrayScattering.param" +#endif diff --git a/include/picongpu/_defaultUnitless.loader b/include/picongpu/_defaultUnitless.loader index 4a2679af5a..24cf2a081d 100644 --- a/include/picongpu/_defaultUnitless.loader +++ b/include/picongpu/_defaultUnitless.loader @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Marco Garten, Finn-Ole Carstens * * This file is part of PIConGPU. 
@@ -39,7 +39,7 @@ #include "picongpu/unitless/speciesInitialization.unitless" #include "picongpu/unitless/fieldBackground.unitless" #include "picongpu/unitless/synchrotronPhotons.unitless" -#if( PMACC_CUDA_ENABLED == 1 ) +#if(PMACC_CUDA_ENABLED == 1) # include "picongpu/unitless/bremsstrahlung.unitless" #endif diff --git a/include/picongpu/algorithms/AssignedTrilinearInterpolation.hpp b/include/picongpu/algorithms/AssignedTrilinearInterpolation.hpp index 80eb489cb8..0b7e34abe2 100644 --- a/include/picongpu/algorithms/AssignedTrilinearInterpolation.hpp +++ b/include/picongpu/algorithms/AssignedTrilinearInterpolation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -32,28 +32,21 @@ namespace picongpu namespace pmacc { -namespace result_of -{ - - template< typename T_Cursor > - struct Functor< - picongpu::AssignedTrilinearInterpolation, - T_Cursor - > + namespace result_of { - using type = - typename boost::remove_reference< typename T_Cursor::type >::type; - }; + template + struct Functor + { + using type = typename boost::remove_reference::type; + }; -} // result_of -} // pmacc + } // namespace result_of +} // namespace pmacc namespace picongpu { - struct AssignedTrilinearInterpolation { - /** Does a 3D trilinear field-to-point interpolation for * arbitrary assignment function and arbitrary field_value types. 
* @@ -67,95 +60,57 @@ namespace picongpu * * interpolate on grid points in range [T_begin;T_end] */ - template< - typename T_AssignmentFunction, - int T_begin, - int T_end, - typename T_Cursor - > - HDINLINE static - auto - interpolate( - const T_Cursor& cursor, - const float3_X & pos - ) - -> typename ::pmacc::result_of::Functor< - AssignedTrilinearInterpolation, - T_Cursor - >::type + template + HDINLINE static auto interpolate(const T_Cursor& cursor, const float3_X& pos) -> + typename ::pmacc::result_of::Functor::type { - using type = typename ::pmacc::result_of::Functor< - AssignedTrilinearInterpolation, - T_Cursor - >::type; + using type = typename ::pmacc::result_of::Functor::type; - type result_z = type( 0.0 ); - for( int z = T_begin; z <= T_end; ++z ) + type result_z = type(0.0); + for(int z = T_begin; z <= T_end; ++z) { - type result_y = type( 0.0 ); - for( int y = T_begin; y <= T_end; ++y ) + type result_y = type(0.0); + for(int y = T_begin; y <= T_end; ++y) { - type result_x = type( 0.0 ); - for( int x = T_begin; x <= T_end; ++x ) + type result_x = type(0.0); + for(int x = T_begin; x <= T_end; ++x) /* a form factor is the "amount of particle" that is affected by this cell * so we have to sum over: cell_value * form_factor */ - result_x += *cursor( x, y, z ) * T_AssignmentFunction()( float_X( x ) - pos.x() ); + result_x += *cursor(x, y, z) * T_AssignmentFunction()(float_X(x) - pos.x()); - result_y += result_x * T_AssignmentFunction()( float_X( y ) - pos.y() ); + result_y += result_x * T_AssignmentFunction()(float_X(y) - pos.y()); } - result_z += result_y * T_AssignmentFunction()( float_X( z ) - pos.z() ); + result_z += result_y * T_AssignmentFunction()(float_X(z) - pos.z()); } return result_z; } /** Implementation for 2D position*/ - template< - class T_AssignmentFunction, - int T_begin, - int T_end, - class T_Cursor - > - HDINLINE static - auto - interpolate( - T_Cursor const & cursor, - float2_X const & pos - ) - -> typename ::pmacc::result_of::Functor< 
- AssignedTrilinearInterpolation, - T_Cursor - >::type + template + HDINLINE static auto interpolate(T_Cursor const& cursor, float2_X const& pos) -> + typename ::pmacc::result_of::Functor::type { - using type = typename ::pmacc::result_of::Functor< - AssignedTrilinearInterpolation, - T_Cursor - >::type; + using type = typename ::pmacc::result_of::Functor::type; - type result_y = type( 0.0 ); - for( int y = T_begin; y <= T_end; ++y ) + type result_y = type(0.0); + for(int y = T_begin; y <= T_end; ++y) { - type result_x = type( 0.0 ); - for( int x = T_begin; x <= T_end; ++x ) - //a form factor is the "amount of particle" that is affected by this cell - //so we have to sum over: cell_value * form_factor - result_x += *cursor(x, y ) * T_AssignmentFunction()( float_X( x ) - pos.x() ); + type result_x = type(0.0); + for(int x = T_begin; x <= T_end; ++x) + // a form factor is the "amount of particle" that is affected by this cell + // so we have to sum over: cell_value * form_factor + result_x += *cursor(x, y) * T_AssignmentFunction()(float_X(x) - pos.x()); - result_y += result_x * T_AssignmentFunction()( float_X( y ) - pos.y() ); + result_y += result_x * T_AssignmentFunction()(float_X(y) - pos.y()); } return result_y; } - static - auto - getStringProperties() - -> pmacc::traits::StringProperty + static auto getStringProperties() -> pmacc::traits::StringProperty { - pmacc::traits::StringProperty propList( - "name", - "uniform" - ); + pmacc::traits::StringProperty propList("name", "uniform"); return propList; } }; diff --git a/include/picongpu/algorithms/DifferenceToLower.def b/include/picongpu/algorithms/DifferenceToLower.def deleted file mode 100644 index 6a24895ea9..0000000000 --- a/include/picongpu/algorithms/DifferenceToLower.def +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include - - -namespace picongpu -{ - - /** calculate difference to lower value - * - * @tparam T_Dim number of dimensions of the accessed memory - * - * Zero is returned if `GetDifference` is called for a direction greater or equal to T_Dim. - */ - template< uint32_t T_Dim > - struct DifferenceToLower; - -} // namespace picongpu diff --git a/include/picongpu/algorithms/DifferenceToLower.hpp b/include/picongpu/algorithms/DifferenceToLower.hpp deleted file mode 100644 index 9eef409382..0000000000 --- a/include/picongpu/algorithms/DifferenceToLower.hpp +++ /dev/null @@ -1,106 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/algorithms/DifferenceToLower.def" - -#include - - -namespace picongpu -{ - - template< uint32_t T_dim > - struct DifferenceToLower - { - static constexpr uint32_t dim = T_dim; - - - using OffsetOrigin = typename pmacc::math::CT::make_Int< - dim, - 1 - >::type; - using OffsetEnd = typename pmacc::math::CT::make_Int< - dim, - 0 - >::type; - - /** calculate the difference for a given direction - * - * @tparam T_direction direction for the difference operation - * @tparam T_isLesserThanDim not needed/ this is calculated by the compiler - */ - template< - uint32_t T_direction, - bool T_isLesserThanDim = ( T_direction < dim ) - > - struct GetDifference - { - static constexpr uint32_t direction = T_direction; - - HDINLINE GetDifference( ) - { - } - - /** get difference to lower value - * @return difference divided by cell size of the given direction - */ - template< typename Memory > - HDINLINE typename Memory::ValueType operator()( Memory const & mem ) const - { - // defaults to (0, 0, 0) in 3D - DataSpace< dim > const indexIdentity; - // e.g., (0, -1, 0) for d/dy in 3D - DataSpace< dim > indexLower; - indexLower[ direction ] = -1; - - return ( mem( indexIdentity ) - mem( indexLower ) ) / - cellSize[ direction ]; - } - }; - - /** special case for `direction >= simulation dimensions` - * - * difference = d/dx = 0 - */ - template< uint32_t T_direction > - struct GetDifference< - T_direction, - false - > - { - - HDINLINE GetDifference( ) - { - } - - /** @return always a zeroed value - */ - template< typename Memory > - HDINLINE typename Memory::ValueType operator()( Memory const & mem ) const - { - return Memory::ValueType::create( 0.0 ); - } - }; - - }; - -} // namespace picongpu diff --git a/include/picongpu/algorithms/DifferenceToUpper.def b/include/picongpu/algorithms/DifferenceToUpper.def deleted file mode 100644 index 971a5971b0..0000000000 --- 
a/include/picongpu/algorithms/DifferenceToUpper.def +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include - - -namespace picongpu -{ - - /** calculate difference to upper value - * - * @tparam T_Dim number of dimensions of the accessed memory - * - * Zero is returned if `GetDifference` is called for a direction greater or equal to T_Dim. - */ - template< uint32_t T_Dim > - struct DifferenceToUpper; - -} // namespace picongpu diff --git a/include/picongpu/algorithms/DifferenceToUpper.hpp b/include/picongpu/algorithms/DifferenceToUpper.hpp deleted file mode 100644 index f2bc7b49d9..0000000000 --- a/include/picongpu/algorithms/DifferenceToUpper.hpp +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/algorithms/DifferenceToUpper.def" - -#include - - -namespace picongpu -{ - - template< uint32_t T_dim > - struct DifferenceToUpper - { - static constexpr uint32_t dim = T_dim; - - using OffsetOrigin = typename pmacc::math::CT::make_Int< - dim, - 0 - >::type; - using OffsetEnd = typename pmacc::math::CT::make_Int< - dim, - 1 - >::type; - - /** calculate the difference for a given direction - * - * @tparam T_direction direction for the difference operation - * @tparam T_isLesserThanDim not needed/ this is calculated by the compiler - */ - template< - uint32_t T_direction, - bool T_isLesserThanDim = ( T_direction < dim ) - > - struct GetDifference - { - static constexpr uint32_t direction = T_direction; - - HDINLINE GetDifference( ) - { - } - - /** get difference to lower value - * @return difference divided by cell size of the given direction - */ - template< typename Memory > - HDINLINE typename Memory::ValueType operator()( Memory const & mem ) const - { - // defaults to (0, 0, 0) in 3D - DataSpace< dim > const indexIdentity; - // e.g., (0, 1, 0) for d/dy in 3D - DataSpace< dim > indexUpper; - indexUpper[ direction ] = 1; - - return ( mem( indexUpper ) - mem( indexIdentity ) ) / - cellSize[ direction ]; - } - }; - - /** special case for `direction >= simulation dimensions` - * - * difference = d/dx = 0 - */ - template< uint32_t T_direction > - struct GetDifference< - T_direction, - false - > - { - HDINLINE GetDifference( ) - { - } - - /** @return always a zeroed value - */ - template< typename Memory > - HDINLINE typename Memory::ValueType operator()( Memory const & mem) const - { - return Memory::ValueType::create( 0.0 ); - } - }; - - }; - -} // namespace picongpu diff --git 
a/include/picongpu/algorithms/FieldToParticleInterpolation.hpp b/include/picongpu/algorithms/FieldToParticleInterpolation.hpp index dd70e3438d..b696b4153b 100644 --- a/include/picongpu/algorithms/FieldToParticleInterpolation.hpp +++ b/include/picongpu/algorithms/FieldToParticleInterpolation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include "picongpu/simulation_defines.hpp" @@ -29,101 +28,89 @@ namespace picongpu { - -/** interpolate field which are defined on a grid to a point inside of a grid - * - * interpolate around a point from -AssignmentFunction::support/2 to - * (AssignmentFunction::support+1)/2 - * - * \tparam GridShiftMethod functor which shift coordinate system that al value are - * located on corner - * \tparam AssignmentFunction AssignmentFunction which is used for interpolation - * \tparam InterpolationMethod functor for interpolation method - */ -template -struct FieldToParticleInterpolation -{ - using AssignmentFunction = typename T_Shape::ChargeAssignmentOnSupport; - static constexpr int supp = AssignmentFunction::support; - - static constexpr int lowerMargin = supp / 2 ; - static constexpr int upperMargin = (supp + 1) / 2; - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; - - PMACC_CASSERT_MSG( - __FieldToParticleInterpolation_supercell_is_too_small_for_stencil, - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= lowerMargin && - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= upperMargin - ); - - /*(supp + 1) % 2 is 1 for even supports else 0*/ - static constexpr int begin = -supp / 2 + (supp + 1) % 2; - static constexpr int end = begin+supp-1; - - - template - HDINLINE 
typename Cursor::ValueType operator()(Cursor field, - const floatD_X& particlePos, - const VecVector& fieldPos) + /** interpolate field which are defined on a grid to a point inside of a grid + * + * interpolate around a point from -AssignmentFunction::support/2 to + * (AssignmentFunction::support+1)/2 + * + * \tparam GridShiftMethod functor which shift coordinate system that al value are + * located on corner + * \tparam AssignmentFunction AssignmentFunction which is used for interpolation + * \tparam InterpolationMethod functor for interpolation method + */ + template + struct FieldToParticleInterpolation { - /**\brief: - * The following calls seperate the vector interpolation into - * independent scalar interpolations. - */ - using Supports = typename pmacc::math::CT::make_Int::type; - - typename Cursor::ValueType result; - for(uint32_t i = 0; i < Cursor::ValueType::dim; i++) + using AssignmentFunction = typename T_Shape::ChargeAssignmentOnSupport; + static constexpr int supp = AssignmentFunction::support; + + static constexpr int lowerMargin = supp / 2; + static constexpr int upperMargin = (supp + 1) / 2; + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + PMACC_CASSERT_MSG( + __FieldToParticleInterpolation_supercell_is_too_small_for_stencil, + pmacc::math::CT::min::type>::type::value + >= lowerMargin + && pmacc::math::CT::min::type>::type::value + >= upperMargin); + + /*(supp + 1) % 2 is 1 for even supports else 0*/ + static constexpr int begin = -supp / 2 + (supp + 1) % 2; + static constexpr int end = begin + supp - 1; + + + template + HDINLINE typename Cursor::ValueType operator()( + Cursor field, + const floatD_X& particlePos, + const VecVector& fieldPos) { - auto fieldComponent = pmacc::cursor::make_FunctorCursor( - field, - pmacc::algorithm::functor::GetComponent(i) - ); - floatD_X particlePosShifted = particlePos; - ShiftCoordinateSystem()(fieldComponent, particlePosShifted, 
fieldPos[i]); - result[i] = InterpolationMethod::template interpolate (fieldComponent, particlePosShifted); + /**\brief: + * The following calls seperate the vector interpolation into + * independent scalar interpolations. + */ + using Supports = typename pmacc::math::CT::make_Int::type; + + typename Cursor::ValueType result; + for(uint32_t i = 0; i < Cursor::ValueType::dim; i++) + { + auto fieldComponent + = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(i)); + floatD_X particlePosShifted = particlePos; + ShiftCoordinateSystem()(fieldComponent, particlePosShifted, fieldPos[i]); + result[i] = InterpolationMethod::template interpolate( + fieldComponent, + particlePosShifted); + } + + return result; } - return result; - } + static pmacc::traits::StringProperty getStringProperties() + { + GetStringProperties propList; + return propList; + } + }; - static pmacc::traits::StringProperty getStringProperties() + namespace traits { - GetStringProperties propList; - return propList; - } - -}; - -namespace traits -{ - -/*Get margin of a solver - * class must define a LowerMargin and UpperMargin - */ -template -struct GetMargin > -{ -private: - using Interpolation = picongpu::FieldToParticleInterpolation; -public: - using LowerMargin = typename Interpolation::LowerMargin; - using UpperMargin = typename Interpolation::UpperMargin; -}; - -} //namespace traits + /*Get margin of a solver + * class must define a LowerMargin and UpperMargin + */ + template + struct GetMargin> + { + private: + using Interpolation = picongpu::FieldToParticleInterpolation; -} //namespace picongpu + public: + using LowerMargin = typename Interpolation::LowerMargin; + using UpperMargin = typename Interpolation::UpperMargin; + }; + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/algorithms/FieldToParticleInterpolationNative.hpp b/include/picongpu/algorithms/FieldToParticleInterpolationNative.hpp index 47bfc3c85b..71fb7d676f 100644 --- 
a/include/picongpu/algorithms/FieldToParticleInterpolationNative.hpp +++ b/include/picongpu/algorithms/FieldToParticleInterpolationNative.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Klaus Steiniger * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include "picongpu/simulation_defines.hpp" @@ -30,76 +29,89 @@ namespace picongpu { - -/** interpolate field which are defined on a grid to a point inside of a grid - * - * interpolate around of a point from -AssignmentFunction::support/2 to - * (AssignmentFunction::support+1)/2 - * - * \tparam GridShiftMethod functor which shift coordinate system that al value are - * located on corner - * \tparam AssignmentFunction AssignmentFunction which is used for interpolation - * \tparam InterpolationMethod functor for interpolation method - */ -template -struct FieldToParticleInterpolationNative -{ - using AssignmentFunction = typename T_Shape::ChargeAssignment; - static constexpr int supp = AssignmentFunction::support; - - static constexpr int lowerMargin = supp / 2; - static constexpr int upperMargin = (supp + 1) / 2; - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; - - template - HDINLINE float3_X operator()(Cursor field, const floatD_X& particlePos, - const VecVector_ & fieldPos) + /** interpolate field which are defined on a grid to a point inside of a grid + * + * interpolate around of a point from -AssignmentFunction::support/2 to + * (AssignmentFunction::support+1)/2 + * + * \tparam GridShiftMethod functor which shift coordinate system that al value are + * located on corner + * \tparam AssignmentFunction AssignmentFunction which is used for interpolation + * \tparam InterpolationMethod functor for interpolation method + */ + template + struct FieldToParticleInterpolationNative { - /**\brief: - * The following three calls 
seperate the vector interpolation into three - * independent scalar interpolations. In each call the coordinate system - * is turned so that E_scalar does the interpolation for the z-component. + using AssignmentFunction = typename T_Shape::ChargeAssignment; + static constexpr int supp = AssignmentFunction::support; + + static constexpr int lowerMargin = supp / 2; + static constexpr int upperMargin = (supp + 1) / 2; + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + template + HDINLINE float3_X operator()(Cursor field, const floatD_X& particlePos, const VecVector_& fieldPos) + { + /**\brief: + * The following three calls seperate the vector interpolation into three + * independent scalar interpolations. In each call the coordinate system + * is turned so that E_scalar does the interpolation for the z-component. + */ + + auto field_x + = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(0)); + floatD_X pos_tmp(particlePos); + ShiftCoordinateSystemNative()(field_x, pos_tmp, fieldPos.x()); + float_X result_x + = InterpolationMethod::template interpolate( + field_x, + pos_tmp); + + auto field_y + = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(1)); + pos_tmp = particlePos; + ShiftCoordinateSystemNative()(field_y, pos_tmp, fieldPos.y()); + float_X result_y + = InterpolationMethod::template interpolate( + field_y, + pos_tmp); + + auto field_z + = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(2)); + pos_tmp = particlePos; + ShiftCoordinateSystemNative()(field_z, pos_tmp, fieldPos.z()); + float_X result_z + = InterpolationMethod::template interpolate( + field_z, + pos_tmp); + + return float3_X(result_x, result_y, result_z); + } + + static pmacc::traits::StringProperty getStringProperties() + { + GetStringProperties propList; + return propList; + } + }; + + namespace traits + { + /*Get margin of 
a solver + * class must define a LowerMargin and UpperMargin */ + template + struct GetMargin> + { + private: + using Interpolation = picongpu::FieldToParticleInterpolationNative; - auto field_x = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(0)); - floatD_X pos_tmp(particlePos); - ShiftCoordinateSystemNative()(field_x, pos_tmp, fieldPos.x()); - float_X result_x = InterpolationMethod::template interpolate (field_x, pos_tmp); - - auto field_y = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(1)); - pos_tmp = particlePos; - ShiftCoordinateSystemNative()(field_y, pos_tmp, fieldPos.y()); - float_X result_y = InterpolationMethod::template interpolate (field_y, pos_tmp); - - auto field_z = pmacc::cursor::make_FunctorCursor(field, pmacc::algorithm::functor::GetComponent(2)); - pos_tmp = particlePos; - ShiftCoordinateSystemNative()(field_z, pos_tmp, fieldPos.z()); - float_X result_z = InterpolationMethod::template interpolate (field_z, pos_tmp); - - return float3_X(result_x, result_y, result_z); - } - -}; - -namespace traits -{ - -/*Get margin of a solver - * class must define a LowerMargin and UpperMargin - */ -template -struct GetMargin > -{ -private: - using Interpolation = picongpu::FieldToParticleInterpolationNative< AssignMethod, InterpolationMethod>; -public: - using LowerMargin = typename Interpolation::LowerMargin; - using UpperMargin = typename Interpolation::UpperMargin; -}; - -} //namespace traits - -} //namespace picongpu + public: + using LowerMargin = typename Interpolation::LowerMargin; + using UpperMargin = typename Interpolation::UpperMargin; + }; + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/algorithms/Gamma.def b/include/picongpu/algorithms/Gamma.def index 6ea120c120..ce4ee276e0 100644 --- a/include/picongpu/algorithms/Gamma.def +++ b/include/picongpu/algorithms/Gamma.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* 
Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -24,12 +24,11 @@ namespace picongpu { - /** calculate the gamma of a particle * * @tparam T_PrecisionType precision in which the calculation is performed */ - template< typename T_PrecisionType = float_X > + template struct Gamma { using valueType = T_PrecisionType; @@ -43,17 +42,8 @@ namespace picongpu * @param mass particle mass * @return particle gamma */ - template< - typename T_MomType, - typename T_MassType - > - HDINLINE - valueType - operator()( - T_MomType const & mom, - T_MassType const mass - ) const; - + template + HDINLINE valueType operator()(T_MomType const& mom, T_MassType const mass) const; }; /** calculate the gamma of a particle @@ -66,19 +56,10 @@ namespace picongpu * @param mass particle mass * @return particle gamma */ - template< - typename T_PrecisionType, - typename T_MomType, - typename T_MassType - > - HDINLINE - T_PrecisionType - gamma( T_MomType const & mom, T_MassType const mass ) + template + HDINLINE T_PrecisionType gamma(T_MomType const& mom, T_MassType const mass) { - return Gamma< T_PrecisionType >{}( - mom, - mass - ); - }; + return Gamma{}(mom, mass); + } } // namespace picongpu diff --git a/include/picongpu/algorithms/Gamma.hpp b/include/picongpu/algorithms/Gamma.hpp index f5b0cf2a3f..0665952af0 100644 --- a/include/picongpu/algorithms/Gamma.hpp +++ b/include/picongpu/algorithms/Gamma.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -25,30 +25,18 @@ namespace picongpu { - - template< typename T_PrecisionType > - template< - typename T_MomType, - typename T_MassType - > - HDINLINE - T_PrecisionType - Gamma< T_PrecisionType >::operator()( - T_MomType const & mom, - T_MassType const mass - ) const + template + template + HDINLINE T_PrecisionType Gamma::operator()(T_MomType const& mom, T_MassType const mass) const { using namespace pmacc; - valueType const fMom2 = math::abs2( precisionCast< valueType >( mom ) ); + valueType const fMom2 = pmacc::math::abs2(precisionCast(mom)); constexpr valueType c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - valueType const m2_c2_reci = valueType( 1.0 ) / - precisionCast( mass * mass * c2 ); + valueType const m2_c2_reci = valueType(1.0) / precisionCast(mass * mass * c2); - return math::sqrt( - precisionCast( valueType( 1.0 ) + fMom2 * m2_c2_reci ) - ); + return math::sqrt(precisionCast(valueType(1.0) + fMom2 * m2_c2_reci)); } } // namespace picongpu diff --git a/include/picongpu/algorithms/KinEnergy.hpp b/include/picongpu/algorithms/KinEnergy.hpp index 5f2c17b9e0..929f12a151 100644 --- a/include/picongpu/algorithms/KinEnergy.hpp +++ b/include/picongpu/algorithms/KinEnergy.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2017-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -25,48 +25,47 @@ namespace picongpu { + using namespace pmacc; -using namespace pmacc; + /** Computes the kinetic energy of a particle given its momentum and mass. + * + * The mass may be zero. + * + * For massive particle with low energy the non-relativistic + * kinetic energy expression is used in order to avoid bad roundings. + * + */ + template + struct KinEnergy + { + using ValueType = T_PrecisionType; -/** Computes the kinetic energy of a particle given its momentum and mass. - * - * The mass may be zero. 
- * - * For massive particle with low energy the non-relativistic - * kinetic energy expression is used in order to avoid bad roundings. - * - */ -template< typename T_PrecisionType = float_X > -struct KinEnergy -{ - using ValueType = T_PrecisionType; + template + HDINLINE ValueType operator()(MomType const& mom, MassType const& mass) + { + if(mass == MassType(0.0)) + return SPEED_OF_LIGHT * math::abs(precisionCast(mom)); - template< typename MomType, typename MassType > - HDINLINE ValueType operator()( MomType const & mom, MassType const & mass ) - { - if( mass == MassType( 0.0 ) ) - return SPEED_OF_LIGHT * math::abs( precisionCast< ValueType >( mom ) ); + /* if mass is non-zero then gamma is well defined */ + const ValueType gamma = Gamma()(mom, mass); - /* if mass is non-zero then gamma is well defined */ - const ValueType gamma = Gamma< ValueType >()( mom, mass ); + ValueType kinEnergy; - ValueType kinEnergy; + if(gamma < GAMMA_THRESH) + { + const ValueType mom2 = pmacc::math::abs2(precisionCast(mom)); + /* non relativistic kinetic energy expression */ + kinEnergy = mom2 / (ValueType(2.0) * mass); + } + else + { + constexpr ValueType c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; + /* kinetic energy for particles: E = (gamma - 1) * m * c^2 */ + kinEnergy = (gamma - ValueType(1.0)) * mass * c2; + } - if( gamma < GAMMA_THRESH ) - { - const ValueType mom2 = math::abs2( precisionCast< ValueType >( mom ) ); - /* non relativistic kinetic energy expression */ - kinEnergy = mom2 / ( ValueType( 2.0 ) * mass ); + return kinEnergy; } - else - { - constexpr ValueType c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - /* kinetic energy for particles: E = (gamma - 1) * m * c^2 */ - kinEnergy = ( gamma - ValueType( 1.0 ) ) * mass * c2; - } - - return kinEnergy; - } -}; + }; -} +} // namespace picongpu diff --git a/include/picongpu/algorithms/LinearInterpolateWithUpper.hpp b/include/picongpu/algorithms/LinearInterpolateWithUpper.hpp index ac2d716e72..e341fb7f13 100644 --- 
a/include/picongpu/algorithms/LinearInterpolateWithUpper.hpp +++ b/include/picongpu/algorithms/LinearInterpolateWithUpper.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau, Rene Widera +/* Copyright 2015-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include @@ -26,60 +25,57 @@ namespace picongpu { - -/** Calculate linear interpolation to upper cell value - * - * @tparam T_Dim for how many dimensions does this operator interpolate - * - * If `GetDifference` is called for a direction greater or equal T_Dim, - * a zeroed value is returned (assumes symmetry in those directions). - */ -template -struct LinearInterpolateWithUpper -{ - static constexpr uint32_t dim = T_Dim; - - using OffsetOrigin = typename pmacc::math::CT::make_Int::type; - using OffsetEnd = typename pmacc::math::CT::make_Int::type; - - /** calculate the linear interpolation for a given direction + /** Calculate linear interpolation to upper cell value + * + * @tparam T_Dim for how many dimensions does this operator interpolate * - * @tparam T_direction direction for the interpolation operation - * @tparam T_isLesserThanDim not needed/ this is calculated by the compiler + * If `GetDifference` is called for a direction greater or equal T_Dim, + * a zeroed value is returned (assumes symmetry in those directions). 
*/ - template - struct GetInterpolatedValue + template + struct LinearInterpolateWithUpper { - static constexpr uint32_t direction = T_direction; + static constexpr uint32_t dim = T_Dim; - /** get interpolated value - * @return interpolated value + using OffsetOrigin = typename pmacc::math::CT::make_Int::type; + using OffsetEnd = typename pmacc::math::CT::make_Int::type; + + /** calculate the linear interpolation for a given direction + * + * @tparam T_direction direction for the interpolation operation + * @tparam T_isLesserThanDim not needed/ this is calculated by the compiler */ - template - HDINLINE typename Memory::ValueType operator()(const Memory& mem) const + template + struct GetInterpolatedValue { - const DataSpace indexIdentity; /* defaults to (0, 0, 0) in 3D */ - DataSpace indexUpper; /* e.g., (0, 1, 0) for direction y in 3D */ - indexUpper[direction] = 1; + static constexpr uint32_t direction = T_direction; - return ( mem(indexUpper) + mem(indexIdentity)) * Memory::ValueType::create(0.5); - } - }; + /** get interpolated value + * @return interpolated value + */ + template + HDINLINE typename Memory::ValueType operator()(const Memory& mem) const + { + const DataSpace indexIdentity; /* defaults to (0, 0, 0) in 3D */ + DataSpace indexUpper; /* e.g., (0, 1, 0) for direction y in 3D */ + indexUpper[direction] = 1; - /** special case for `direction >= simulation dimensions`*/ - template - struct GetInterpolatedValue - { + return (mem(indexUpper) + mem(indexIdentity)) * Memory::ValueType::create(0.5); + } + }; - /** @return always identity - */ - template - HDINLINE typename Memory::ValueType operator()(const Memory& mem) const + /** special case for `direction >= simulation dimensions`*/ + template + struct GetInterpolatedValue { - return *mem; - } + /** @return always identity + */ + template + HDINLINE typename Memory::ValueType operator()(const Memory& mem) const + { + return *mem; + } + }; }; -}; - -} //namespace picongpu +} // namespace picongpu diff 
--git a/include/picongpu/algorithms/Set.hpp b/include/picongpu/algorithms/Set.hpp index 6816f89213..a243cb4849 100644 --- a/include/picongpu/algorithms/Set.hpp +++ b/include/picongpu/algorithms/Set.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -23,31 +23,22 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -template -struct Set -{ - - HDINLINE Set(Type_ defaultValue) : value(defaultValue) + template + struct Set { - - } - - template< - typename Dst, - typename T_Acc - > - HDINLINE void operator()( - T_Acc const &, - Dst & dst - ) const - { - dst = value; - } - -private: - PMACC_ALIGN(value, const Type_); -}; -} - + HDINLINE Set(Type_ defaultValue) : value(defaultValue) + { + } + + template + HDINLINE void operator()(T_Acc const&, Dst& dst) const + { + dst = value; + } + + private: + PMACC_ALIGN(value, const Type_); + }; +} // namespace picongpu diff --git a/include/picongpu/algorithms/ShiftCoordinateSystem.hpp b/include/picongpu/algorithms/ShiftCoordinateSystem.hpp index 1ef0fc0db0..7db476e292 100644 --- a/include/picongpu/algorithms/ShiftCoordinateSystem.hpp +++ b/include/picongpu/algorithms/ShiftCoordinateSystem.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -18,7 +18,6 @@ */ - #pragma once #include @@ -31,116 +30,109 @@ namespace picongpu { + /** calculate offset to move coordinate system in an easy to use system + * + * There are two cases: + * - system with even shape and odd shape + * - for more see documentation of the implementation + */ + template + struct GetOffsetToStaticShapeSystem; -/** calculate offset to move coordinate system in an easy to use system - * - * There are two cases: - * - system with even shape and odd shape - * - for more see documentation of the implementation - */ -template -struct GetOffsetToStaticShapeSystem; - -template -struct AssignToDim -{ - - template - HDINLINE void - operator()(T_Type& cursor, T_Vector& pos, const T_FieldType& fieldPos) + template + struct AssignToDim { - const uint32_t dim = T_Vector::dim; - using ValueType = typename T_Vector::type; + template + HDINLINE void operator()(T_Type& cursor, T_Vector& pos, const T_FieldType& fieldPos) + { + const uint32_t dim = T_Vector::dim; + using ValueType = typename T_Vector::type; - using Supports = T_Supports; - using Component = T_Component; + using Supports = T_Supports; + using Component = T_Component; - const uint32_t component = Component::x::value; - const uint32_t support = Supports::template at::type::value; - const bool isEven = (support % 2) == 0; + const uint32_t component = Component::x::value; + const uint32_t support = Supports::template at::type::value; + const bool isEven = (support % 2) == 0; - const ValueType v_pos = pos[component] - fieldPos[component]; - DataSpace< dim > intShift; - intShift[component] = GetOffsetToStaticShapeSystem ()(v_pos); - cursor = cursor(intShift); - pos[component] = v_pos - ValueType(intShift[component]); - } -}; - -/** shift to new coordinate system - * - * @tparam T_supports CT::Vector with support - */ -template -struct ShiftCoordinateSystem -{ + const ValueType v_pos = pos[component] - fieldPos[component]; + DataSpace intShift; + intShift[component] = 
GetOffsetToStaticShapeSystem()(v_pos); + cursor = cursor(intShift); + pos[component] = v_pos - ValueType(intShift[component]); + } + }; /** shift to new coordinate system * - * shift cursor and vector to new coordinate system - * @param[in,out] cursor cursor to memory - * @param[in,out] vector short vector with coordinates in old system - * - defined for [0.0;1.0) per dimension - * @param fieldPos vector with relative coordinates for shift ( value range [0.0;0.5] ) - * - * After this coordinate shift vector has well defined ranges per dimension, - * for each defined fieldPos: - * - * - Even Support: vector is always [0.0;1.0) - * - Odd Support: vector is always [-0.5;0.5) + * @tparam T_supports CT::Vector with support */ - template - HDINLINE void operator()(T_Cursor& cursor, T_Vector& vector, const T_FieldType & fieldPos) + template + struct ShiftCoordinateSystem { - /** \todo check if a static assert on - * "T_Cursor::dim" == T_Vector::dim == T_FieldType::dim is possible - * and does not waste registers */ - const uint32_t dim = T_Vector::dim; - - using Size = boost::mpl::vector1 < boost::mpl::range_c >; - using CombiTypes = typename AllCombinations::type; - - meta::ForEach > shift; - shift(cursor, vector, fieldPos); - - } -}; - - -/** Offset calculation for even support - * - * @param pos position of the particle relative to the grid - * - defined for [-0.5;1.0) - * @return offset for the old system ( new system = old_system - offset) - */ -template<> -struct GetOffsetToStaticShapeSystem -{ - - template - HDINLINE int operator()(const T_Type& pos) + /** shift to new coordinate system + * + * shift cursor and vector to new coordinate system + * @param[in,out] cursor cursor to memory + * @param[in,out] vector short vector with coordinates in old system + * - defined for [0.0;1.0) per dimension + * @param fieldPos vector with relative coordinates for shift ( value range [0.0;0.5] ) + * + * After this coordinate shift vector has well defined ranges per dimension, + 
* for each defined fieldPos: + * + * - Even Support: vector is always [0.0;1.0) + * - Odd Support: vector is always [-0.5;0.5) + */ + template + HDINLINE void operator()(T_Cursor& cursor, T_Vector& vector, const T_FieldType& fieldPos) + { + /** \todo check if a static assert on + * "T_Cursor::dim" == T_Vector::dim == T_FieldType::dim is possible + * and does not waste registers */ + const uint32_t dim = T_Vector::dim; + + using Size = boost::mpl::vector1>; + using CombiTypes = typename AllCombinations::type; + + meta::ForEach> shift; + shift(cursor, vector, fieldPos); + } + }; + + + /** Offset calculation for even support + * + * @param pos position of the particle relative to the grid + * - defined for [-0.5;1.0) + * @return offset for the old system ( new system = old_system - offset) + */ + template<> + struct GetOffsetToStaticShapeSystem { - return math::float2int_rd(pos); - } -}; + template + HDINLINE int operator()(const T_Type& pos) + { + return pmacc::math::float2int_rd(pos); + } + }; -/** Offset calculation for odd support - * - * @param pos position of the particle relative to the grid - * - defined for [-0.5;1.0) - * @return offset for the old system ( new system = old_system - offset) - */ -template<> -struct GetOffsetToStaticShapeSystem -{ - - template - HDINLINE int operator()(const T_Type& pos) + /** Offset calculation for odd support + * + * @param pos position of the particle relative to the grid + * - defined for [-0.5;1.0) + * @return offset for the old system ( new system = old_system - offset) + */ + template<> + struct GetOffsetToStaticShapeSystem { - return pos >= T_Type(0.5) ? 1 : 0; - } -}; + template + HDINLINE int operator()(const T_Type& pos) + { + return pos >= T_Type(0.5) ? 
1 : 0; + } + }; } // namespace picongpu diff --git a/include/picongpu/algorithms/ShiftCoordinateSystemNative.hpp b/include/picongpu/algorithms/ShiftCoordinateSystemNative.hpp index d33c961dd8..46da69bdd9 100644 --- a/include/picongpu/algorithms/ShiftCoordinateSystemNative.hpp +++ b/include/picongpu/algorithms/ShiftCoordinateSystemNative.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include @@ -26,24 +25,22 @@ namespace picongpu { - -template -struct ShiftCoordinateSystemNative -{ - - /**shift to new coordinat system - * - * shift cursor and vector to new coordinate system - * @param curser curser to memory - * @param vector short vector with coordinates in old system - * @param fieldPos vector with relative coordinates for shift ( value range [0.0;0.5] ) - */ - template - HDINLINE void operator()(Cursor& cursor, Vector& vector, const floatD_X & fieldPos) + template + struct ShiftCoordinateSystemNative { - for (uint32_t i = 0; i < simDim; ++i) - vector[i] -= fieldPos[i]; - } -}; + /**shift to new coordinat system + * + * shift cursor and vector to new coordinate system + * @param curser curser to memory + * @param vector short vector with coordinates in old system + * @param fieldPos vector with relative coordinates for shift ( value range [0.0;0.5] ) + */ + template + HDINLINE void operator()(Cursor& cursor, Vector& vector, const floatD_X& fieldPos) + { + for(uint32_t i = 0; i < simDim; ++i) + vector[i] -= fieldPos[i]; + } + }; } // namespace picongpu diff --git a/include/picongpu/algorithms/Velocity.hpp b/include/picongpu/algorithms/Velocity.hpp index 2b6c97d651..90fc9f0b67 100644 --- a/include/picongpu/algorithms/Velocity.hpp +++ b/include/picongpu/algorithms/Velocity.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This 
file is part of PIConGPU. * @@ -21,21 +21,18 @@ namespace picongpu { - using namespace pmacc; struct Velocity { - - template - HDINLINE MomType operator()(const MomType mom, const MassType mass0) + template + HDINLINE MomType operator()(const MomType mom, const MassType mass0) { const float_X rc2 = MUE0_EPS0; - const float_X m0_2 = mass0*mass0; - const float_X fMom2 = math::abs2(mom); + const float_X m0_2 = mass0 * mass0; + const float_X fMom2 = pmacc::math::abs2(mom); float_X t = math::rsqrt(precisionCast(m0_2 + fMom2 * rc2)); return t * mom; } }; -} - +} // namespace picongpu diff --git a/include/picongpu/debug/PIConGPUVerbose.hpp b/include/picongpu/debug/PIConGPUVerbose.hpp index b2d0026055..d7a6b774be 100644 --- a/include/picongpu/debug/PIConGPUVerbose.hpp +++ b/include/picongpu/debug/PIConGPUVerbose.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -24,26 +24,21 @@ namespace picongpu { - #ifndef PIC_VERBOSE_LVL -#define PIC_VERBOSE_LVL 0 +# define PIC_VERBOSE_LVL 0 #endif -/*create verbose class*/ -DEFINE_VERBOSE_CLASS(PIConGPUVerbose) -( - /* define log lvl for later use - * e.g. log("TEXT");*/ - DEFINE_LOGLVL(0,NOTHING); - DEFINE_LOGLVL(1,PHYSICS); - DEFINE_LOGLVL(2,DOMAINS); - DEFINE_LOGLVL(4,CRITICAL); - DEFINE_LOGLVL(8,MEMORY); - DEFINE_LOGLVL(16,SIMULATION_STATE); - DEFINE_LOGLVL(32,INPUT_OUTPUT); -) -/*set default verbose lvl (integer number)*/ -(NOTHING::lvl|PIC_VERBOSE_LVL); + /*create verbose class*/ + DEFINE_VERBOSE_CLASS(PIConGPUVerbose) + ( + /* define log lvl for later use + * e.g. 
log("TEXT");*/ + DEFINE_LOGLVL(0, NOTHING); DEFINE_LOGLVL(1, PHYSICS); DEFINE_LOGLVL(2, DOMAINS); DEFINE_LOGLVL(4, CRITICAL); + DEFINE_LOGLVL(8, MEMORY); + DEFINE_LOGLVL(16, SIMULATION_STATE); + DEFINE_LOGLVL(32, INPUT_OUTPUT);) + /*set default verbose lvl (integer number)*/ + (NOTHING::lvl | PIC_VERBOSE_LVL); } /* namespace picongpu */ diff --git a/include/picongpu/extensionParam.loader b/include/picongpu/extensionParam.loader index be8434d2cb..a3daaabf88 100644 --- a/include/picongpu/extensionParam.loader +++ b/include/picongpu/extensionParam.loader @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/extensionUnitless.loader b/include/picongpu/extensionUnitless.loader index be8434d2cb..a3daaabf88 100644 --- a/include/picongpu/extensionUnitless.loader +++ b/include/picongpu/extensionUnitless.loader @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/fields/CellType.hpp b/include/picongpu/fields/CellType.hpp index 5515976b15..4e1e3fffa3 100644 --- a/include/picongpu/fields/CellType.hpp +++ b/include/picongpu/fields/CellType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -21,15 +21,15 @@ #include "picongpu/simulation_defines.hpp" #include "picongpu/fields/MaxwellSolver/Solvers.hpp" +#include "picongpu/traits/GetCellType.hpp" namespace picongpu { -namespace fields -{ - - //! Alias for a cell type used by the field solver - using CellType = Solver::CellType; + namespace fields + { + //! 
Alias for a cell type used by the field solver + using CellType = traits::GetCellType::type; -} // namespace fields + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/EMFieldBase.hpp b/include/picongpu/fields/EMFieldBase.hpp index 89b5dfc538..b750673910 100644 --- a/include/picongpu/fields/EMFieldBase.hpp +++ b/include/picongpu/fields/EMFieldBase.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -41,96 +41,91 @@ namespace picongpu { -namespace fields -{ - - /** Base class for implementation inheritance in classes for the - * electromagnetic fields - * - * Stores field values on host and device and provides data synchronization - * between them. - * - * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and - * ISimulationData. - */ - class EMFieldBase : - public SimulationFieldHelper< MappingDesc >, - public ISimulationData + namespace fields { - public: - - //! Type of each field value - using ValueType = float3_X; - - //! Number of components of ValueType, for serialization - static constexpr int numComponents = ValueType::dim; - - //! Type of host-device buffer for field values - using Buffer = pmacc::GridBuffer< ValueType, simDim >; - - //! Type of data box for field values on host and device - using DataBoxType = pmacc::DataBox< PitchedBox< ValueType, simDim > >; - - //! 
Size of supercell - using SuperCellSize = MappingDesc::SuperCellSize; - - /** Create a field - * - * @tparam T_tag communication tag value - * - * @param cellDescription mapping for kernels - * @param id unique id - * @param tag helper parameter for T_tag deduction - */ - template< CommunicationTag T_tag > - HINLINE EMFieldBase( - MappingDesc const & cellDescription, - pmacc::SimulationDataId const & id, - std::integral_constant< CommunicationTag, T_tag > tag - ); - - //! Get a reference to the host-device buffer for the field values - HINLINE Buffer & getGridBuffer( ); - - //! Get the grid layout - HINLINE GridLayout< simDim > getGridLayout( ); - - //! Get the host data box for the field values - HINLINE DataBoxType getHostDataBox( ); - - //! Get the device data box for the field values - HINLINE DataBoxType getDeviceDataBox( ); - - /** Start asynchronous communication of field values + /** Base class for implementation inheritance in classes for the + * electromagnetic fields * - * @param serialEvent event to depend on - */ - HINLINE EventTask asyncCommunication( EventTask serialEvent ); - - /** Reset the host-device buffer for field values + * Stores field values on host and device and provides data synchronization + * between them. * - * @param currentStep index of time iteration + * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and + * ISimulationData. */ - HINLINE void reset( uint32_t currentStep ) override; - - //! Synchronize device data with host data - HINLINE void syncToDevice( ) override; - - //! Synchronize host data with device data - HINLINE void synchronize( ) override; - - //! Get id - HINLINE SimulationDataId getUniqueId( ) override; - - private: - - //! Host-device buffer for field values - std::unique_ptr< Buffer > buffer; - - //! Unique id - pmacc::SimulationDataId id; - - }; - -} // namespace fields + class EMFieldBase + : public SimulationFieldHelper + , public ISimulationData + { + public: + //! 
Type of each field value + using ValueType = float3_X; + + //! Number of components of ValueType, for serialization + static constexpr int numComponents = ValueType::dim; + + //! Type of host-device buffer for field values + using Buffer = pmacc::GridBuffer; + + //! Type of data box for field values on host and device + using DataBoxType = pmacc::DataBox>; + + //! Size of supercell + using SuperCellSize = MappingDesc::SuperCellSize; + + /** Create a field + * + * @tparam T_tag communication tag value + * + * @param cellDescription mapping for kernels + * @param id unique id + * @param tag helper parameter for T_tag deduction + */ + template + HINLINE EMFieldBase( + MappingDesc const& cellDescription, + pmacc::SimulationDataId const& id, + std::integral_constant tag); + + //! Get a reference to the host-device buffer for the field values + HINLINE Buffer& getGridBuffer(); + + //! Get the grid layout + HINLINE GridLayout getGridLayout(); + + //! Get the host data box for the field values + HINLINE DataBoxType getHostDataBox(); + + //! Get the device data box for the field values + HINLINE DataBoxType getDeviceDataBox(); + + /** Start asynchronous communication of field values + * + * @param serialEvent event to depend on + */ + HINLINE EventTask asyncCommunication(EventTask serialEvent); + + /** Reset the host-device buffer for field values + * + * @param currentStep index of time iteration + */ + HINLINE void reset(uint32_t currentStep) override; + + //! Synchronize device data with host data + HINLINE void syncToDevice() override; + + //! Synchronize host data with device data + HINLINE void synchronize() override; + + //! Get id + HINLINE SimulationDataId getUniqueId() override; + + private: + //! Host-device buffer for field values + std::unique_ptr buffer; + + //! 
Unique id + pmacc::SimulationDataId id; + }; + + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/EMFieldBase.tpp b/include/picongpu/fields/EMFieldBase.tpp index 1371951691..6160caeed1 100644 --- a/include/picongpu/fields/EMFieldBase.tpp +++ b/include/picongpu/fields/EMFieldBase.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch, Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -46,135 +45,117 @@ namespace picongpu { -namespace fields -{ - - template< CommunicationTag T_tag > - EMFieldBase::EMFieldBase( - MappingDesc const & cellDescription, - pmacc::SimulationDataId const & id, - std::integral_constant< CommunicationTag, T_tag > - ) : - SimulationFieldHelper< MappingDesc >( cellDescription ), - id( id ) + namespace fields { - buffer = pmacc::memory::makeUnique< Buffer >( - cellDescription.getGridLayout( ) - ); - - using VectorSpeciesWithInterpolation = typename pmacc::particles::traits::FilterByFlag - < - VectorAllSpecies, - interpolation<> - >::type; - using LowerMarginInterpolation = bmpl::accumulate< - VectorSpeciesWithInterpolation, - typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > > - >::type; - using UpperMarginInterpolation = bmpl::accumulate< - VectorSpeciesWithInterpolation, - typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > > - >::type; - - /* Calculate the maximum Neighbors we need from MAX(ParticleShape, FieldSolver) */ - using LowerMarginSolver = typename GetMargin::LowerMargin; - using LowerMarginInterpolationAndSolver = typename pmacc::math::CT::max< - LowerMarginInterpolation, - LowerMarginSolver - >::type; - using UpperMarginSolver = typename GetMargin::UpperMargin; - using UpperMarginInterpolationAndSolver = typename pmacc::math::CT::max< - 
UpperMarginInterpolation, - UpperMarginSolver - >::type; - - /* Calculate upper and lower margin for pusher - (currently all pusher use the interpolation of the species) - and find maximum margin - */ - using VectorSpeciesWithPusherAndInterpolation = typename pmacc::particles::traits::FilterByFlag - < - VectorSpeciesWithInterpolation, - particlePusher<> - >::type; - using LowerMargin = typename bmpl::accumulate< - VectorSpeciesWithPusherAndInterpolation, - LowerMarginInterpolationAndSolver, - pmacc::math::CT::max > - >::type; - - using UpperMargin = typename bmpl::accumulate< - VectorSpeciesWithPusherAndInterpolation, - UpperMarginInterpolationAndSolver, - pmacc::math::CT::max > - >::type; - - const DataSpace< simDim > originGuard( LowerMargin( ).toRT( ) ); - const DataSpace< simDim > endGuard( UpperMargin( ).toRT( ) ); - - /*go over all directions*/ - for ( uint32_t i = 1; i < NumberOfExchanges::value; ++i ) + template + EMFieldBase::EMFieldBase( + MappingDesc const& cellDescription, + pmacc::SimulationDataId const& id, + std::integral_constant) + : SimulationFieldHelper(cellDescription) + , id(id) { - DataSpace relativeMask = Mask::getRelativeDirections ( i ); - /* guarding cells depend on direction - * for negative direction use originGuard else endGuard (relative direction ZERO is ignored) - * don't switch end and origin because this is a read buffer and no send buffer - */ - DataSpace guardingCells; - for ( uint32_t d = 0; d < simDim; ++d ) - guardingCells[d] = ( relativeMask[d] == -1 ? 
originGuard[d] : endGuard[d] ); - buffer->addExchange( GUARD, i, guardingCells, T_tag ); + buffer = std::make_unique(cellDescription.getGridLayout()); + + using VectorSpeciesWithInterpolation = + typename pmacc::particles::traits::FilterByFlag>::type; + using LowerMarginInterpolation = bmpl::accumulate< + VectorSpeciesWithInterpolation, + typename pmacc::math::CT::make_Int::type, + pmacc::math::CT::max>>>::type; + using UpperMarginInterpolation = bmpl::accumulate< + VectorSpeciesWithInterpolation, + typename pmacc::math::CT::make_Int::type, + pmacc::math::CT::max>>>::type; + + /* Calculate the maximum Neighbors we need from MAX(ParticleShape, FieldSolver) */ + using LowerMarginSolver = typename GetMargin::LowerMargin; + using LowerMarginInterpolationAndSolver = + typename pmacc::math::CT::max::type; + using UpperMarginSolver = typename GetMargin::UpperMargin; + using UpperMarginInterpolationAndSolver = + typename pmacc::math::CT::max::type; + + /* Calculate upper and lower margin for pusher + (currently all pusher use the interpolation of the species) + and find maximum margin + */ + using VectorSpeciesWithPusherAndInterpolation = typename pmacc::particles::traits:: + FilterByFlag>::type; + using LowerMargin = typename bmpl::accumulate< + VectorSpeciesWithPusherAndInterpolation, + LowerMarginInterpolationAndSolver, + pmacc::math::CT::max>>::type; + + using UpperMargin = typename bmpl::accumulate< + VectorSpeciesWithPusherAndInterpolation, + UpperMarginInterpolationAndSolver, + pmacc::math::CT::max>>::type; + + const DataSpace originGuard(LowerMargin().toRT()); + const DataSpace endGuard(UpperMargin().toRT()); + + /*go over all directions*/ + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) + { + DataSpace relativeMask = Mask::getRelativeDirections(i); + /* guarding cells depend on direction + * for negative direction use originGuard else endGuard (relative direction ZERO is ignored) + * don't switch end and origin because this is a read buffer and no send 
buffer + */ + DataSpace guardingCells; + for(uint32_t d = 0; d < simDim; ++d) + guardingCells[d] = (relativeMask[d] == -1 ? originGuard[d] : endGuard[d]); + buffer->addExchange(GUARD, i, guardingCells, T_tag); + } } - } - EMFieldBase::Buffer & EMFieldBase::getGridBuffer( ) - { - return *buffer; - } + EMFieldBase::Buffer& EMFieldBase::getGridBuffer() + { + return *buffer; + } - GridLayout< simDim > EMFieldBase::getGridLayout( ) - { - return cellDescription.getGridLayout( ); - } + GridLayout EMFieldBase::getGridLayout() + { + return cellDescription.getGridLayout(); + } - EMFieldBase::DataBoxType EMFieldBase::getHostDataBox( ) - { - return buffer->getHostBuffer( ).getDataBox( ); - } + EMFieldBase::DataBoxType EMFieldBase::getHostDataBox() + { + return buffer->getHostBuffer().getDataBox(); + } - EMFieldBase::DataBoxType EMFieldBase::getDeviceDataBox( ) - { - return buffer->getDeviceBuffer( ).getDataBox( ); - } + EMFieldBase::DataBoxType EMFieldBase::getDeviceDataBox() + { + return buffer->getDeviceBuffer().getDataBox(); + } - EventTask EMFieldBase::asyncCommunication( EventTask serialEvent ) - { - EventTask eB = buffer->asyncCommunication( serialEvent ); - return eB; - } + EventTask EMFieldBase::asyncCommunication(EventTask serialEvent) + { + EventTask eB = buffer->asyncCommunication(serialEvent); + return eB; + } - void EMFieldBase::reset( uint32_t ) - { - buffer->getHostBuffer( ).reset( true ); - buffer->getDeviceBuffer( ).reset( false ); - } + void EMFieldBase::reset(uint32_t) + { + buffer->getHostBuffer().reset(true); + buffer->getDeviceBuffer().reset(false); + } - void EMFieldBase::syncToDevice( ) - { - buffer->hostToDevice( ); - } + void EMFieldBase::syncToDevice() + { + buffer->hostToDevice(); + } - void EMFieldBase::synchronize( ) - { - buffer->deviceToHost( ); - } + void EMFieldBase::synchronize() + { + buffer->deviceToHost(); + } - pmacc::SimulationDataId EMFieldBase::getUniqueId( ) - { - return id; - } + pmacc::SimulationDataId EMFieldBase::getUniqueId() + { 
+ return id; + } -} // namespace fields + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/FieldB.hpp b/include/picongpu/fields/FieldB.hpp index b9662b44f3..f484123bca 100644 --- a/include/picongpu/fields/FieldB.hpp +++ b/include/picongpu/fields/FieldB.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -32,7 +32,6 @@ namespace picongpu { - /** Representation of the magnetic field * * Stores field values on host and device and provides data synchronization @@ -44,18 +43,17 @@ namespace picongpu class FieldB : public fields::EMFieldBase { public: - /** Create a field * * @param cellDescription mapping for kernels */ - HINLINE FieldB( MappingDesc const & cellDescription ); + HINLINE FieldB(MappingDesc const& cellDescription); //! Unit type of field components - using UnitValueType = promoteType< float_64, ValueType >::type; + using UnitValueType = promoteType::type; //! Get units of field components - HDINLINE static UnitValueType getUnit( ); + HDINLINE static UnitValueType getUnit(); /** Get unit representation as powers of the 7 base measures * @@ -64,11 +62,10 @@ namespace picongpu * thermodynamic temperature theta, amount of substance N, * luminous intensity J) */ - HINLINE static std::vector< float_64 > getUnitDimension( ); + HINLINE static std::vector getUnitDimension(); //! 
Get text name - HINLINE static std::string getName( ); - + HINLINE static std::string getName(); }; } // namespace picongpu diff --git a/include/picongpu/fields/FieldB.tpp b/include/picongpu/fields/FieldB.tpp index c02bda48e9..faa0226289 100644 --- a/include/picongpu/fields/FieldB.tpp +++ b/include/picongpu/fields/FieldB.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch, Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -32,36 +32,31 @@ namespace picongpu { - - FieldB::FieldB( MappingDesc const & cellDescription ) : - fields::EMFieldBase( - cellDescription, - getName( ), - std::integral_constant< CommunicationTag, FIELD_B >{ } - ) + FieldB::FieldB(MappingDesc const& cellDescription) + : fields::EMFieldBase(cellDescription, getName(), std::integral_constant{}) { } - HDINLINE FieldB::UnitValueType FieldB::getUnit( ) + HDINLINE FieldB::UnitValueType FieldB::getUnit() { - return UnitValueType{ UNIT_BFIELD, UNIT_BFIELD, UNIT_BFIELD }; + return UnitValueType{UNIT_BFIELD, UNIT_BFIELD, UNIT_BFIELD}; } - std::vector< float_64 > FieldB::getUnitDimension( ) + std::vector FieldB::getUnitDimension() { /* B is in Tesla : kg / (A * s^2) * -> M * T^-2 * I^-1 */ - std::vector< float_64 > unitDimension( 7, 0.0 ); - unitDimension.at( SIBaseUnits::mass ) = 1.0; - unitDimension.at( SIBaseUnits::time ) = -2.0; - unitDimension.at( SIBaseUnits::electricCurrent ) = -1.0; + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -2.0; + unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; return unitDimension; } - std::string FieldB::getName( ) + std::string FieldB::getName() { return "B"; } -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/fields/FieldE.hpp b/include/picongpu/fields/FieldE.hpp index 56a32035f5..f162d910d1 100644 --- 
a/include/picongpu/fields/FieldE.hpp +++ b/include/picongpu/fields/FieldE.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -32,7 +32,6 @@ namespace picongpu { - /** Representation of the electric field * * Stores field values on host and device and provides data synchronization @@ -44,18 +43,17 @@ namespace picongpu class FieldE : public fields::EMFieldBase { public: - /** Create a field * * @param cellDescription mapping for kernels */ - HINLINE FieldE( MappingDesc const & cellDescription ); + HINLINE FieldE(MappingDesc const& cellDescription); //! Unit type of field components - using UnitValueType = promoteType< float_64, ValueType >::type; + using UnitValueType = promoteType::type; //! Get units of field components - HDINLINE static UnitValueType getUnit( ); + HDINLINE static UnitValueType getUnit(); /** Get unit representation as powers of the 7 base measures * @@ -64,11 +62,10 @@ namespace picongpu * thermodynamic temperature theta, amount of substance N, * luminous intensity J) */ - HINLINE static std::vector< float_64 > getUnitDimension( ); + HINLINE static std::vector getUnitDimension(); //! Get text name - HINLINE static std::string getName( ); - + HINLINE static std::string getName(); }; } // namespace picongpu diff --git a/include/picongpu/fields/FieldE.tpp b/include/picongpu/fields/FieldE.tpp index c9cab400e4..074da2796f 100644 --- a/include/picongpu/fields/FieldE.tpp +++ b/include/picongpu/fields/FieldE.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch, Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. 
@@ -32,35 +32,30 @@ namespace picongpu { - - FieldE::FieldE( MappingDesc const & cellDescription ) : - fields::EMFieldBase( - cellDescription, - getName( ), - std::integral_constant< CommunicationTag, FIELD_E >{ } - ) + FieldE::FieldE(MappingDesc const& cellDescription) + : fields::EMFieldBase(cellDescription, getName(), std::integral_constant{}) { } - HDINLINE FieldE::UnitValueType FieldE::getUnit( ) + HDINLINE FieldE::UnitValueType FieldE::getUnit() { - return UnitValueType{ UNIT_EFIELD, UNIT_EFIELD, UNIT_EFIELD }; + return UnitValueType{UNIT_EFIELD, UNIT_EFIELD, UNIT_EFIELD}; } - std::vector< float_64 > FieldE::getUnitDimension( ) + std::vector FieldE::getUnitDimension() { /* E is in volts per meters: V / m = kg * m / (A * s^3) * -> L * M * T^-3 * I^-1 */ - std::vector< float_64 > unitDimension( 7, 0.0 ); - unitDimension.at( SIBaseUnits::length ) = 1.0; - unitDimension.at( SIBaseUnits::mass ) = 1.0; - unitDimension.at( SIBaseUnits::time ) = -3.0; - unitDimension.at( SIBaseUnits::electricCurrent ) = -1.0; + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = 1.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -3.0; + unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; return unitDimension; } - std::string FieldE::getName( ) + std::string FieldE::getName() { return "E"; } diff --git a/include/picongpu/fields/FieldJ.hpp b/include/picongpu/fields/FieldJ.hpp index acb50d2610..0865845907 100644 --- a/include/picongpu/fields/FieldJ.hpp +++ b/include/picongpu/fields/FieldJ.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -42,7 +42,6 @@ namespace picongpu { - /** Representation of the current density field * * Stores field values on host and device and provides data synchronization @@ -51,10 +50,11 @@ namespace picongpu * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and * ISimulationData. */ - class FieldJ : public SimulationFieldHelper, public ISimulationData + class FieldJ + : public SimulationFieldHelper + , public ISimulationData { public: - //! Type of each field value using ValueType = float3_X; @@ -65,19 +65,19 @@ namespace picongpu using UnitValueType = promoteType::type; //! Type of data box for field values on host and device - using DataBoxType = DataBox >; + using DataBoxType = DataBox>; /** Create a field * * @param cellDescription mapping for kernels */ - HINLINE FieldJ(MappingDesc const & cellDescription); + HINLINE FieldJ(MappingDesc const& cellDescription); //! Destroy a field HINLINE virtual ~FieldJ() = default; //! Get a reference to the host-device buffer for the field values - HINLINE GridBuffer &getGridBuffer(); + HINLINE GridBuffer& getGridBuffer(); //! 
Get the grid layout HINLINE GridLayout getGridLayout(); @@ -149,17 +149,17 @@ namespace picongpu * @param currentStep index of time iteration */ template - HINLINE void computeCurrent(T_Species & species, uint32_t currentStep); + HINLINE void computeCurrent(T_Species& species, uint32_t currentStep); /** Smooth current density and add it to the electric field * * @tparam T_area area to operate on - * @tparam T_CurrentInterpolation current interpolation type + * @tparam T_CurrentInterpolationFunctor current interpolation functor type * - * @param myCurrentInterpolation current interpolation + * @param myCurrentInterpolationFunctor current interpolation functor */ - template - HINLINE void addCurrentToEMF( T_CurrentInterpolation& myCurrentInterpolation ); + template + HINLINE void addCurrentToEMF(T_CurrentInterpolationFunctor myCurrentInterpolationFunctor); /** Bash field in a direction. * @@ -176,13 +176,11 @@ namespace picongpu HINLINE void insertField(uint32_t exchangeType); private: - //! Host-device buffer for current density values GridBuffer buffer; //! Buffer for receiving near-boundary values - std::unique_ptr< GridBuffer > fieldJrecv; - + std::unique_ptr> fieldJrecv; }; } // namespace picongpu diff --git a/include/picongpu/fields/FieldJ.kernel b/include/picongpu/fields/FieldJ.kernel index 5418068275..7590e441ab 100644 --- a/include/picongpu/fields/FieldJ.kernel +++ b/include/picongpu/fields/FieldJ.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, * Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -20,419 +20,275 @@ #pragma once -#include -#include - #include "picongpu/simulation_defines.hpp" - -#include "FieldJ.hpp" -#include - - +#include "picongpu/fields/currentDeposition/Strategy.def" +#include "picongpu/fields/currentDeposition/Cache.hpp" +#include "picongpu/fields/FieldJ.hpp" #include "picongpu/algorithms/Velocity.hpp" #include #include -#include #include -#include "picongpu/algorithms/Set.hpp" #include #include #include #include +#include +#include + +#include +#include namespace picongpu { + namespace currentSolver + { + /** compute current + * + * @tparam T_numWorkers number of workers + * @tparam T_BlockDescription current field domain description needed for the + * collective stencil + */ + template + struct KernelComputeCurrent + { + /** scatter particle current of particles located in a supercell + * + * The current for the supercell including the guards is cached in shared memory + * and scattered at the end of the functor to the global memory. + * + * @tparam JBox pmacc::DataBox, particle current box type + * @tparam ParBox pmacc::ParticlesBox, particle box type + * @tparam Mapping mapper functor type + * @tparam FrameSolver frame solver functor type + * @param T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param fieldJ field with particle current + * @param boxPar particle memory + * @param frameSolver functor to calculate the current for a frame + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + JBox fieldJ, + ParBox boxPar, + FrameSolver frameSolver, + Mapping mapper) const + { + using namespace mappings::threads; -using namespace pmacc; + using FrameType = typename ParBox::FrameType; + using FramePtr = typename ParBox::FramePtr; + using SuperCellSize = typename Mapping::SuperCellSize; -using J_DataBox = FieldJ::DataBoxType; + /** @todo numParticlesPerFrame should be max number of particles within a frame + * and not a magic number derived from 
SuperCellSize + */ + constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; -/** compute current - * - * @tparam T_numWorkers number of workers - * @tparam T_BlockDescription current field domain description needed for the - * collective stencil - */ -template< - uint32_t T_numWorkers, - typename T_BlockDescription -> -struct KernelComputeCurrent -{ - /** scatter particle current of particles located in a supercell - * - * The current for the supercell including the guards is cached in shared memory - * and scattered at the end of the functor to the global memory. - * - * @tparam JBox pmacc::DataBox, particle current box type - * @tparam ParBox pmacc::ParticlesBox, particle box type - * @tparam Mapping mapper functor type - * @tparam FrameSolver frame solver functor type - * @param T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param fieldJ field with particle current - * @param boxPar particle memory - * @param frameSolver functor to calculate the current for a frame - * @param mapper functor to map a block to a supercell - */ - template< - typename JBox, - typename ParBox, - typename FrameSolver, - typename Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - JBox fieldJ, - ParBox boxPar, - FrameSolver frameSolver, - Mapping mapper - ) const - { - using namespace mappings::threads; + /* We work with virtual CUDA blocks if we have more workers than particles. + * Each virtual CUDA block is working on a frame, if we have 2 blocks each block processes + * every second frame until all frames are processed. 
+ */ + constexpr uint32_t numVirtualBlocks = (numWorkers + numParticlesPerFrame - 1u) / numParticlesPerFrame; - using FrameType = typename ParBox::FrameType; - using FramePtr = typename ParBox::FramePtr; - using SuperCellSize = typename Mapping::SuperCellSize; - /** @todo numParticlesPerFrame should be max number of particles within a frame - * and not a magic number derived from SuperCellSize - */ - constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; + const DataSpace block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + uint32_t const workerIdx = cupla::threadIdx(acc).x; - /* We work with virtual CUDA blocks if we have more workers than particles. - * Each virtual CUDA block is working on a frame, if we have 2 blocks each block processes - * every second frame until all frames are processed. - */ - constexpr uint32_t numVirtualBlocks = ( numWorkers + numParticlesPerFrame - 1u ) / numParticlesPerFrame; - - - const DataSpace< simDim > block( - mapper.getSuperCellIndex( - DataSpace< simDim >( blockIdx ) - ) - ); - uint32_t const workerIdx = threadIdx.x; - - using VirtualWorkerDomCfg = IdxConfig< - numParticlesPerFrame * numVirtualBlocks, - numWorkers - >; - - /* each virtual worker is part of one virtual block */ - memory::CtxArray< - uint32_t, - VirtualWorkerDomCfg - > - virtualBlockIdCtx( - workerIdx, - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - return linearIdx / numParticlesPerFrame; - } - ); - - /* linear virtual worker index in the virtual block*/ - memory::CtxArray< - uint32_t, - VirtualWorkerDomCfg - > - virtualLinearIdCtx( - workerIdx, - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - /* map virtualLinearIdCtx to the range [0;numParticlesPerFrame) */ - return linearIdx - ( virtualBlockIdCtx[ idx ] * numParticlesPerFrame ); - } - ); - - /* each virtual worker stores the currently used frame */ - memory::CtxArray< - 
FramePtr, - VirtualWorkerDomCfg - > frameCtx; - - memory::CtxArray< - lcellId_t, - VirtualWorkerDomCfg - > particlesInSuperCellCtx( 0u ); - - /* loop over all virtual workers */ - ForEachIdx< VirtualWorkerDomCfg > forEachVirtualWorker( workerIdx ); - - forEachVirtualWorker( - [&]( - uint32_t const, - uint32_t const idx - ) - { - frameCtx[ idx ] = boxPar.getLastFrame( block ); - if( frameCtx[ idx ].isValid() && virtualBlockIdCtx[ idx ] == 0u ) - particlesInSuperCellCtx[ idx ] = boxPar.getSuperCell( block ).getSizeLastFrame(); + using VirtualWorkerDomCfg = IdxConfig; - /* select N-th (N=virtualBlockId) frame from the end of the list */ - for( uint32_t i = 1; i <= virtualBlockIdCtx[ idx ] && frameCtx[ idx ].isValid(); ++i ) - { - particlesInSuperCellCtx[ idx ] = numParticlesPerFrame; - frameCtx[ idx ] = boxPar.getPreviousFrame( frameCtx[ idx ] ); - } - } - ); + /* each virtual worker is part of one virtual block */ + memory::CtxArray virtualBlockIdCtx( + workerIdx, + [&](uint32_t const linearIdx, uint32_t const) { return linearIdx / numParticlesPerFrame; }); - /* this memory is used by all virtual blocks */ - auto cachedJ = CachedBox::create< - 0u, - typename JBox::ValueType - >( - acc, - T_BlockDescription() - ); + /* linear virtual worker index in the virtual block*/ + memory::CtxArray virtualLinearIdCtx( + workerIdx, + [&](uint32_t const linearIdx, uint32_t const idx) { + /* map virtualLinearIdCtx to the range [0;numParticlesPerFrame) */ + return linearIdx - (virtualBlockIdCtx[idx] * numParticlesPerFrame); + }); - Set< typename JBox::ValueType > set( float3_X::create( 0.0 ) ); - ThreadCollective< - T_BlockDescription, - numWorkers - > collectiveSet( workerIdx ); + /* each virtual worker stores the currently used frame */ + memory::CtxArray frameCtx; - /* initialize shared memory with zeros */ - collectiveSet( acc, set, cachedJ ); + memory::CtxArray particlesInSuperCellCtx(0u); - __syncthreads(); + /* loop over all virtual workers */ + ForEachIdx 
forEachVirtualWorker(workerIdx); - while( true ) - { - bool isOneFrameValid = false; - forEachVirtualWorker( - [&]( - uint32_t const, - uint32_t const idx - ) - { - isOneFrameValid = isOneFrameValid || frameCtx[ idx ].isValid(); - } - ); - - if( !isOneFrameValid ) - break; + forEachVirtualWorker([&](uint32_t const, uint32_t const idx) { + frameCtx[idx] = boxPar.getLastFrame(block); + if(frameCtx[idx].isValid() && virtualBlockIdCtx[idx] == 0u) + particlesInSuperCellCtx[idx] = boxPar.getSuperCell(block).getSizeLastFrame(); - forEachVirtualWorker( - [&]( - uint32_t const, - uint32_t const idx - ) - { - /* this test is only important for the last frame - * if the frame is not the last one then: `particlesInSuperCell == numParticlesPerFrame` - */ - if( - frameCtx[ idx ].isValid() && - virtualLinearIdCtx[ idx ] < particlesInSuperCellCtx[ idx ] - ) + /* select N-th (N=virtualBlockId) frame from the end of the list */ + for(uint32_t i = 1; i <= virtualBlockIdCtx[idx] && frameCtx[idx].isValid(); ++i) { - frameSolver( - acc, - *frameCtx[ idx ], - virtualLinearIdCtx[ idx ], - cachedJ - ); + particlesInSuperCellCtx[idx] = numParticlesPerFrame; + frameCtx[idx] = boxPar.getPreviousFrame(frameCtx[idx]); } - } - ); + }); + + DataSpace const blockCell = block * SuperCellSize::toRT(); + using Strategy = currentSolver::traits::GetStrategy_t; - forEachVirtualWorker( - [&]( - uint32_t const, - uint32_t const idx - ) + /* this memory is used by all virtual blocks */ + auto cachedJ = detail::Cache::template create( + acc, + fieldJ.shift(blockCell), + workerIdx); + + cupla::__syncthreads(acc); + + while(true) { - if( frameCtx[ idx ].isValid() ) - { - particlesInSuperCellCtx[ idx ] = numParticlesPerFrame; - for( int i = 0; i < numVirtualBlocks && frameCtx[ idx ].isValid(); ++i ) + bool isOneFrameValid = false; + forEachVirtualWorker([&](uint32_t const, uint32_t const idx) { + isOneFrameValid = isOneFrameValid || frameCtx[idx].isValid(); + }); + + if(!isOneFrameValid) + break; + + 
forEachVirtualWorker([&](uint32_t const, uint32_t const idx) { + /* this test is only important for the last frame + * if the frame is not the last one then: `particlesInSuperCell == numParticlesPerFrame` + */ + if(frameCtx[idx].isValid() && virtualLinearIdCtx[idx] < particlesInSuperCellCtx[idx]) { - frameCtx[ idx ] = boxPar.getPreviousFrame( frameCtx[ idx ] ); + frameSolver(acc, *frameCtx[idx], virtualLinearIdCtx[idx], cachedJ); } - } + }); + + forEachVirtualWorker([&](uint32_t const, uint32_t const idx) { + if(frameCtx[idx].isValid()) + { + particlesInSuperCellCtx[idx] = numParticlesPerFrame; + for(int i = 0; i < numVirtualBlocks && frameCtx[idx].isValid(); ++i) + { + frameCtx[idx] = boxPar.getPreviousFrame(frameCtx[idx]); + } + } + }); } - ); - } - - /* we wait that all workers finish the loop */ - __syncthreads(); - - nvidia::functors::Add add; - DataSpace< simDim > const blockCell = block * SuperCellSize::toRT(); - ThreadCollective< - T_BlockDescription, - numWorkers - > collectiveAdd( workerIdx ); - auto fieldJBlock = fieldJ.shift( blockCell ); - - /* write scatter results back to the global memory */ - collectiveAdd( - acc, - add, - fieldJBlock, - cachedJ - ); - } -}; - -template -struct ComputeCurrentPerFrame -{ - HDINLINE ComputeCurrentPerFrame(const float_X deltaTime) : - m_deltaTime(deltaTime) - { - } - - template< - typename FrameType, - typename BoxJ, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - FrameType& frame, - const int localIdx, - BoxJ & jBox - ) - { + /* we wait that all workers finish the loop */ + cupla::__syncthreads(acc); - auto particle = frame[localIdx]; - const float_X weighting = particle[weighting_]; - const floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; - const float_X charge = attribute::getCharge(weighting,particle); - const DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - - Velocity velocity; - const float3_X vel = velocity( - 
particle[momentum_], - attribute::getMass(weighting,particle)); - auto fieldJShiftToParticle = jBox.shift(localCell); - ParticleAlgo perParticle; - perParticle( - acc, - fieldJShiftToParticle, - pos, - vel, - charge, - m_deltaTime - ); - } - -private: - PMACC_ALIGN(m_deltaTime, const float_32); -}; - -/** add current to electric and magnetic field - * - * @tparam T_numWorkers number of workers - */ -template< - uint32_t T_numWorkers -> -struct KernelAddCurrentToEMF -{ - template< - typename T_CurrentInterpolation, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - typename FieldE::DataBoxType fieldE, - typename FieldB::DataBoxType fieldB, - J_DataBox fieldJ, - T_CurrentInterpolation currentInterpolation, - T_Mapping mapper - ) const - { - using namespace mappings::threads; - - /* Caching of fieldJ */ - typedef SuperCellDescription< - SuperCellSize, - typename T_CurrentInterpolation::LowerMargin, - typename T_CurrentInterpolation::UpperMargin - > BlockArea; - - constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - auto cachedJ = CachedBox::create< - 0, - typename J_DataBox::ValueType - >( - acc, - BlockArea( ) - ); - - nvidia::functors::Assign assign; - DataSpace< simDim > const block( - mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) - ); - DataSpace< simDim > const blockCell = block * MappingDesc::SuperCellSize::toRT(); - - - auto fieldJBlock = fieldJ.shift(blockCell); - - ThreadCollective< - BlockArea, - numWorkers - > collective( workerIdx ); - - collective( - acc, - assign, - cachedJ, - fieldJBlock - ); - - __syncthreads( ); - - ForEachIdx< - IdxConfig< - cellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + /* this memory is used by all virtual blocks */ + detail::Cache::template flush( + acc, + 
fieldJ.shift(blockCell), + cachedJ, + workerIdx); + } + }; + + template + struct ComputePerFrame + { + using ParticleAlgo = T_ParticleAlgo; + + HDINLINE ComputePerFrame(const float_X deltaTime) : m_deltaTime(deltaTime) + { + } + + template + DINLINE void operator()(T_Acc const& acc, FrameType& frame, const int localIdx, BoxJ& jBox) + { + auto particle = frame[localIdx]; + const float_X weighting = particle[weighting_]; + const floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; + const float_X charge = attribute::getCharge(weighting, particle); + const DataSpace localCell(DataSpaceOperations::template map(particleCellIdx)); + + Velocity velocity; + const float3_X vel = velocity(particle[momentum_], attribute::getMass(weighting, particle)); + auto fieldJShiftToParticle = jBox.shift(localCell); + ParticleAlgo perParticle; + perParticle(acc, fieldJShiftToParticle, pos, vel, charge, m_deltaTime); + } + + private: + PMACC_ALIGN(m_deltaTime, const float_32); + }; + + namespace traits + { + template + struct GetStrategy> + { + using type = GetStrategy_t; + }; + } // namespace traits + + /** add current to electric and magnetic field + * + * @tparam T_numWorkers number of workers + */ + template + struct KernelAddCurrentToEMF + { + template + DINLINE void operator()( + T_Acc const& acc, + typename FieldE::DataBoxType fieldE, + typename FieldB::DataBoxType fieldB, + typename FieldJ::DataBoxType fieldJ, + T_CurrentInterpolationFunctor currentInterpolationFunctor, + T_Mapping mapper) const { - /* cell index within the superCell */ - DataSpace< simDim > const cellIdx = - DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - DataSpace< simDim > const cell( blockCell + cellIdx ); - - // Amperes Law: - // Change of the dE = - j / EPS0 * dt - // j = current density (= current per area) - // = fieldJ - currentInterpolation( - fieldE.shift( cell ), - fieldB.shift( cell ), - cachedJ.shift( cellIdx ) - ); + using namespace 
mappings::threads; + + /* Caching of fieldJ */ + typedef SuperCellDescription< + SuperCellSize, + typename T_CurrentInterpolationFunctor::LowerMargin, + typename T_CurrentInterpolationFunctor::UpperMargin> + BlockArea; + + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + auto cachedJ = CachedBox::create<0, typename FieldJ::DataBoxType::ValueType>(acc, BlockArea()); + + nvidia::functors::Assign assign; + DataSpace const block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + DataSpace const blockCell = block * MappingDesc::SuperCellSize::toRT(); + + + auto fieldJBlock = fieldJ.shift(blockCell); + + ThreadCollective collective(workerIdx); + + collective(acc, assign, cachedJ, fieldJBlock); + + cupla::__syncthreads(acc); + + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + /* cell index within the superCell */ + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + DataSpace const cell(blockCell + cellIdx); + + // Amperes Law: + // Change of the dE = - j / EPS0 * dt + // j = current density (= current per area) + // = fieldJ + currentInterpolationFunctor(fieldE.shift(cell), fieldB.shift(cell), cachedJ.shift(cellIdx)); + }); } - ); - } -}; + }; + } // namespace currentSolver } // namespace picongpu diff --git a/include/picongpu/fields/FieldJ.tpp b/include/picongpu/fields/FieldJ.tpp index 1ea6c35c60..9eea174353 100644 --- a/include/picongpu/fields/FieldJ.tpp +++ b/include/picongpu/fields/FieldJ.tpp @@ -1,5 +1,5 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, - * Richard Pausch, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, + * Richard Pausch, Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. 
* @@ -23,22 +23,20 @@ #include "picongpu/simulation_defines.hpp" #include "picongpu/fields/FieldJ.hpp" #include "picongpu/fields/FieldJ.kernel" - +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.hpp" +#include "picongpu/fields/currentDeposition/Deposit.hpp" +#include "picongpu/particles/traits/GetCurrentSolver.hpp" +#include "picongpu/traits/GetMargin.hpp" +#include "picongpu/traits/SIBaseUnits.hpp" #include - #include #include -#include #include #include -#include #include #include -#include "picongpu/particles/traits/GetCurrentSolver.hpp" -#include "picongpu/traits/GetMargin.hpp" #include -#include "picongpu/traits/SIBaseUnits.hpp" #include #include @@ -49,295 +47,234 @@ namespace picongpu { + using namespace pmacc; -using namespace pmacc; - -FieldJ::FieldJ( MappingDesc const & cellDescription ) : - SimulationFieldHelper( cellDescription ), - buffer( cellDescription.getGridLayout( ) ), - fieldJrecv( nullptr ) -{ - const DataSpace coreBorderSize = cellDescription.getGridLayout( ).getDataSpaceWithoutGuarding( ); - - /* cell margins the current might spread to due to particle shapes */ - using AllSpeciesWithCurrent = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - current<> - >::type; - - using LowerMarginShapes = bmpl::accumulate< - AllSpeciesWithCurrent, - typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > > - >::type; - - using UpperMarginShapes = bmpl::accumulate< - AllSpeciesWithCurrent, - typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > > - >::type; - - /* margins are always positive, also for lower margins - * additional current interpolations and current filters on FieldJ might - * spread the dependencies on neighboring cells - * -> use max(shape,filter) */ - using LowerMargin = pmacc::math::CT::max< - LowerMarginShapes, - GetMargin::LowerMargin - >::type; - - using UpperMargin = pmacc::math::CT::max< - UpperMarginShapes, - GetMargin::UpperMargin - >::type; - - const DataSpace 
originGuard( LowerMargin( ).toRT( ) ); - const DataSpace endGuard( UpperMargin( ).toRT( ) ); - - /*go over all directions*/ - for ( uint32_t i = 1; i < NumberOfExchanges::value; ++i ) + FieldJ::FieldJ(MappingDesc const& cellDescription) + : SimulationFieldHelper(cellDescription) + , buffer(cellDescription.getGridLayout()) + , fieldJrecv(nullptr) { - DataSpace relativMask = Mask::getRelativeDirections ( i ); - /*guarding cells depend on direction - */ - DataSpace guardingCells; - for ( uint32_t d = 0; d < simDim; ++d ) + const DataSpace coreBorderSize = cellDescription.getGridLayout().getDataSpaceWithoutGuarding(); + + /* cell margins the current might spread to due to particle shapes */ + using AllSpeciesWithCurrent = + typename pmacc::particles::traits::FilterByFlag>::type; + + using LowerMarginShapes = bmpl::accumulate< + AllSpeciesWithCurrent, + typename pmacc::math::CT::make_Int::type, + pmacc::math::CT::max>>>::type; + + using UpperMarginShapes = bmpl::accumulate< + AllSpeciesWithCurrent, + typename pmacc::math::CT::make_Int::type, + pmacc::math::CT::max>>>::type; + + /* margins are always positive, also for lower margins + * additional current interpolations and current filters on FieldJ might + * spread the dependencies on neighboring cells + * -> use max(shape,filter) */ + auto const& interpolation = currentInterpolation::CurrentInterpolationInfo::get(); + auto const interpolationLowerMargin = interpolation.getLowerMargin(); + auto const interpolationUpperMargin = interpolation.getUpperMargin(); + auto const originGuard = pmacc::math::max(LowerMarginShapes::toRT(), interpolationLowerMargin); + auto const endGuard = pmacc::math::max(UpperMarginShapes::toRT(), interpolationUpperMargin); + + /*go over all directions*/ + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) { - /*originGuard and endGuard are switch because we send data - * e.g. 
from left I get endGuardingCells and from right I originGuardingCells + DataSpace relativMask = Mask::getRelativeDirections(i); + /*guarding cells depend on direction */ - switch ( relativMask[d] ) + DataSpace guardingCells; + for(uint32_t d = 0; d < simDim; ++d) { - // receive from negativ side to positiv (end) guarding cells - case -1: guardingCells[d] = endGuard[d]; - break; - // receive from positiv side to negativ (origin) guarding cells - case 1: guardingCells[d] = originGuard[d]; - break; - case 0: guardingCells[d] = coreBorderSize[d]; - break; - }; + /*originGuard and endGuard are switch because we send data + * e.g. from left I get endGuardingCells and from right I originGuardingCells + */ + switch(relativMask[d]) + { + // receive from negativ side to positiv (end) guarding cells + case -1: + guardingCells[d] = endGuard[d]; + break; + // receive from positiv side to negativ (origin) guarding cells + case 1: + guardingCells[d] = originGuard[d]; + break; + case 0: + guardingCells[d] = coreBorderSize[d]; + break; + }; + } + buffer.addExchangeBuffer(i, guardingCells, FIELD_J); + } + + /* Receive border values in own guard for "receive" communication pattern - necessary for current + * interpolation/filter */ + const DataSpace originRecvGuard = interpolationLowerMargin; + const DataSpace endRecvGuard = interpolationUpperMargin; + if(originRecvGuard != DataSpace::create(0) || endRecvGuard != DataSpace::create(0)) + { + fieldJrecv = std::make_unique>( + buffer.getDeviceBuffer(), + cellDescription.getGridLayout()); + /*go over all directions*/ + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) + { + DataSpace relativMask = Mask::getRelativeDirections(i); + /* guarding cells depend on direction + * for negative direction use originGuard else endGuard (relative direction ZERO is ignored) + * don't switch end and origin because this is a read buffer and no send buffer + */ + DataSpace guardingCells; + for(uint32_t d = 0; d < simDim; ++d) + guardingCells[d] = 
(relativMask[d] == -1 ? originRecvGuard[d] : endRecvGuard[d]); + fieldJrecv->addExchange(GUARD, i, guardingCells, FIELD_JRECV); + } } - // std::cout << "ex " << i << " x=" << guardingCells[0] << " y=" << guardingCells[1] << " z=" << guardingCells[2] << std::endl; - buffer.addExchangeBuffer( i, guardingCells, FIELD_J ); } - /* Receive border values in own guard for "receive" communication pattern - necessary for current interpolation/filter */ - const DataSpace originRecvGuard( GetMargin::LowerMargin( ).toRT( ) ); - const DataSpace endRecvGuard( GetMargin::UpperMargin( ).toRT( ) ); - if( originRecvGuard != DataSpace::create(0) || - endRecvGuard != DataSpace::create(0) ) + GridBuffer& FieldJ::getGridBuffer() { - fieldJrecv = pmacc::memory::makeUnique< GridBuffer >( - buffer.getDeviceBuffer(), - cellDescription.getGridLayout( ) - ); + return buffer; + } - /*go over all directions*/ - for ( uint32_t i = 1; i < NumberOfExchanges::value; ++i ) + GridLayout FieldJ::getGridLayout() + { + return cellDescription.getGridLayout(); + } + + EventTask FieldJ::asyncCommunication(EventTask serialEvent) + { + EventTask ret; + __startTransaction(serialEvent); + FieldFactory::getInstance().createTaskFieldReceiveAndInsert(*this); + ret = __endTransaction(); + + __startTransaction(serialEvent); + FieldFactory::getInstance().createTaskFieldSend(*this); + ret += __endTransaction(); + + if(fieldJrecv != nullptr) { - DataSpace relativMask = Mask::getRelativeDirections ( i ); - /* guarding cells depend on direction - * for negative direction use originGuard else endGuard (relative direction ZERO is ignored) - * don't switch end and origin because this is a read buffer and no send buffer - */ - DataSpace guardingCells; - for ( uint32_t d = 0; d < simDim; ++d ) - guardingCells[d] = ( relativMask[d] == -1 ? 
originRecvGuard[d] : endRecvGuard[d] ); - fieldJrecv->addExchange( GUARD, i, guardingCells, FIELD_JRECV ); + EventTask eJ = fieldJrecv->asyncCommunication(ret); + return eJ; } + else + return ret; } -} -GridBuffer &FieldJ::getGridBuffer( ) -{ - return buffer; -} + void FieldJ::reset(uint32_t) + { + } -GridLayout FieldJ::getGridLayout( ) -{ - return cellDescription.getGridLayout( ); -} + void FieldJ::synchronize() + { + buffer.deviceToHost(); + } -EventTask FieldJ::asyncCommunication( EventTask serialEvent ) -{ - EventTask ret; - __startTransaction( serialEvent ); - FieldFactory::getInstance( ).createTaskFieldReceiveAndInsert( *this ); - ret = __endTransaction( ); + SimulationDataId FieldJ::getUniqueId() + { + return getName(); + } - __startTransaction( serialEvent ); - FieldFactory::getInstance( ).createTaskFieldSend( *this ); - ret += __endTransaction( ); + HDINLINE + FieldJ::UnitValueType FieldJ::getUnit() + { + const float_64 UNIT_CURRENT = UNIT_CHARGE / UNIT_TIME / (UNIT_LENGTH * UNIT_LENGTH); + return UnitValueType(UNIT_CURRENT, UNIT_CURRENT, UNIT_CURRENT); + } - if( fieldJrecv != nullptr ) + HINLINE + std::vector FieldJ::getUnitDimension() { - EventTask eJ = fieldJrecv->asyncCommunication( ret ); - return eJ; + /* L, M, T, I, theta, N, J + * + * J is in A/m^2 + * -> L^-2 * I + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = -2.0; + unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; + + return unitDimension; } - else - return ret; -} -void FieldJ::reset( uint32_t ) -{ -} + std::string FieldJ::getName() + { + return "J"; + } -void FieldJ::synchronize( ) -{ - buffer.deviceToHost( ); -} + void FieldJ::assign(ValueType value) + { + buffer.getDeviceBuffer().setValue(value); + // fieldJ.reset(false); + } -SimulationDataId FieldJ::getUniqueId( ) -{ - return getName( ); -} + template + void FieldJ::computeCurrent(T_Species& species, uint32_t) + { + using FrameType = typename T_Species::FrameType; + typedef typename 
pmacc::traits::Resolve>::type>::type + ParticleCurrentSolver; -HDINLINE -FieldJ::UnitValueType -FieldJ::getUnit( ) -{ - const float_64 UNIT_CURRENT = UNIT_CHARGE / UNIT_TIME / ( UNIT_LENGTH * UNIT_LENGTH ); - return UnitValueType( UNIT_CURRENT, UNIT_CURRENT, UNIT_CURRENT ); -} + using FrameSolver + = currentSolver::ComputePerFrame; -HINLINE -std::vector -FieldJ::getUnitDimension( ) -{ - /* L, M, T, I, theta, N, J - * - * J is in A/m^2 - * -> L^-2 * I - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = -2.0; - unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; - - return unitDimension; -} - -std::string -FieldJ::getName( ) -{ - return "J"; -} + typedef SuperCellDescription< + typename MappingDesc::SuperCellSize, + typename GetMargin::LowerMargin, + typename GetMargin::UpperMargin> + BlockArea; -void FieldJ::assign( ValueType value ) -{ - buffer.getDeviceBuffer( ).setValue( value ); - //fieldJ.reset(false); -} + using Strategy = currentSolver::traits::GetStrategy_t; -template -void FieldJ::computeCurrent( T_Species & species, uint32_t ) -{ - /* tuning parameter to use more workers than cells in a supercell - * valid domain: 1 <= workerMultiplier - */ - const int workerMultiplier = 2; - - using FrameType = typename T_Species::FrameType; - typedef typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type ParticleCurrentSolver; - - typedef ComputeCurrentPerFrame FrameSolver; - - typedef SuperCellDescription< - typename MappingDesc::SuperCellSize, - typename GetMargin::LowerMargin, - typename GetMargin::UpperMargin - > BlockArea; - - /* The needed stride for the stride mapper depends on the stencil width. - * If the upper and lower margin of the stencil fits into one supercell - * a double checker board (stride 2) is needed. - * The round up sum of margins is the number of supercells to skip. 
- */ - using MarginPerDim = typename pmacc::math::CT::add< - typename GetMargin::LowerMargin, - typename GetMargin::UpperMargin - >::type; - using MaxMargin = typename pmacc::math::CT::max< MarginPerDim >::type; - using SuperCellMinSize = typename pmacc::math::CT::min< SuperCellSize >::type; - - /* number of supercells which must be skipped to avoid overlapping areas - * between different blocks in the kernel - */ - constexpr uint32_t skipSuperCells = ( MaxMargin::value + SuperCellMinSize::value - 1u ) / SuperCellMinSize::value; - StrideMapping< - T_area, - skipSuperCells + 1u, // stride 1u means each supercell is used - MappingDesc - > mapper( cellDescription ); - - typename T_Species::ParticlesBoxType pBox = species.getDeviceParticlesBox( ); - FieldJ::DataBoxType jBox = buffer.getDeviceBuffer( ).getDataBox( ); - FrameSolver solver( DELTA_T ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value * workerMultiplier - >::value; - - do + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< + pmacc::math::CT::volume::type::value * Strategy::workerMultiplier>::value; + + auto const depositionKernel = currentSolver::KernelComputeCurrent{}; + + typename T_Species::ParticlesBoxType pBox = species.getDeviceParticlesBox(); + FieldJ::DataBoxType jBox = buffer.getDeviceBuffer().getDataBox(); + FrameSolver solver(DELTA_T); + + auto const deposit = currentSolver::Deposit{}; + deposit.template execute(cellDescription, depositionKernel, solver, jBox, pBox); + } + + template + void FieldJ::addCurrentToEMF(T_CurrentInterpolationFunctor myCurrentInterpolationFunctor) { - PMACC_KERNEL( KernelComputeCurrent< numWorkers, BlockArea >{} ) - ( mapper.getGridDim( ), numWorkers ) - ( jBox, - pBox, solver, mapper ); + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + + AreaMapping mapper(cellDescription); + + 
constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(currentSolver::KernelAddCurrentToEMF{}) + (mapper.getGridDim(), numWorkers)( + fieldE->getDeviceDataBox(), + fieldB->getDeviceDataBox(), + buffer.getDeviceBuffer().getDataBox(), + myCurrentInterpolationFunctor, + mapper); + dc.releaseData(FieldE::getName()); + dc.releaseData(FieldB::getName()); } - while ( mapper.next( ) ); -} + void FieldJ::bashField(uint32_t exchangeType) + { + pmacc::fields::operations::CopyGuardToExchange{}(buffer, SuperCellSize{}, exchangeType); + } -template -void FieldJ::addCurrentToEMF( T_CurrentInterpolation& myCurrentInterpolation ) -{ - DataConnector &dc = Environment<>::get().DataConnector(); - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - - AreaMapping< - T_area, - MappingDesc - > mapper(cellDescription); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelAddCurrentToEMF< numWorkers >{} )( - mapper.getGridDim(), - numWorkers - )( - fieldE->getDeviceDataBox( ), - fieldB->getDeviceDataBox( ), - buffer.getDeviceBuffer( ).getDataBox( ), - myCurrentInterpolation, - mapper - ); - dc.releaseData( FieldE::getName() ); - dc.releaseData( FieldB::getName() ); -} - -void FieldJ::bashField( uint32_t exchangeType ) -{ - pmacc::fields::operations::CopyGuardToExchange{ }( - buffer, - SuperCellSize{ }, - exchangeType - ); -} - -void FieldJ::insertField( uint32_t exchangeType ) -{ - pmacc::fields::operations::AddExchangeToBorder{ }( - buffer, - SuperCellSize{ }, - exchangeType - ); -} + void FieldJ::insertField(uint32_t exchangeType) + { + pmacc::fields::operations::AddExchangeToBorder{}(buffer, SuperCellSize{}, exchangeType); + } } // namespace picongpu diff --git a/include/picongpu/fields/FieldTmp.hpp b/include/picongpu/fields/FieldTmp.hpp index a3385c352e..812abaa53c 
100644 --- a/include/picongpu/fields/FieldTmp.hpp +++ b/include/picongpu/fields/FieldTmp.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch, * Benjamin Worpitz * * This file is part of PIConGPU. @@ -38,7 +38,6 @@ namespace picongpu { - /** Representation of the temporary scalar field for plugins and temporary * particle data mapped to grid (charge density, energy density, etc.) * @@ -48,12 +47,11 @@ namespace picongpu * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and * ISimulationData. */ - class FieldTmp : - public SimulationFieldHelper, - public ISimulationData + class FieldTmp + : public SimulationFieldHelper + , public ISimulationData { public: - //! Type of each field value using ValueType = float1_X; @@ -64,32 +62,29 @@ namespace picongpu using SuperCellSize = MappingDesc::SuperCellSize; //! Type of data box for field values on host and device - using DataBoxType = DataBox >; + using DataBoxType = DataBox>; /** Create a field * * @param cellDescription mapping for kernels * @param slotId index of the temporary field */ - HINLINE FieldTmp( - MappingDesc const & cellDescription, - uint32_t slotId - ); + HINLINE FieldTmp(MappingDesc const& cellDescription, uint32_t slotId); //! Destroy a field - virtual ~FieldTmp( ) = default; + virtual ~FieldTmp() = default; //! Get a reference to the host-device buffer for the field values - HINLINE GridBuffer& getGridBuffer( ); + HINLINE GridBuffer& getGridBuffer(); //! Get the grid layout - HINLINE GridLayout getGridLayout( ); + HINLINE GridLayout getGridLayout(); //! Get the host data box for the field values - HINLINE DataBoxType getHostDataBox( ); + HINLINE DataBoxType getHostDataBox(); //! 
Get the device data box for the field values - HINLINE DataBoxType getDeviceDataBox( ); + HINLINE DataBoxType getDeviceDataBox(); /** Start asynchronous send of field values * @@ -99,31 +94,31 @@ namespace picongpu * * @param serialEvent event to depend on */ - HINLINE virtual EventTask asyncCommunication( EventTask serialEvent ); + HINLINE virtual EventTask asyncCommunication(EventTask serialEvent); /** Reset the host-device buffer for field values * * @param currentStep index of time iteration */ - HINLINE void reset( uint32_t currentStep ) override; + HINLINE void reset(uint32_t currentStep) override; //! Synchronize device data with host data - HINLINE void syncToDevice( ) override; + HINLINE void syncToDevice() override; //! Synchronize host data with device data - HINLINE void synchronize( ) override; + HINLINE void synchronize() override; /** Get id * * @param slotId index of the temporary field */ - HINLINE static SimulationDataId getUniqueId( uint32_t slotId ); + HINLINE static SimulationDataId getUniqueId(uint32_t slotId); //! Get id HINLINE SimulationDataId getUniqueId() override; //! Get unit of field components - template< class FrameSolver > + template HDINLINE static UnitValueType getUnit(); /** Get unit representation as powers of the 7 base measures @@ -133,7 +128,7 @@ namespace picongpu * thermodynamic temperature theta, amount of substance N, * luminous intensity J) */ - template< class FrameSolver > + template HINLINE static std::vector getUnitDimension(); //! Get mapping for kernels @@ -151,7 +146,7 @@ namespace picongpu * This method can be called before or after asyncCommunication without * explicit handling to avoid race conditions between both methods. 
*/ - HINLINE EventTask asyncCommunicationGather( EventTask serialEvent ); + HINLINE EventTask asyncCommunicationGather(EventTask serialEvent); /** Compute current density created by a species in an area * @@ -169,21 +164,20 @@ namespace picongpu * * @param exchangeType exchange type */ - HINLINE void bashField( uint32_t exchangeType ); + HINLINE void bashField(uint32_t exchangeType); /** Insert all particles which are in device exchange buffer * * @param exchangeType exchange type */ - HINLINE void insertField( uint32_t exchangeType ); + HINLINE void insertField(uint32_t exchangeType); private: - //! Host-device buffer for current density values - std::unique_ptr< GridBuffer > fieldTmp; + std::unique_ptr> fieldTmp; //! Buffer for receiving near-boundary values - std::unique_ptr< GridBuffer > fieldTmpRecv; + std::unique_ptr> fieldTmpRecv; //! Index of the temporary field uint32_t m_slotId; @@ -195,7 +189,6 @@ namespace picongpu //! Tags for communication uint32_t m_commTagScatter; uint32_t m_commTagGather; - }; } // namespace picongpu diff --git a/include/picongpu/fields/FieldTmp.kernel b/include/picongpu/fields/FieldTmp.kernel index 5c6b887996..e22184b319 100644 --- a/include/picongpu/fields/FieldTmp.kernel +++ b/include/picongpu/fields/FieldTmp.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Marco Garten +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Marco Garten * * This file is part of PIConGPU. 
* @@ -48,10 +48,7 @@ namespace picongpu * @tparam T_numWorkers number of workers * @tparam T_BlockDescription stance area description of the user functor */ - template< - uint32_t T_numWorkers, - typename T_BlockDescription - > + template struct KernelComputeSupercells { /** derive species property @@ -66,106 +63,63 @@ namespace picongpu * @param frameSolver functor to calculate the current for a frame * @param mapper functor to map a block to a supercell */ - template< - typename T_TmpBox, - typename T_ParBox, - typename T_FrameSolver, - typename T_Mapping, - typename T_Acc - > + template DINLINE void operator()( - T_Acc const & acc, + T_Acc const& acc, T_TmpBox fieldTmp, T_ParBox boxPar, T_FrameSolver frameSolver, - T_Mapping mapper - ) const + T_Mapping mapper) const { using namespace mappings::threads; using FramePtr = typename T_ParBox::FramePtr; using SuperCellSize = typename T_BlockDescription::SuperCellSize; - constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - DataSpace< simDim > const block( mapper.getSuperCellIndex( DataSpace< simDim > ( blockIdx ) ) ); + DataSpace const block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); FramePtr frame; lcellId_t particlesInSuperCell; - frame = boxPar.getLastFrame( block ); - particlesInSuperCell = boxPar.getSuperCell( block ).getSizeLastFrame( ); - - if( !frame.isValid() ) - return; //end kernel if we have no frames - - auto cachedVal = CachedBox::create < - 0, - typename T_TmpBox::ValueType - > ( - acc, - T_BlockDescription{ } - ); - Set< typename T_TmpBox::ValueType > set( float_X( 0.0 ) ); - - ThreadCollective< - T_BlockDescription, - numWorkers - > collective( workerIdx ); - collective( - acc, - set, - cachedVal - ); - - __syncthreads( ); - 
- while( frame.isValid() ) + frame = boxPar.getLastFrame(block); + particlesInSuperCell = boxPar.getSuperCell(block).getSizeLastFrame(); + + if(!frame.isValid()) + return; // end kernel if we have no frames + + auto cachedVal = CachedBox::create<0, typename T_TmpBox::ValueType>(acc, T_BlockDescription{}); + Set set(float_X(0.0)); + + ThreadCollective collective(workerIdx); + collective(acc, set, cachedVal); + + cupla::__syncthreads(acc); + + while(frame.isValid()) { - ForEachIdx< - IdxConfig< - cellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - if( linearIdx < particlesInSuperCell ) + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx < particlesInSuperCell) { - frameSolver( - acc, - *frame, - linearIdx, - SuperCellSize::toRT(), - cachedVal - ); + frameSolver(acc, *frame, linearIdx, SuperCellSize::toRT(), cachedVal); } - } - ); + }); - frame = boxPar.getPreviousFrame( frame ); + frame = boxPar.getPreviousFrame(frame); particlesInSuperCell = cellsPerSuperCell; - - } - __syncthreads( ); + cupla::__syncthreads(acc); nvidia::functors::Add add; - DataSpace< simDim > const blockCell = block * SuperCellSize::toRT( ); - auto fieldTmpBlock = fieldTmp.shift( blockCell ); - collective( - acc, - add, - fieldTmpBlock, - cachedVal - ); + DataSpace const blockCell = block * SuperCellSize::toRT(); + auto fieldTmpBlock = fieldTmp.shift(blockCell); + collective(acc, add, fieldTmpBlock, cachedVal); } }; diff --git a/include/picongpu/fields/FieldTmp.tpp b/include/picongpu/fields/FieldTmp.tpp index a6f3d718d6..e274f7281b 100644 --- a/include/picongpu/fields/FieldTmp.tpp +++ b/include/picongpu/fields/FieldTmp.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch, Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -27,7 +27,6 @@ #include "picongpu/particles/traits/GetInterpolation.hpp" #include -#include #include #include #include @@ -50,29 +49,23 @@ namespace picongpu { using namespace pmacc; - FieldTmp::FieldTmp( - MappingDesc const & cellDescription, - uint32_t slotId - ) : - SimulationFieldHelper( cellDescription ), - m_slotId( slotId ) + FieldTmp::FieldTmp(MappingDesc const& cellDescription, uint32_t slotId) + : SimulationFieldHelper(cellDescription) + , m_slotId(slotId) { /* Since this class is instantiated for each temporary field slot, * use getNextId( ) directly to get unique tags for each instance. * Add SPECIES_FIRSTTAG to avoid collisions with the tags for * other fields. */ - m_commTagScatter = pmacc::traits::getNextId( ) + SPECIES_FIRSTTAG; - m_commTagGather = pmacc::traits::getNextId( ) + SPECIES_FIRSTTAG; + m_commTagScatter = pmacc::traits::getNextId() + SPECIES_FIRSTTAG; + m_commTagGather = pmacc::traits::getNextId() + SPECIES_FIRSTTAG; - using Buffer = GridBuffer< ValueType, simDim >; - fieldTmp = memory::makeUnique< Buffer >( cellDescription.getGridLayout( ) ); + using Buffer = GridBuffer; + fieldTmp = std::make_unique(cellDescription.getGridLayout()); - if( fieldTmpSupportGatherCommunication ) - fieldTmpRecv = memory::makeUnique< Buffer >( - fieldTmp->getDeviceBuffer(), - cellDescription.getGridLayout( ) - ); + if(fieldTmpSupportGatherCommunication) + fieldTmpRecv = std::make_unique(fieldTmp->getDeviceBuffer(), cellDescription.getGridLayout()); /** \todo The exchange has to be resetted and set again regarding the * temporary "Fill-"Functor we want to use. @@ -80,39 +73,29 @@ namespace picongpu * Problem: buffers don't allow "bigger" exchange during run time. * so let's stay with the maximum guards. 
*/ - const DataSpace coreBorderSize = cellDescription.getGridLayout( ).getDataSpaceWithoutGuarding( ); + const DataSpace coreBorderSize = cellDescription.getGridLayout().getDataSpaceWithoutGuarding(); - typedef typename pmacc::particles::traits::FilterByFlag - < - VectorAllSpecies, - interpolation<> - >::type VectorSpeciesWithInterpolation; + typedef typename pmacc::particles::traits::FilterByFlag>::type + VectorSpeciesWithInterpolation; /* ------------------ lower margin ----------------------------------*/ typedef bmpl::accumulate< VectorSpeciesWithInterpolation, typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > > - >::type SpeciesLowerMargin; + pmacc::math::CT::max>>>::type SpeciesLowerMargin; typedef bmpl::accumulate< FieldTmpSolvers, typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > - >::type FieldTmpLowerMargin; + pmacc::math::CT::max>>::type FieldTmpLowerMargin; - typedef pmacc::math::CT::max< - SpeciesLowerMargin, - FieldTmpLowerMargin>::type SpeciesFieldTmpLowerMargin; + typedef pmacc::math::CT::max::type SpeciesFieldTmpLowerMargin; typedef pmacc::math::CT::max< GetMargin::LowerMargin, - GetMargin::LowerMargin>::type - FieldSolverLowerMargin; + GetMargin::LowerMargin>::type FieldSolverLowerMargin; - typedef pmacc::math::CT::max< - SpeciesFieldTmpLowerMargin, - FieldSolverLowerMargin>::type LowerMargin; + typedef pmacc::math::CT::max::type LowerMargin; /* ------------------ upper margin -----------------------------------*/ @@ -120,216 +103,185 @@ namespace picongpu typedef bmpl::accumulate< VectorSpeciesWithInterpolation, typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > > - >::type SpeciesUpperMargin; + pmacc::math::CT::max>>>::type SpeciesUpperMargin; typedef bmpl::accumulate< FieldTmpSolvers, typename pmacc::math::CT::make_Int::type, - pmacc::math::CT::max > - >::type FieldTmpUpperMargin; + pmacc::math::CT::max>>::type FieldTmpUpperMargin; - typedef pmacc::math::CT::max< - SpeciesUpperMargin, - 
FieldTmpUpperMargin>::type SpeciesFieldTmpUpperMargin; + typedef pmacc::math::CT::max::type SpeciesFieldTmpUpperMargin; typedef pmacc::math::CT::max< GetMargin::UpperMargin, - GetMargin::UpperMargin>::type - FieldSolverUpperMargin; + GetMargin::UpperMargin>::type FieldSolverUpperMargin; - typedef pmacc::math::CT::max< - SpeciesFieldTmpUpperMargin, - FieldSolverUpperMargin>::type UpperMargin; + typedef pmacc::math::CT::max::type UpperMargin; - const DataSpace originGuard( LowerMargin( ).toRT( ) ); - const DataSpace endGuard( UpperMargin( ).toRT( ) ); + const DataSpace originGuard(LowerMargin().toRT()); + const DataSpace endGuard(UpperMargin().toRT()); /*go over all directions*/ - for( uint32_t i = 1; i < NumberOfExchanges::value; ++i ) + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) { - DataSpace relativMask = Mask::getRelativeDirections ( i ); + DataSpace relativMask = Mask::getRelativeDirections(i); /*guarding cells depend on direction */ DataSpace guardingCells; - for( uint32_t d = 0; d < simDim; ++d ) + for(uint32_t d = 0; d < simDim; ++d) { /*originGuard and endGuard are switch because we send data * e.g. 
from left I get endGuardingCells and from right I originGuardingCells */ - switch( relativMask[d] ) + switch(relativMask[d]) { - // receive from negativ side to positiv (end) guarding cells - case -1: guardingCells[d] = endGuard[d]; - break; - // receive from positiv side to negativ (origin) guarding cells - case 1: guardingCells[d] = originGuard[d]; - break; - case 0: guardingCells[d] = coreBorderSize[d]; - break; + // receive from negativ side to positiv (end) guarding cells + case -1: + guardingCells[d] = endGuard[d]; + break; + // receive from positiv side to negativ (origin) guarding cells + case 1: + guardingCells[d] = originGuard[d]; + break; + case 0: + guardingCells[d] = coreBorderSize[d]; + break; }; - } - fieldTmp->addExchangeBuffer( i, guardingCells, m_commTagScatter ); + fieldTmp->addExchangeBuffer(i, guardingCells, m_commTagScatter); - if( fieldTmpRecv ) + if(fieldTmpRecv) { /* guarding cells depend on direction * for negative direction use originGuard else endGuard (relative direction ZERO is ignored) * don't switch end and origin because this is a read buffer and not send buffer */ - for ( uint32_t d = 0; d < simDim; ++d ) - guardingCells[d] = ( relativMask[d] == -1 ? originGuard[d] : endGuard[d] ); - fieldTmpRecv->addExchange( GUARD, i, guardingCells, m_commTagGather ); + for(uint32_t d = 0; d < simDim; ++d) + guardingCells[d] = (relativMask[d] == -1 ? 
originGuard[d] : endGuard[d]); + fieldTmpRecv->addExchange(GUARD, i, guardingCells, m_commTagGather); } } - } template - void FieldTmp::computeValue( ParticlesClass& parClass, uint32_t ) + void FieldTmp::computeValue(ParticlesClass& parClass, uint32_t) { typedef SuperCellDescription< typename MappingDesc::SuperCellSize, typename FrameSolver::LowerMargin, - typename FrameSolver::UpperMargin - > BlockArea; + typename FrameSolver::UpperMargin> + BlockArea; - StrideMapping mapper( cellDescription ); - typename ParticlesClass::ParticlesBoxType pBox = parClass.getDeviceParticlesBox( ); - FieldTmp::DataBoxType tmpBox = this->fieldTmp->getDeviceBuffer( ).getDataBox( ); + StrideMapping mapper(cellDescription); + typename ParticlesClass::ParticlesBoxType pBox = parClass.getDeviceParticlesBox(); + FieldTmp::DataBoxType tmpBox = this->fieldTmp->getDeviceBuffer().getDataBox(); FrameSolver solver; - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; do { - PMACC_KERNEL( KernelComputeSupercells< - numWorkers, - BlockArea - >{ } )( - mapper.getGridDim( ), - numWorkers - )( - tmpBox, - pBox, - solver, - mapper - ); - } while( mapper.next( ) ); + PMACC_KERNEL(KernelComputeSupercells{}) + (mapper.getGridDim(), numWorkers)(tmpBox, pBox, solver, mapper); + } while(mapper.next()); } - SimulationDataId - FieldTmp::getUniqueId( uint32_t slotId ) + SimulationDataId FieldTmp::getUniqueId(uint32_t slotId) { - return getName() + std::to_string( slotId ); + return getName() + std::to_string(slotId); } - SimulationDataId - FieldTmp::getUniqueId() + SimulationDataId FieldTmp::getUniqueId() { - return getUniqueId( m_slotId ); + return getUniqueId(m_slotId); } - void FieldTmp::synchronize( ) + void FieldTmp::synchronize() { - fieldTmp->deviceToHost( ); + fieldTmp->deviceToHost(); } - void FieldTmp::syncToDevice( ) + void 
FieldTmp::syncToDevice() { - fieldTmp->hostToDevice( ); + fieldTmp->hostToDevice(); } - EventTask FieldTmp::asyncCommunication( EventTask serialEvent ) + EventTask FieldTmp::asyncCommunication(EventTask serialEvent) { EventTask ret; - __startTransaction( serialEvent + m_gatherEv + m_scatterEv ); - FieldFactory::getInstance( ).createTaskFieldReceiveAndInsert( *this ); - ret = __endTransaction( ); + __startTransaction(serialEvent + m_gatherEv + m_scatterEv); + FieldFactory::getInstance().createTaskFieldReceiveAndInsert(*this); + ret = __endTransaction(); - __startTransaction( serialEvent + m_gatherEv + m_scatterEv); - FieldFactory::getInstance( ).createTaskFieldSend( *this ); - ret += __endTransaction( ); + __startTransaction(serialEvent + m_gatherEv + m_scatterEv); + FieldFactory::getInstance().createTaskFieldSend(*this); + ret += __endTransaction(); m_scatterEv = ret; return ret; } - EventTask FieldTmp::asyncCommunicationGather( EventTask serialEvent ) + EventTask FieldTmp::asyncCommunicationGather(EventTask serialEvent) { PMACC_VERIFY_MSG( fieldTmpSupportGatherCommunication == true, - "fieldTmpSupportGatherCommunication in memory.param must be set to true" - ); + "fieldTmpSupportGatherCommunication in memory.param must be set to true"); - if( fieldTmpRecv != nullptr ) - m_gatherEv = fieldTmpRecv->asyncCommunication( serialEvent + m_scatterEv + m_gatherEv ); + if(fieldTmpRecv != nullptr) + m_gatherEv = fieldTmpRecv->asyncCommunication(serialEvent + m_scatterEv + m_gatherEv); return m_gatherEv; } - void FieldTmp::bashField( uint32_t exchangeType ) + void FieldTmp::bashField(uint32_t exchangeType) { - pmacc::fields::operations::CopyGuardToExchange{ }( - *fieldTmp, - SuperCellSize{ }, - exchangeType - ); + pmacc::fields::operations::CopyGuardToExchange{}(*fieldTmp, SuperCellSize{}, exchangeType); } - void FieldTmp::insertField( uint32_t exchangeType ) + void FieldTmp::insertField(uint32_t exchangeType) { - pmacc::fields::operations::AddExchangeToBorder{ }( - 
*fieldTmp, - SuperCellSize{ }, - exchangeType - ); + pmacc::fields::operations::AddExchangeToBorder{}(*fieldTmp, SuperCellSize{}, exchangeType); } - FieldTmp::DataBoxType FieldTmp::getDeviceDataBox( ) + FieldTmp::DataBoxType FieldTmp::getDeviceDataBox() { - return fieldTmp->getDeviceBuffer( ).getDataBox( ); + return fieldTmp->getDeviceBuffer().getDataBox(); } - FieldTmp::DataBoxType FieldTmp::getHostDataBox( ) + FieldTmp::DataBoxType FieldTmp::getHostDataBox() { - return fieldTmp->getHostBuffer( ).getDataBox( ); + return fieldTmp->getHostBuffer().getDataBox(); } - GridBuffer &FieldTmp::getGridBuffer( ) + GridBuffer& FieldTmp::getGridBuffer() { return *fieldTmp; } - GridLayout< simDim> FieldTmp::getGridLayout( ) + GridLayout FieldTmp::getGridLayout() { - return cellDescription.getGridLayout( ); + return cellDescription.getGridLayout(); } - void FieldTmp::reset( uint32_t ) + void FieldTmp::reset(uint32_t) { - fieldTmp->getHostBuffer( ).reset( true ); - fieldTmp->getDeviceBuffer( ).reset( false ); + fieldTmp->getHostBuffer().reset(true); + fieldTmp->getDeviceBuffer().reset(false); } - template - HDINLINE FieldTmp::UnitValueType - FieldTmp::getUnit( ) + template + HDINLINE FieldTmp::UnitValueType FieldTmp::getUnit() { return FrameSolver().getUnit(); } - template - HINLINE std::vector - FieldTmp::getUnitDimension( ) + template + HINLINE std::vector FieldTmp::getUnitDimension() { return FrameSolver().getUnitDimension(); } - std::string - FieldTmp::getName( ) + std::string FieldTmp::getName() { return "FieldTmp"; } diff --git a/include/picongpu/fields/Fields.def b/include/picongpu/fields/Fields.def index 8a6be8cd29..0996a0ca55 100644 --- a/include/picongpu/fields/Fields.def +++ b/include/picongpu/fields/Fields.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. 
* @@ -21,7 +21,6 @@ namespace picongpu { - /** Define which operation is used to fill up FieldTmp * * This is better than use of boost::mtl::pair because diff --git a/include/picongpu/fields/Fields.hpp b/include/picongpu/fields/Fields.hpp index 38b0d4a6b0..961684c8bc 100644 --- a/include/picongpu/fields/Fields.hpp +++ b/include/picongpu/fields/Fields.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include "picongpu/fields/FieldB.hpp" diff --git a/include/picongpu/fields/Fields.tpp b/include/picongpu/fields/Fields.tpp index 7c7953de39..3f2040d993 100644 --- a/include/picongpu/fields/Fields.tpp +++ b/include/picongpu/fields/Fields.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/fields/LaserPhysics.def b/include/picongpu/fields/LaserPhysics.def index 45f4b6d0fa..29ae1ad365 100644 --- a/include/picongpu/fields/LaserPhysics.def +++ b/include/picongpu/fields/LaserPhysics.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,4 +22,4 @@ namespace picongpu { struct LaserPhysics; -}//namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/fields/LaserPhysics.hpp b/include/picongpu/fields/LaserPhysics.hpp index 62a68d8fc3..6ffc5570d9 100644 --- a/include/picongpu/fields/LaserPhysics.hpp +++ b/include/picongpu/fields/LaserPhysics.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -35,185 +35,150 @@ namespace picongpu { -namespace fields -{ - /** compute the electric field of the laser - * - * @tparam T_numWorkers number of workers - * @tparam T_LaserPlaneSizeInSuperCell number of cells per dimension which - * initialize the laser (size must be less or equal than the supercell size) - */ - template< - uint32_t T_numWorkers, - typename T_LaserPlaneSizeInSuperCell - > - struct KernelLaser + namespace fields { - template< - typename T_Acc, - typename T_LaserFunctor - > - DINLINE void operator()( - T_Acc const & acc, - T_LaserFunctor laserFunctor - ) const + /** compute the electric field of the laser + * + * @tparam T_numWorkers number of workers + * @tparam T_LaserPlaneSizeInSuperCell number of cells per dimension which + * initialize the laser (size must be less or equal than the supercell size) + */ + template + struct KernelLaser { - using LaserPlaneSizeInSuperCell = T_LaserPlaneSizeInSuperCell; - using LaserFunctor = T_LaserFunctor; + template + DINLINE void operator()(T_Acc const& acc, T_LaserFunctor laserFunctor) const + { + using LaserPlaneSizeInSuperCell = T_LaserPlaneSizeInSuperCell; + using LaserFunctor = T_LaserFunctor; - PMACC_CASSERT_MSG( - __LaserPlaneSizeInSuperCell_y_must_be_less_or_equal_than_SuperCellSize_y, - LaserPlaneSizeInSuperCell::y::value <= SuperCellSize::y::value - ); + PMACC_CASSERT_MSG( + __LaserPlaneSizeInSuperCell_y_must_be_less_or_equal_than_SuperCellSize_y, + LaserPlaneSizeInSuperCell::y::value <= SuperCellSize::y::value); - constexpr uint32_t planeSize = pmacc::math::CT::volume< LaserPlaneSizeInSuperCell >::type::value; - PMACC_CONSTEXPR_CAPTURE uint32_t numWorkers = T_numWorkers; + constexpr uint32_t planeSize = pmacc::math::CT::volume::type::value; + PMACC_CONSTEXPR_CAPTURE uint32_t numWorkers = T_numWorkers; - const uint32_t workerIdx = threadIdx.x; + const uint32_t workerIdx = cupla::threadIdx(acc).x; - // offset of the superCell (in cells, without any guards) to the origin of the local domain + // 
offset of the superCell (in cells, without any guards) to the origin of the local domain - DataSpace< simDim > localSuperCellOffset = DataSpace< simDim >( blockIdx ); + DataSpace localSuperCellOffset = DataSpace(cupla::blockIdx(acc)); - // add not handled supercells from LaserFunctor::Unitless::initPlaneY - localSuperCellOffset.y() += LaserFunctor::Unitless::initPlaneY / SuperCellSize::y::value; + // add not handled supercells from LaserFunctor::Unitless::initPlaneY + localSuperCellOffset.y() += LaserFunctor::Unitless::initPlaneY / SuperCellSize::y::value; - uint32_t cellOffsetInSuperCellFromInitPlaneY = LaserFunctor::Unitless::initPlaneY % SuperCellSize::y::value; + uint32_t cellOffsetInSuperCellFromInitPlaneY + = LaserFunctor::Unitless::initPlaneY % SuperCellSize::y::value; - mappings::threads::ForEachIdx< - mappings::threads::IdxConfig< - planeSize, - numWorkers - > - > { workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto accLaserFunctor = laserFunctor( - acc, - localSuperCellOffset, - mappings::threads::WorkerCfg< numWorkers >{ workerIdx } - ); + mappings::threads::ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + auto accLaserFunctor + = laserFunctor(acc, localSuperCellOffset, mappings::threads::WorkerCfg{workerIdx}); /* cell index within the superCell */ - DataSpace< simDim > cellIdxInSuperCell = DataSpaceOperations< simDim >::template map< LaserPlaneSizeInSuperCell >( linearIdx ); + DataSpace cellIdxInSuperCell + = DataSpaceOperations::template map(linearIdx); cellIdxInSuperCell.y() += cellOffsetInSuperCellFromInitPlaneY; - accLaserFunctor( acc, cellIdxInSuperCell ); - } - ); - } - }; + accLaserFunctor(acc, cellIdxInSuperCell); + }); + } + }; - /** Laser init in a single xz plane */ - struct LaserPhysics - { - void operator()(uint32_t currentStep) const + /** Laser init in a single xz plane */ + struct LaserPhysics { - /* The laser can be initialized in the plane of the first cell or - * any later x-z 
plane inside the simulation. Initializing the - * laser in planes inside the simulation corresponds to an - * evaluation of the field at negatively shifted time. - */ - constexpr float_X laserTimeShift = laserProfiles::Selected::Unitless::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - - /* Disable laser if - * - init time of laser is over or - * - we have periodic boundaries in Y direction or - * - we already performed a slide - */ - bool const laserNone = ( laserProfiles::Selected::Unitless::INIT_TIME == float_X(0.0) ); - bool const laserInitTimeOver = - ( ( currentStep * DELTA_T - laserTimeShift ) >= laserProfiles::Selected::Unitless::INIT_TIME ); - bool const topBoundariesArePeriodic = - ( Environment::get().GridController().getCommunicationMask( ).isSet( TOP ) ); - bool const boxHasSlided = ( numSlides != 0 ); - - bool const disableLaser = - laserNone || - laserInitTimeOver || - topBoundariesArePeriodic || - boxHasSlided; - if( !disableLaser ) + void operator()(uint32_t currentStep) const { - PMACC_VERIFY_MSG( - laserProfiles::Selected::Unitless::initPlaneY < static_cast( Environment::get().SubGrid().getLocalDomain().size.y() ), - "initPlaneY must be located in the top GPU" - ); - - // laser is disabled e.g. 
laserNone - constexpr bool isLaserDisabled = laserProfiles::Selected::Unitless::INIT_TIME == 0.0_X; - constexpr bool isLaserInitInFirstCell = laserProfiles::Selected::Unitless::initPlaneY == 0; - // X + 1 is a workaround to avoid warning: pointless comparison of unsigned integer with zero - constexpr bool isInitPlaneYOutsideOfAbsorber = - laserProfiles::Selected::Unitless::initPlaneY + 1 > absorber::numCells[1][0] + 1; - PMACC_CASSERT_MSG( - __initPlaneY_needs_to_be_greater_than_the_top_absorber_cells_or_zero, - isLaserDisabled || isLaserInitInFirstCell || isInitPlaneYOutsideOfAbsorber - ); - - /* Calculate how many neighbors to the left we have - * to initialize the laser in the E-Field - * - * Example: Yee needs one neighbor to perform dB = curlE - * -> initialize in y=0 plane - * A second order solver could need 2 neighbors left: - * -> initialize in y=0 and y=1 plane - * - * Question: Why do other codes initialize the B-Field instead? - * Answer: Because our fields are defined on the lower cell side - * (C-Style ftw). Therefore, our curls (for example Yee) - * are shifted nabla+ <-> nabla- compared to Fortran codes - * (in other words: curlLeft <-> curlRight) - * for E and B. - * For this reason, we have to initialize E instead of B. - * - * Problem: that's still not our case. For example our Yee does a - * dE = curlLeft(B) - therefor, we should init B, too. - * - * - * @todo: might also lack temporal offset since our formulas are E(x,z,t) instead of E(x,y,z,t) - * `const int max_y_neighbors = Get::value;` - * - * @todo Right now, the phase could be wrong ( == is cloned) - * @see LaserPhysics.hpp - * - * @todo What about the B-Field in the second plane? - * + /* The laser can be initialized in the plane of the first cell or + * any later x-z plane inside the simulation. Initializing the + * laser in planes inside the simulation corresponds to an + * evaluation of the field at negatively shifted time. 
*/ - constexpr int laserInitCellsInY = 1; + constexpr float_X laserTimeShift + = laserProfiles::Selected::Unitless::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - using LaserPlaneSizeInSuperCells = typename pmacc::math::CT::AssignIfInRange< + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + + /* Disable laser if + * - init time of laser is over or + * - we have periodic boundaries in Y direction or + * - we already performed a slide + */ + bool const laserNone = (laserProfiles::Selected::Unitless::INIT_TIME == float_X(0.0)); + bool const laserInitTimeOver + = ((currentStep * DELTA_T - laserTimeShift) >= laserProfiles::Selected::Unitless::INIT_TIME); + bool const topBoundariesArePeriodic + = (Environment::get().GridController().getCommunicationMask().isSet(TOP)); + bool const boxHasSlided = (numSlides != 0); + + bool const disableLaser = laserNone || laserInitTimeOver || topBoundariesArePeriodic || boxHasSlided; + if(!disableLaser) + { + PMACC_VERIFY_MSG( + laserProfiles::Selected::Unitless::initPlaneY + < static_cast(Environment::get().SubGrid().getLocalDomain().size.y()), + "initPlaneY must be located in the top GPU"); + + // laser is disabled e.g. 
laserNone + constexpr bool isLaserDisabled = laserProfiles::Selected::Unitless::INIT_TIME == 0.0_X; + constexpr bool isLaserInitInFirstCell = laserProfiles::Selected::Unitless::initPlaneY == 0; + // X + 1 is a workaround to avoid warning: pointless comparison of unsigned integer with zero + constexpr bool isInitPlaneYOutsideOfAbsorber + = laserProfiles::Selected::Unitless::initPlaneY + 1 > absorber::numCells[1][0] + 1; + PMACC_CASSERT_MSG( + __initPlaneY_needs_to_be_greater_than_the_top_absorber_cells_or_zero, + isLaserDisabled || isLaserInitInFirstCell || isInitPlaneYOutsideOfAbsorber); + + /* Calculate how many neighbors to the left we have + * to initialize the laser in the E-Field + * + * Example: Yee needs one neighbor to perform dB = curlE + * -> initialize in y=0 plane + * A second order solver could need 2 neighbors left: + * -> initialize in y=0 and y=1 plane + * + * Question: Why do other codes initialize the B-Field instead? + * Answer: Because our fields are defined on the lower cell side + * (C-Style ftw). Therefore, our curls (for example Yee) + * are shifted nabla+ <-> nabla- compared to Fortran codes + * (in other words: curlLeft <-> curlRight) + * for E and B. + * For this reason, we have to initialize E instead of B. + * + * Problem: that's still not our case. For example our Yee does a + * dE = curlLeft(B) - therefor, we should init B, too. + * + * + * @todo: might also lack temporal offset since our formulas are E(x,z,t) instead of E(x,y,z,t) + * `const int max_y_neighbors = Get::value;` + * + * @todo Right now, the phase could be wrong ( == is cloned) + * @see LaserPhysics.hpp + * + * @todo What about the B-Field in the second plane? 
+ * + */ + constexpr int laserInitCellsInY = 1; + + using LaserPlaneSizeInSuperCells = typename pmacc::math::CT::AssignIfInRange< typename SuperCellSize::vector_type, - bmpl::integral_c< uint32_t, 1 >, /* y direction */ - bmpl::integral_c< int, laserInitCellsInY > - >::type; - - DataSpace< simDim > gridBlocks = Environment< simDim >::get().SubGrid().getLocalDomain().size / SuperCellSize::toRT(); - // use the one supercell in y to initialize the laser plane - gridBlocks.y() = 1; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< LaserPlaneSizeInSuperCells >::type::value - >::value; - - PMACC_KERNEL( - KernelLaser< - numWorkers, - LaserPlaneSizeInSuperCells - >{} - )( - gridBlocks, - numWorkers - )( - laserProfiles::Selected( currentStep ) - ); + bmpl::integral_c, /* y direction */ + bmpl::integral_c>::type; + + DataSpace gridBlocks + = Environment::get().SubGrid().getLocalDomain().size / SuperCellSize::toRT(); + // use the one supercell in y to initialize the laser plane + gridBlocks.y() = 1; + + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< + pmacc::math::CT::volume::type::value>::value; + + PMACC_KERNEL(KernelLaser{}) + (gridBlocks, numWorkers)(laserProfiles::Selected(currentStep)); + } } - } - }; -} // namespace fields + }; + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.def b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.def new file mode 100644 index 0000000000..8f56074612 --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.def @@ -0,0 +1,82 @@ +/* Copyright 2020-2021 Klaus Steiniger, Sergei Bastrakov + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" +#include "picongpu/fields/differentiation/Curl.def" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.def" +#include "picongpu/fields/MaxwellSolver/Yee/Yee.def" + +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + namespace aoFDTD + { + /** Curl to be applied for the E-field + * + * @tparam T_neighbors Number of neighbors used to calculate the derivative from finite differences. + * Same number of neighbors is used along all directions. + * Order of derivative approximation is 2 * T_neighbors. + */ + template + using CurlE = differentiation::Curl>; + + /** Curl to be applied for the B-field + * + * @tparam T_neighbors Number of neighbors used to calculate the derivative from finite differences. + * Same number of neighbors is used along all directions. + * Order of derivative approximation is 2 * T_neighbors. + */ + template + using CurlB = differentiation::Curl>; + + } // namespace aoFDTD + + /** Finite difference field solver of chosen order. 
+ * + * References: M Ghrist + * High-Order Finite Difference Methods for Wave Equations + * PhD thesis (2000) + * Department of Applied Mathematics, University of Colorado + * + * H Vincenti et al + * doi:10.1016/j.cpc.2015.11.009 + * + * @tparam T_neighbors Number of neighbors used to calculate the derivative from finite differences. + * Same number of neighbors is used along all directions. + * Order of derivative approximation is 2 * T_neighbors. + */ + template + using ArbitraryOrderFDTD = ::picongpu::fields::maxwellSolver:: + Yee, aoFDTD::CurlB>; + + /* We need no definition of margins, because the Yee solver uses its curl + * classes to define margins + */ + + } // namespace maxwellSolver + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.hpp b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.hpp new file mode 100644 index 0000000000..bce5f341eb --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.hpp @@ -0,0 +1,49 @@ +/* Copyright 2020-2021 Klaus Steiniger, Sergei Bastrakov + * + * This file is part of PIConGPU. 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.def" +#include "picongpu/fields/differentiation/Curl.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.hpp" + +#include +#include + + +namespace pmacc +{ + namespace traits + { + template + struct StringProperties< + ::picongpu::fields::maxwellSolver::ArbitraryOrderFDTD> + { + static StringProperty get() + { + pmacc::traits::StringProperty propList("name", "other"); + propList["param"] = std::string("Arbitrary order FDTD, order ") + std::to_string(T_neighbors); + + return propList; + } + }; + + } // namespace traits +} // namespace pmacc diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.def b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.def new file mode 100644 index 0000000000..4e5fdb60a1 --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.def @@ -0,0 +1,56 @@ +/* Copyright 2020-2021 Klaus Steiniger, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + namespace aoFDTD + { + /** Tag for forward derivative used in CurlE + * of the ArbitraryOrderFDTD solver + * + * @tparam T_neighbors Number of neighbors used to calculate + * the spatial derivatives with finite differences. + * Order of approximation is 2 * T_neighbors. + */ + template + struct Forward; + + /** Tag for backward derivative used in CurlB + * of the ArbitraryOrderFDTD solver + * + * @tparam T_neighbors Number of neighbors used to calculate + * the spatial derivatives with finite differences. + * Order of approximation is 2 * T_neighbors. + */ + template + struct Backward; + + } // namespace aoFDTD + } // namespace maxwellSolver + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.hpp b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.hpp new file mode 100644 index 0000000000..56b3b4670d --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.hpp @@ -0,0 +1,194 @@ +/* Copyright 2020-2021 Klaus Steiniger, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Derivative.def" +#include "picongpu/fields/differentiation/Traits.hpp" +#include + +#include +#include + +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + namespace aoFDTD + { + namespace detail + { + /** Abstraction of the arbitrary-order finite-difference time domain + * derivative functor. + * + * @tparam T_lowerNeighbors Number of neighbors required in negative + * direction to calculate field derivative + * + * + * @tparam T_upperNeighbors Number of neighbors required in positive + * direction to calculate field derivative + * + * @tparam T_neighbors Number of neighbors used to calculate + * the derivative from finite differences. + * Order of derivative approximation is + * 2 * T_neighbors + * + * @tparam T_direction Direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template< + uint32_t T_lowerNeighbors, + uint32_t T_upperNeighbors, + uint32_t T_neighbors, + uint32_t T_direction> + struct GeneralAofdtdDerivative + { + //! Lower margin + using LowerMargin = typename pmacc::math::CT::mul< + typename pmacc::math::CT::make_Int::type, + typename pmacc::math::CT::make_BasisVector::type>::type; + + //! 
Upper margin + using UpperMargin = typename pmacc::math::CT::mul< + typename pmacc::math::CT::make_Int::type, + typename pmacc::math::CT::make_BasisVector::type>::type; + + /** Return derivative value at the given point + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + // Define shorthand type to access DataBox + using IndexType = pmacc::DataSpace; + + // Define index vectors for data access + auto lowerIndex = IndexType{}; // Vector initialized with zeros + auto upperIndex = IndexType{}; + + // lowerIndex: 0 if ( Forward ) else -1 + lowerIndex[T_direction] + = static_cast(T_upperNeighbors) - static_cast(T_neighbors); + // upperIndex: 1 if ( Forward ) else 0 + upperIndex[T_direction] + = static_cast(T_neighbors) - static_cast(T_lowerNeighbors); + + AOFDTDWeights const weights{}; + + // shortest distance finite difference as initial value + auto finiteDifference = weights[0] * (data(upperIndex) - data(lowerIndex)); + + // Compute next finite differences according to order + for(uint32_t l = 1u; l < T_neighbors; ++l) + { + lowerIndex[T_direction] -= 1; + upperIndex[T_direction] += 1; + + finiteDifference += weights[l] * (data(upperIndex) - data(lowerIndex)); + } + + return finiteDifference / cellSize[T_direction]; + } + }; + } // namespace detail + + + /**@{*/ + /** Functors for forward and backward derivative along the given direction used in ArbitraryOrderFDTD + * solver + * + * Compute an approximation of the derivative of a field f by a finite difference of + * order 2 * T_neighbors, where T_neighbors is the number of neighbors + * used to calculate the finite difference. + * + * These finite difference approximations for the forward and backward derivative are computed on a + * staggered grid. 
That is, the forward derivative will be known at a position i+1/2, if the field f is + * known at 2 * T_neighbors grid nodes i - T_neighbors + 1, i - T_neighbors + 2, ..., i + T_neighbors. + * The backward derivative will be known at a position i-1/2, if the field f is known + * at 2 * T_neighbors grid nodes i - T_neighbors, i - T_neighbors + 1, ..., i + T_neighbors - 1. + * + * The finite difference calculation can be expressed as a sum of finite differences where the + * distance of field components used in individual finite differences computations increases, e.g. + * D_x f(i+1/2) = sum_{l=0}^{T_neighbors-1} g_l^{2T_neighbors} * ( f(i+1+l) - f(i-l) ) / dx, + * for the forward derivative and where D_x is the derivative operator along x, dx the grid spacing + * along x, and g_l^{2T_neighbors} weightings for the finite differences of different distance l from + * the point i of computation. + * + * @tparam T_neighbors Number of neighbors used to calculate + * the derivative from finite differences. + * Order of derivative approximation is + * 2 * T_neighbors + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + using ForwardDerivativeFunctor + = detail::GeneralAofdtdDerivative; + + + template + using BackwardDerivativeFunctor + = detail::GeneralAofdtdDerivative; + /**@}*/ + + } // namespace aoFDTD + } // namespace maxwellSolver + + namespace differentiation + { + namespace traits + { + /**@{*/ + /** DerivativeFunctor type trait specialization for Forward and Backward derivative in + * ArbitraryOrderFDTD solver + * + * @tparam T_neighbors Number of neighbors used to calculate + * the derivative from finite differences. 
+ * Order of derivative approximation is + * 2 * T_neighbors + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor, T_direction> + : pmacc::meta::accessors::Identity< + maxwellSolver::aoFDTD::ForwardDerivativeFunctor> + { + }; + + + template + struct DerivativeFunctor, T_direction> + : pmacc::meta::accessors::Identity< + maxwellSolver::aoFDTD::BackwardDerivativeFunctor> + { + }; + /**@}*/ + + } // namespace traits + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Weights.hpp b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Weights.hpp new file mode 100644 index 0000000000..281d592a48 --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/Weights.hpp @@ -0,0 +1,83 @@ +/* Copyright 2020-2021 Klaus Steiniger, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include +#include +#include "picongpu/plugins/radiation/utilities.hpp" + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + namespace aoFDTD + { + /** Compute weights of finite differences in the ArbitraryOrderFDTD solver + * + * @tparam T_neighbors Number of neighbors used to calculate + * the derivative from finite differences. 
+ * Order of derivative approximation is + * 2 * T_neighbors + */ + template + struct AOFDTDWeights + { + HDINLINE constexpr AOFDTDWeights() + { + namespace powSpace = ::picongpu::plugins::radiation::util; + // Set initial value + weights[0] = 4.0_X * T_neighbors + * powSpace::pow( + (factorial(2 * T_neighbors) + / float_X( + powSpace::pow(2.0_X, 2 * T_neighbors) + * powSpace::pow(factorial(T_neighbors), 2))), + 2); + + // Compute all other values + for(uint32_t l = 1u; l < T_neighbors; ++l) + { + weights[l] = -1.0_X * powSpace::pow(float_X(l) - 0.5_X, 2) * (T_neighbors - l) + / float_X(T_neighbors + l) / float_X(powSpace::pow(float_X(l) + 0.5_X, 2)) + * weights[l - 1]; + } + } + + HDINLINE constexpr float_X operator[](uint32_t const l) const + { + PMACC_ASSERT_MSG(l < T_neighbors, "NUMBER_OF_COEFFICIENTS_IS_LIMITED_BY_NUMBER_OF_NEIGHBORS"); + return weights[l]; + } + + private: + HDINLINE constexpr uint32_t factorial(uint32_t const n) const + { + return n <= 1u ? 1u : (n * factorial(n - 1u)); + } + + float_X weights[T_neighbors]; + }; + } // namespace aoFDTD + } // namespace maxwellSolver + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.def b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.def new file mode 100644 index 0000000000..bdcfd14944 --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.def @@ -0,0 +1,57 @@ +/* Copyright 2020-2021 Klaus Steiniger + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.def" +#include "picongpu/fields/MaxwellSolver/YeePML/YeePML.def" + +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + /** + * Finite difference field solver of chosen order with + * Perfectly Matched Layer Absorbing Boundary Conditions (PML). + * + * References: M Ghrist + * High-Order Finite Difference Methods for Wave Equations + * PhD thesis (2000) + * Department of Applied Mathematics, University of Colorado + * + * H Vincenti et al + * doi:10.1016/j.cpc.2015.11.009 + * + * @tparam T_neighbors Number of neighbors used to calculate the derivative from finite differences. + * Same number of neighbors is used along all directions. + * Order of derivative approximation is 2 * T_neighbors. + */ + template + using ArbitraryOrderFDTDPML = ::picongpu::fields::maxwellSolver:: + YeePML, aoFDTD::CurlB>; + + } // namespace maxwellSolver + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.hpp b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.hpp new file mode 100644 index 0000000000..0416c07230 --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.hpp @@ -0,0 +1,47 @@ +/* Copyright 2020-2021 Klaus Steiniger + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.def" + +#include + + +namespace pmacc +{ + namespace traits + { + template + struct StringProperties< + ::picongpu::fields::maxwellSolver::ArbitraryOrderFDTDPML> + { + static StringProperty get() + { + pmacc::traits::StringProperty propList("name", "other"); + propList["param"] = std::string("Arbitrary order FDTD with PML, order ") + std::to_string(T_neighbors); + + return propList; + } + }; + + } // namespace traits +} // namespace pmacc diff --git a/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.def b/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.def deleted file mode 100644 index 89065c17de..0000000000 --- a/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.def +++ /dev/null @@ -1,70 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ - - template< typename T_CurrentInterpolation = currentInterpolation::NoneDS > - class DirSplitting; - -} // namespace maxwellSolver -} // namespace fields - -namespace traits -{ - - template< typename T_CurrentInterpolation > - struct GetMargin< - picongpu::fields::maxwellSolver::DirSplitting< T_CurrentInterpolation >, - picongpu::FIELD_B - > - { - using LowerMargin = pmacc::math::CT::Int < - 1, - 1, - 1 - >; - using UpperMargin = LowerMargin; - }; - - template< typename T_CurrentInterpolation > - struct GetMargin< - picongpu::fields::maxwellSolver::DirSplitting< T_CurrentInterpolation >, - picongpu::FIELD_E - > - { - using LowerMargin = pmacc::math::CT::Int < - 1, - 1, - 1 - >; - using UpperMargin = LowerMargin; - }; - -} //namespace traits -} // picongpu diff --git a/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.hpp b/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.hpp deleted file mode 100644 index d990871d4d..0000000000 --- a/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.hpp +++ /dev/null @@ -1,197 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.def" -#include "picongpu/simulation_defines.hpp" -#include "picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.kernel" -#include "picongpu/fields/FieldB.hpp" -#include "picongpu/fields/FieldE.hpp" -#include "picongpu/fields/cellType/Centered.hpp" -#include "picongpu/fields/LaserPhysics.hpp" - -#include -#include -#include -#include -#include -#include -#include - - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ -namespace dirSplitting -{ - /** Check Directional Splitting grid and time conditions - * - * This is a workaround that the condition check is only - * triggered if the current used solver is `DirSplitting` - */ - template - struct ConditionCheck - { - }; - - template - struct ConditionCheck, T_Dummy> - { - /* Directional Splitting conditions: - * - * using SI units to avoid round off errors - * - * The compiler is allowed to evaluate an expression those not depends on a template parameter - * even if the class is never instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. - * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html - * - * A workaround is to add a template dependency to the expression. 
- * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. - */ - PMACC_CASSERT_MSG(DirectionSplitting_Set_dX_equal_dt_times_c____check_your_grid_param_file, - (SI::SPEED_OF_LIGHT_SI * SI::DELTA_T_SI) == SI::CELL_WIDTH_SI && - (sizeof(T_Dummy) != 0)); - PMACC_CASSERT_MSG(DirectionSplitting_use_cubic_cells____check_your_grid_param_file, - SI::CELL_HEIGHT_SI == SI::CELL_WIDTH_SI && - (sizeof(T_Dummy) != 0)); -#if (SIMDIM == DIM3) - PMACC_CASSERT_MSG(DirectionSplitting_use_cubic_cells____check_your_grid_param_file, - SI::CELL_DEPTH_SI == SI::CELL_WIDTH_SI && - (sizeof(T_Dummy) != 0)); -#endif - }; -} // namespace dirSplitting - - template< typename T_CurrentInterpolation > - class DirSplitting: private dirSplitting::ConditionCheck< DirSplitting< T_CurrentInterpolation > > - { - private: - template - void propagate(CursorE cursorE, CursorB cursorB, GridSize gridSize) const - { - using namespace cursor::tools; - using namespace pmacc::math; - - auto gridSizeTwisted = twistComponents(gridSize); - - /* twist components of the supercell */ - using BlockDim = typename CT::TwistComponents::type; - - algorithm::kernel::ForeachBlock foreach; - foreach(zone::SphericZone<3>(pmacc::math::Size_t<3>(BlockDim::x::value, gridSizeTwisted.y(), gridSizeTwisted.z())), - cursor::make_NestedCursor(twistVectorFieldAxes(cursorE)), - cursor::make_NestedCursor(twistVectorFieldAxes(cursorB)), - DirSplittingKernel((int)gridSizeTwisted.x())); - } - public: - - using CellType = cellType::Centered; - using CurrentInterpolation = T_CurrentInterpolation; - - DirSplitting(MappingDesc) {} - - void update_beforeCurrent(uint32_t currentStep) const - { - using GuardDim = SuperCellSize; - - DataConnector &dc = Environment<>::get().DataConnector(); - - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - - auto fieldE_coreBorder = - fieldE->getGridBuffer().getDeviceBuffer(). 
- cartBuffer().view(GuardDim().toRT(), - -GuardDim().toRT()); - auto fieldB_coreBorder = - fieldB->getGridBuffer().getDeviceBuffer(). - cartBuffer().view(GuardDim().toRT(), - -GuardDim().toRT()); - - using namespace cursor::tools; - using namespace pmacc::math; - - pmacc::math::Size_t<3> gridSize = fieldE_coreBorder.size(); - - - using Orientation_X = pmacc::math::CT::Int<0,1,2>; - propagate( - fieldE_coreBorder.origin(), - fieldB_coreBorder.origin(), - gridSize); - - __setTransactionEvent(fieldE->asyncCommunication(__getTransactionEvent())); - __setTransactionEvent(fieldB->asyncCommunication(__getTransactionEvent())); - - using Orientation_Y = pmacc::math::CT::Int<1,2,0>; - propagate( - fieldE_coreBorder.origin(), - fieldB_coreBorder.origin(), - gridSize); - - __setTransactionEvent(fieldE->asyncCommunication(__getTransactionEvent())); - __setTransactionEvent(fieldB->asyncCommunication(__getTransactionEvent())); - - using Orientation_Z = pmacc::math::CT::Int<2,0,1>; - propagate( - fieldE_coreBorder.origin(), - fieldB_coreBorder.origin(), - gridSize); - - if (laserProfiles::Selected::INIT_TIME > float_X(0.0)) - LaserPhysics{}(currentStep); - - __setTransactionEvent(fieldE->asyncCommunication(__getTransactionEvent())); - __setTransactionEvent(fieldB->asyncCommunication(__getTransactionEvent())); - - dc.releaseData( FieldE::getName() ); - dc.releaseData( FieldB::getName() ); - } - - void update_afterCurrent(uint32_t) const - { - DataConnector &dc = Environment<>::get().DataConnector(); - - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - - EventTask eRfieldE = fieldE->asyncCommunication(__getTransactionEvent()); - EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(eRfieldE); - __setTransactionEvent(eRfieldB); - - dc.releaseData( FieldE::getName() ); - dc.releaseData( FieldB::getName() ); - } - - static pmacc::traits::StringProperty 
getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "DS" ); - return propList; - } - }; - -} // namespace maxwellSolver -} // namespace fields -} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.kernel b/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.kernel deleted file mode 100644 index 86d73ad14d..0000000000 --- a/include/picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.kernel +++ /dev/null @@ -1,131 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ - -template -struct DirSplittingKernel -{ - using result_type = void; - - PMACC_ALIGN(m_totalLength,int); - DirSplittingKernel(int totalLength) : m_totalLength(totalLength) {} - - template - DINLINE void propagate(CursorE cursorE, CursorB cursorB) const - { - float_X a_plus = (*cursorB(-1, 0, 0)).z() + (*cursorE(-1, 0, 0)).y(); - float_X a_minus = (*cursorB(1, 0, 0)).z() - (*cursorE(1, 0, 0)).y(); - float_X a_prime_plus = (*cursorB(-1, 0, 0)).y() - (*cursorE(-1, 0, 0)).z(); - float_X a_prime_minus = (*cursorB(1, 0, 0)).y() + (*cursorE(1, 0, 0)).z(); - - __syncthreads(); - - (*cursorB).z() = float_X(0.5) * (a_plus + a_minus); - (*cursorE).y() = float_X(0.5) * (a_plus - a_minus); - (*cursorB).y() = float_X(0.5) * (a_prime_plus + a_prime_minus); - (*cursorE).z() = float_X(0.5) * (a_prime_minus - a_prime_plus); - - __syncthreads(); - } - - template< - typename CursorE, - typename CursorB, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - CursorE globalE, - CursorB globalB - ) const - { - //\todo: optimize cache size - typedef typename pmacc::math::CT::add< - typename BlockDim::vector_type, - typename pmacc::math::CT::Int < 2, 0, 0 > ::vector_type>::type CacheSize; - - typedef container::CT::SharedBuffer CacheE; - typedef container::CT::SharedBuffer CacheB; - CacheE cacheE( acc ); - CacheB cacheB( acc ); - - float3_X fieldE_old; - float3_X fieldB_old; - int threadPos_x = threadIdx.x; - - //!@todo remove this explicit index calculation, this is a workaround during the lockstep refactoring - int linearThreadIdx = threadIdx.z * BlockDim::x::value * BlockDim::y::value + - threadIdx.y * BlockDim::x::value + - threadIdx.x; - algorithm::cudaBlock::Foreach foreach(linearThreadIdx); - - for (int x_offset = 0; x_offset < this->m_totalLength; x_offset += BlockDim::x::value) - { - foreach(acc, 
typename CacheE::Zone(), cacheE.origin(), globalE(-1 + x_offset, 0, 0), pmacc::nvidia::functors::Assign{}); - foreach(acc, typename CacheB::Zone(), cacheB.origin(), globalB(-1 + x_offset, 0, 0), pmacc::nvidia::functors::Assign{}); - __syncthreads(); - - auto cursorE = cacheE.origin()(1, 0, 0)(threadPos_x, threadIdx.y, threadIdx.z); - auto cursorB = cacheB.origin()(1, 0, 0)(threadPos_x, threadIdx.y, threadIdx.z); - - if(threadPos_x == BlockDim::x::value - 1) - { - fieldE_old = *cursorE; - fieldB_old = *cursorB; - } - if(threadPos_x == 0 && x_offset > 0) - { - *cursorE(-1,0,0) = fieldE_old; - *cursorB(-1,0,0) = fieldB_old; - } - - propagate(cursorE, cursorB); - - typedef zone::CT::SphericZone BlockZone; - foreach(acc, BlockZone(), globalE(x_offset, 0, 0), cacheE.origin()(1, 0, 0), pmacc::nvidia::functors::Assign{}); - foreach(acc, BlockZone(), globalB(x_offset, 0, 0), cacheB.origin()(1, 0, 0), pmacc::nvidia::functors::Assign{}); - - __syncthreads(); - - threadPos_x = BlockDim::x::value - 1 - threadPos_x; - } - } - -}; - -} // namespace maxwellSolver -} // namespace fields -} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Lehe/Curl.def b/include/picongpu/fields/MaxwellSolver/Lehe/Curl.def deleted file mode 100644 index d051a838fc..0000000000 --- a/include/picongpu/fields/MaxwellSolver/Lehe/Curl.def +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include -#include - - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ -namespace lehe -{ - - class CherenkovFreeDirection_X{ }; - class CherenkovFreeDirection_Y{ }; - - template< class Direction > - struct CurlE; - -} // namespace lehe -} // namespace maxwellSolver -} // namespace fields -} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Lehe/Curl.hpp b/include/picongpu/fields/MaxwellSolver/Lehe/Curl.hpp deleted file mode 100644 index 59e4d7b042..0000000000 --- a/include/picongpu/fields/MaxwellSolver/Lehe/Curl.hpp +++ /dev/null @@ -1,339 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/fields/MaxwellSolver/Lehe/Curl.def" - -#include -#include -#include - - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ -namespace lehe -{ - template< > - struct CurlE< CherenkovFreeDirection_X > - { - typedef pmacc::math::CT::Int< 1, 1, 1 > LowerMargin; - typedef pmacc::math::CT::Int< 2, 2, 2 > UpperMargin; - - float_X mySin; - - HINLINE CurlE( ) - { - mySin = float_X( - math::sin( - pmacc::algorithms::math::Pi< float_64 >::halfValue * - float_64( SPEED_OF_LIGHT ) * - float_64( DELTA_T ) / float_64( CELL_WIDTH ) - ) - ); - } - - template - HDINLINE typename Memory::ValueType operator( )(const Memory & mem ) const - { - /* Distinguished direction where the numerical Cherenkov Radiation - * of moving particles is suppressed. - */ - constexpr float_X isDir_x = float_X( 1.0 ); - constexpr float_X isDir_y = float_X( 0.0 ); - constexpr float_X isDir_z = float_X( 0.0 ); - - constexpr float_X isNotDir_x = float_X( 1.0 ) - isDir_x; - constexpr float_X isNotDir_y = float_X( 1.0 ) - isDir_y; - constexpr float_X isNotDir_z = float_X( 1.0 ) - isDir_z; - - constexpr float_X dx2 = CELL_WIDTH * CELL_WIDTH; - constexpr float_X dy2 = CELL_HEIGHT * CELL_HEIGHT; - constexpr float_X dz2 = CELL_DEPTH * CELL_DEPTH; - constexpr float_X dt2 = DELTA_T * DELTA_T; - constexpr float_X c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - - constexpr float_X reci_dx = float_X( 1.0 ) / CELL_WIDTH; - constexpr float_X reci_dy = float_X( 1.0 ) / CELL_HEIGHT; - constexpr float_X reci_dz = float_X( 1.0 ) / CELL_DEPTH; - - constexpr float_X beta_xy = float_X( 0.125 ) * dx2 / dy2 * isDir_x - + float_X( 0.125 ) * isNotDir_x * isDir_y; - constexpr float_X beta_xz = float_X( 0.125 ) * dx2 / dz2 * isDir_x - + float_X( 0.125 ) * isNotDir_x * isDir_z; - - constexpr float_X beta_yx = float_X( 0.125 ) * dy2 / dx2 * isDir_y - + float_X( 0.125 ) * isNotDir_y * isDir_x; - constexpr float_X beta_yz = float_X( 0.125 ) * dy2 / dz2 * isDir_y - + float_X( 0.125 ) 
* isNotDir_y * isDir_z; - - constexpr float_X beta_zx = float_X( 0.125 ) * dz2 / dx2 * isDir_z - + float_X( 0.125 ) * isNotDir_z * isDir_x; - constexpr float_X beta_zy = float_X( 0.125 ) * dz2 / dy2 * isDir_z - + float_X( 0.125 ) * isNotDir_z * isDir_y; - - constexpr float_X d_dir = CELL_WIDTH * isDir_x - + CELL_HEIGHT * isDir_y - + CELL_DEPTH * isDir_z; - constexpr float_X d_dir2 = d_dir * d_dir; - - // delta_x0 == delta_x - // delta_dir0 == delta_dir - const float_X delta_dir0 = float_X( 0.25 ) * - ( float_X( 1.0 ) - d_dir2 / ( c2 * dt2 ) * mySin * mySin ); - - const float_X alpha_x = float_X( 1.0 ) - - float_X( 2.0 ) * beta_xy * isNotDir_x * isDir_y - - float_X( 2.0 ) * beta_xz * isNotDir_x * isDir_z - - float_X( 2.0 ) * beta_xy * isDir_x - - float_X( 2.0 ) * beta_xz * isDir_x - - float_X( 3.0 ) * delta_dir0 * isDir_x; - - const float_X alpha_y = float_X( 1.0 ) - - float_X( 2.0 ) * beta_yx * isNotDir_y * isDir_x - - float_X( 2.0 ) * beta_yz * isNotDir_y * isDir_z - - float_X( 2.0 ) * beta_yx * isDir_y - - float_X( 2.0 ) * beta_yz * isDir_y - - float_X( 3.0 ) * delta_dir0 * isDir_y; - - const float_X alpha_z = float_X( 1.0 ) - - float_X( 2.0 ) * beta_zx * isNotDir_z * isDir_x - - float_X( 2.0 ) * beta_zy * isNotDir_z * isDir_y - - float_X( 2.0 ) * beta_zx * isDir_z - - float_X( 2.0 ) * beta_zy * isDir_z - - float_X( 3.0 ) * delta_dir0 * isDir_z; - - - const float_X curl_x - = ( - alpha_y * ( mem[0][0][0].z( ) - mem[0][-1][0].z( ) ) - + beta_yx * ( mem[1][0][0].z( ) - mem[1][-1][0].z( ) ) - + beta_yx * ( mem[-1][0][0].z( ) - mem[-1][-1][0].z( ) ) - ) * reci_dy - - ( - alpha_z * ( mem[0][0][0].y( ) - mem[0][0][-1].y( ) ) - + beta_zx * ( mem[1][0][0].y( ) - mem[1][0][-1].y( ) ) - + beta_zx * ( mem[-1][0][0].y( ) - mem[-1][0][-1].y( ) ) - ) * reci_dz; - - - const float_X curl_y - = ( - alpha_z * ( mem[0][0][0].x( ) - mem[0][0][-1].x( ) ) - + beta_zx * ( mem[1][0][0].x( ) - mem[1][0][-1].x( ) ) - + beta_zx * ( mem[-1][0][0].x( ) - mem[-1][0][-1].x( ) ) - ) * reci_dz - 
- ( - alpha_x * ( mem[0][0][0].z( ) - mem[-1][0][0].z( ) ) - + delta_dir0 * ( mem[1][0][0].z( ) - mem[-2][0][0].z( ) ) - + beta_xy * ( mem[0][1][0].z( ) - mem[-1][1][0].z( ) ) - + beta_xy * ( mem[0][-1][0].z( ) - mem[-1][-1][0].z( ) ) - + beta_xz * ( mem[0][0][1].z( ) - mem[-1][0][1].z( ) ) - + beta_xz * ( mem[0][0][-1].z( ) - mem[-1][0][-1].z( ) ) - ) * reci_dx; - - - const float_X curl_z - = ( - alpha_x * ( mem[0][0][0].y( ) - mem[-1][0][0].y( ) ) - + delta_dir0 * ( mem[1][0][0].y( ) - mem[-2][0][0].y( ) ) - + beta_xy * ( mem[0][1][0].y( ) - mem[-1][1][0].y( ) ) - + beta_xy * ( mem[0][-1][0].y( ) - mem[-1][-1][0].y( ) ) - + beta_xz * ( mem[0][0][1].y( ) - mem[-1][0][1].y( ) ) - + beta_xz * ( mem[0][0][-1].y( ) - mem[-1][0][-1].y( ) ) - ) * reci_dx - - ( - alpha_y * ( mem[0][0][0].x( ) - mem[0][-1][0].x( ) ) - + beta_yx * ( mem[1][0][0].x( ) - mem[1][-1][0].x( ) ) - + beta_yx * ( mem[-1][0][0].x( ) - mem[-1][-1][0].x( ) ) - ) * reci_dy; - - return float3_X( curl_x, curl_y, curl_z ); - - //return float3_X(diff(mem, 1).z() - diff(mem, 2).y(), - // diff(mem, 2).x() - diff(mem, 0).z(), - // diff(mem, 0).y() - diff(mem, 1).x()); - } - }; - - - template< > - struct CurlE< CherenkovFreeDirection_Y > - { - typedef pmacc::math::CT::Int< 1, 1, 1 > LowerMargin; - typedef pmacc::math::CT::Int< 2, 2, 2 > UpperMargin; - - float_X mySin; - - HINLINE CurlE( ) - { - mySin = float_X( - math::sin( - pmacc::algorithms::math::Pi< float_64 >::halfValue * - float_64( SPEED_OF_LIGHT ) * - float_64( DELTA_T ) / float_64( CELL_HEIGHT ) - ) - ); - } - - template - HDINLINE typename Memory::ValueType operator( )(const Memory & mem ) const - { - /* Distinguished direction where the numerical Cherenkov Radiation - * of moving particles is suppressed. 
- */ - constexpr float_X isDir_x = float_X( 0.0 ); - constexpr float_X isDir_y = float_X( 1.0 ); - constexpr float_X isDir_z = float_X( 0.0 ); - - constexpr float_X isNotDir_x = float_X( 1.0 ) - isDir_x; - constexpr float_X isNotDir_y = float_X( 1.0 ) - isDir_y; - constexpr float_X isNotDir_z = float_X( 1.0 ) - isDir_z; - - constexpr float_X dx2 = CELL_WIDTH * CELL_WIDTH; - constexpr float_X dy2 = CELL_HEIGHT * CELL_HEIGHT; - constexpr float_X dz2 = CELL_DEPTH * CELL_DEPTH; - constexpr float_X dt2 = DELTA_T * DELTA_T; - constexpr float_X c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - - constexpr float_X reci_dx = float_X( 1.0 ) / CELL_WIDTH; - constexpr float_X reci_dy = float_X( 1.0 ) / CELL_HEIGHT; - constexpr float_X reci_dz = float_X( 1.0 ) / CELL_DEPTH; - - /** Naming of the coefficients - * 1st letter: direction of differentiation - * 2nd letter: direction of averaging - */ - constexpr float_X beta_xy = float_X( 0.125 ) * dx2 / dy2 * isDir_x - + float_X( 0.125 ) * isNotDir_x * isDir_y; - constexpr float_X beta_xz = float_X( 0.125 ) * dx2 / dz2 * isDir_x - + float_X( 0.125 ) * isNotDir_x * isDir_z; - - constexpr float_X beta_yx = float_X( 0.125 ) * dy2 / dx2 * isDir_y - + float_X( 0.125 ) * isNotDir_y * isDir_x; - constexpr float_X beta_yz = float_X( 0.125 ) * dy2 / dz2 * isDir_y - + float_X( 0.125 ) * isNotDir_y * isDir_z; - - constexpr float_X beta_zx = float_X( 0.125 ) * dz2 / dx2 * isDir_z - + float_X( 0.125 ) * isNotDir_z * isDir_x; - constexpr float_X beta_zy = float_X( 0.125 ) * dz2 / dy2 * isDir_z - + float_X( 0.125 ) * isNotDir_z * isDir_y; - - constexpr float_X d_dir = CELL_WIDTH * isDir_x - + CELL_HEIGHT * isDir_y - + CELL_DEPTH * isDir_z; - constexpr float_X d_dir2 = d_dir * d_dir; - - // delta_y0 == delta_y - // delta_dir0 == delta_dir - const float_X delta_dir0 = float_X( 0.25 ) * - ( float_X( 1.0 ) - d_dir2 / ( c2 * dt2 ) * mySin * mySin ); - - const float_X alpha_x = float_X( 1.0 ) - - float_X( 2.0 ) * beta_xy * isNotDir_x * isDir_y - - float_X( 2.0 ) 
* beta_xz * isNotDir_x * isDir_z - - float_X( 2.0 ) * beta_xy * isDir_x - - float_X( 2.0 ) * beta_xz * isDir_x - - float_X( 3.0 ) * delta_dir0 * isDir_x; - - const float_X alpha_y = float_X( 1.0 ) - - float_X( 2.0 ) * beta_yx * isNotDir_y * isDir_x - - float_X( 2.0 ) * beta_yz * isNotDir_y * isDir_z - - float_X( 2.0 ) * beta_yx * isDir_y - - float_X( 2.0 ) * beta_yz * isDir_y - - float_X( 3.0 ) * delta_dir0 * isDir_y; - - const float_X alpha_z = float_X( 1.0 ) - - float_X( 2.0 ) * beta_zx * isNotDir_z * isDir_x - - float_X( 2.0 ) * beta_zy * isNotDir_z * isDir_y - - float_X( 2.0 ) * beta_zx * isDir_z - - float_X( 2.0 ) * beta_zy * isDir_z - - float_X( 3.0 ) * delta_dir0 * isDir_z; - - // Typedef an accessor to access mem[z][y][x] - // in (x,y,z) order :) - typedef DataSpace Space; - - const float_X curl_x - = ( - alpha_y * ( mem(Space(0,0,0)*(-1)).z( ) - mem(Space(0,-1,0)*(-1)).z( ) ) - + beta_yz * ( mem(Space(0,0,1)*(-1)).z( ) - mem(Space(0,-1,1)*(-1)).z( ) ) - + beta_yz * ( mem(Space(0,0,-1)*(-1)).z( ) - mem(Space(0,-1,-1)*(-1)).z( ) ) - + beta_yx * ( mem(Space(1,0,0)*(-1)).z( ) - mem(Space(1,-1,0)*(-1)).z( ) ) - + beta_yx * ( mem(Space(-1,0,0)*(-1)).z( ) - mem(Space(-1,-1,0)*(-1)).z( ) ) - + delta_dir0 * ( mem(Space(0,1,0)*(-1)).z( ) - mem(Space(0,-2,0)*(-1)).z( ) ) - ) * reci_dy - - ( - alpha_z * ( mem(Space(0,0,0)*(-1)).y( ) - mem(Space(0,0,-1)*(-1)).y( ) ) - + beta_zx * ( mem(Space(1,0,0)*(-1)).y( ) - mem(Space(1,0,-1)*(-1)).y( ) ) - + beta_zx * ( mem(Space(-1,0,0)*(-1)).y( ) - mem(Space(-1,0,-1)*(-1)).y( ) ) - + beta_zy * ( mem(Space(0,1,0)*(-1)).y( ) - mem(Space(0,1,-1)*(-1)).y( ) ) - + beta_zy * ( mem(Space(0,-1,0)*(-1)).y( ) - mem(Space(0,-1,-1)*(-1)).y( ) ) - ) * reci_dz; - - - const float_X curl_y - = ( - alpha_z * ( mem(Space(0,0,0)*(-1)).x( ) - mem(Space(0,0,-1)*(-1)).x( ) ) - + beta_zx * ( mem(Space(1,0,0)*(-1)).x( ) - mem(Space(1,0,-1)*(-1)).x( ) ) - + beta_zx * ( mem(Space(-1,0,0)*(-1)).x( ) - mem(Space(-1,0,-1)*(-1)).x( ) ) - + beta_zy * ( 
mem(Space(0,1,0)*(-1)).x( ) - mem(Space(0,1,-1)*(-1)).x( ) ) - + beta_zy * ( mem(Space(0,-1,0)*(-1)).x( ) - mem(Space(0,-1,-1)*(-1)).x( ) ) - ) * reci_dz - - ( - alpha_x * ( mem(Space(0,0,0)*(-1)).z( ) - mem(Space(-1,0,0)*(-1)).z( ) ) - + beta_xy * ( mem(Space(0,1,0)*(-1)).z( ) - mem(Space(-1,1,0)*(-1)).z( ) ) - + beta_xy * ( mem(Space(0,-1,0)*(-1)).z( ) - mem(Space(-1,-1,0)*(-1)).z( ) ) - + beta_xz * ( mem(Space(0,0,1)*(-1)).z( ) - mem(Space(-1,0,1)*(-1)).z( ) ) - + beta_xz * ( mem(Space(0,0,-1)*(-1)).z( ) - mem(Space(-1,0,-1)*(-1)).z( ) ) - ) * reci_dx; - - - const float_X curl_z - = ( - alpha_x * ( mem(Space(0,0,0)*(-1)).y( ) - mem(Space(-1,0,0)*(-1)).y( ) ) - + beta_xy * ( mem(Space(0,1,0)*(-1)).y( ) - mem(Space(-1,1,0)*(-1)).y( ) ) - + beta_xy * ( mem(Space(0,-1,0)*(-1)).y( ) - mem(Space(-1,-1,0)*(-1)).y( ) ) - + beta_xz * ( mem(Space(0,0,1)*(-1)).y( ) - mem(Space(-1,0,1)*(-1)).y( ) ) - + beta_xz * ( mem(Space(0,0,-1)*(-1)).y( ) - mem(Space(-1,0,-1)*(-1)).y( ) ) - ) * reci_dx - - ( - alpha_y * ( mem(Space(0,0,0)*(-1)).x( ) - mem(Space(0,-1,0)*(-1)).x( ) ) - + beta_yz * ( mem(Space(0,0,1)*(-1)).x( ) - mem(Space(0,-1,1)*(-1)).x( ) ) - + beta_yz * ( mem(Space(0,0,-1)*(-1)).x( ) - mem(Space(0,-1,-1)*(-1)).x( ) ) - + beta_yx * ( mem(Space(1,0,0)*(-1)).x( ) - mem(Space(1,-1,0)*(-1)).x( ) ) - + beta_yx * ( mem(Space(-1,0,0)*(-1)).x( ) - mem(Space(-1,-1,0)*(-1)).x( ) ) - + delta_dir0 * ( mem(Space(0,1,0)*(-1)).x( ) - mem(Space(0,-2,0)*(-1)).x( ) ) - ) * reci_dy; - - return float3_X( -curl_x, -curl_y, -curl_z ); - - //return float3_X(diff(mem, 1).z() - diff(mem, 2).y(), - // diff(mem, 2).x() - diff(mem, 0).z(), - // diff(mem, 0).y() - diff(mem, 1).x()); - } - }; -} // namespace lehe -} // namespace maxwellSolver -} // namespace fields -} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Lehe/Derivative.def b/include/picongpu/fields/MaxwellSolver/Lehe/Derivative.def new file mode 100644 index 0000000000..095242d043 --- /dev/null +++ 
b/include/picongpu/fields/MaxwellSolver/Lehe/Derivative.def @@ -0,0 +1,49 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe, + * Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + namespace lehe + { + /** Tag for derivative used in the Lehe solver + * + * Implements eq. (6) in R. Lehe et al + * Phys. Rev. ST Accel. Beams 16, 021301 (2013) + * + * @tparam T_cherenkovFreeDirection direction to remove numerical Cherenkov + * radiation in, 0 = x, 1 = y, 2 = z + * (unrelated to differentiating direction) + */ + template + struct Derivative; + + } // namespace lehe + } // namespace maxwellSolver + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Lehe/Derivative.hpp b/include/picongpu/fields/MaxwellSolver/Lehe/Derivative.hpp new file mode 100644 index 0000000000..c4cab1f0dc --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/Lehe/Derivative.hpp @@ -0,0 +1,249 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe, + * Sergei Bastrakov + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/fields/differentiation/Derivative.hpp" +#include "picongpu/fields/differentiation/ForwardDerivative.hpp" +#include "picongpu/fields/differentiation/Traits.hpp" +#include "picongpu/fields/MaxwellSolver/Lehe/Derivative.def" +#include "picongpu/traits/GetMargin.hpp" + +#include +#include +#include +#include +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + namespace lehe + { + /** Functor for derivative used in the Lehe solver + * + * Implements eq. (6) in R. Lehe et al + * Phys. Rev. ST Accel. Beams 16, 021301 (2013) + * This derivative can only be applied for the E field. + * + * @tparam T_cherenkovFreeDirection direction to remove numerical Cherenkov + * radiation in, 0 = x, 1 = y, 2 = z + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor; + + /** Functor for derivative along the Cherenkov free direction + * + * Implements eq. (6) in R. Lehe et al + * Phys. Rev. ST Accel. Beams 16, 021301 (2013) + * + * @tparam T_direction Cherenkov free direction and derivative direction, + * 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor + { + private: + //! 
Internally used derivative functor + using InternalDerivativeFunctor + = differentiation::DerivativeFunctor; + + public: + /** Lower margin: we move by 1 along each direction and + * apply InternalDerivativeFunctor, add those up + */ + using LowerMargin = typename pmacc::math::CT::add< + typename pmacc::math::CT::make_Int::type, + typename GetLowerMargin::type>::type; + + /** Upper margin: we move by 1 along each direction and + * effectively apply InternalDerivativeFunctor (for T_direction not + * literally, but structurally), add those up + */ + using UpperMargin = typename pmacc::math::CT::add< + typename pmacc::math::CT::make_Int::type, + typename GetUpperMargin::type>::type; + + //! Create a functor + HDINLINE DerivativeFunctor() + { + // differentiate along dir0; dir1 and dir2 are the other two directions + constexpr uint32_t dir0 = T_direction; + constexpr uint32_t dir1 = (dir0 + 1) % 3; + constexpr uint32_t dir2 = (dir0 + 2) % 3; + + float_64 const stepRatio = cellSize[dir0] / (SPEED_OF_LIGHT * DELTA_T); + float_64 const coeff = stepRatio + * math::sin(pmacc::math::Pi::halfValue * float_64(SPEED_OF_LIGHT) + * float_64(DELTA_T) / float_64(cellSize[dir0])); + delta = static_cast(0.25 * (1.0 - coeff * coeff)); + // for 2D the betas corresponding to z are 0 + float_64 const stepRatio1 = dir1 < simDim ? cellSize[dir0] / cellSize[dir1] : 0.0; + float_64 const stepRatio2 = dir2 < simDim ? 
cellSize[dir0] / cellSize[dir2] : 0.0; + float_64 const betaDir1 = 0.125 * stepRatio1 * stepRatio1; + float_64 const betaDir2 = 0.125 * stepRatio2 * stepRatio2; + alpha = static_cast(1.0 - 2.0 * betaDir1 - 2.0 * betaDir2 - 3.0 * delta); + } + + /** Return derivative value at the given point + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + // differentiate along dir0; dir1 and dir2 are the other two directions + constexpr uint32_t dir0 = T_direction; + constexpr uint32_t dir1 = (dir0 + 1) % 3; + constexpr uint32_t dir2 = (dir0 + 2) % 3; + + // cellSize is not constexpr currently, so make an own constexpr array + constexpr float_X step[3] = {CELL_WIDTH, CELL_HEIGHT, CELL_DEPTH}; + + /* beta_xy and beta_xz from eq. (11), generic for any T_direction; + * for 2D the betas corresponding to z are 0 + */ + constexpr float_X stepRatio1 = dir1 < simDim ? step[dir0] / step[dir1] : 0.0_X; + constexpr float_X stepRatio2 = dir2 < simDim ? step[dir0] / step[dir2] : 0.0_X; + constexpr float_X betaDir1 = 0.125_X * stepRatio1 * stepRatio1; + constexpr float_X betaDir2 = 0.125_X * stepRatio2 * stepRatio2; + + // finite-difference expression from eq. 
(6), generic for any T_direction + using Index = pmacc::DataSpace; + auto const secondUpperIndexDir0 = 2 * pmacc::math::basisVector(); + auto const lowerIndexDir0 = -pmacc::math::basisVector(); + auto const upperNeighborDir1 = pmacc::math::basisVector(); + auto const upperNeighborDir2 = pmacc::math::basisVector(); + InternalDerivativeFunctor forwardDerivative + = differentiation::makeDerivativeFunctor(); + return alpha * forwardDerivative(data) + + betaDir1 * forwardDerivative(data.shift(upperNeighborDir1)) + + betaDir1 * forwardDerivative(data.shift(-upperNeighborDir1)) + + betaDir2 * forwardDerivative(data.shift(upperNeighborDir2)) + + betaDir2 * forwardDerivative(data.shift(-upperNeighborDir2)) + + delta * (data(secondUpperIndexDir0) - data(lowerIndexDir0)) / step[T_direction]; + } + + private: + //! alpha_x from eq. (7), generic for any T_direction + float_X alpha; + + //! delta_x0 from eq. (10), generic for any T_direction + float_X delta; + }; + + /** Functor for derivative not along the Cherenkov free direction + * + * Implements eq. (6) in R. Lehe et al + * Phys. Rev. ST Accel. Beams 16, 021301 (2013) + * Implementation is separated as a few terms vanish in this case + * + * @tparam T_cherenkovFreeDirection direction to remove numerical Cherenkov + * radiation in, 0 = x, 1 = y, 2 = z + * @tparam T_direction direction to take derivative in, not equal to + * T_cherenkovFreeDirection, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor + { + PMACC_CASSERT_MSG( + _lehe_solver_cherenkov_free_direction_z_is_not_supported_for_2d, + T_cherenkovFreeDirection < simDim); + + PMACC_CASSERT_MSG( + _internal_error_wrong_lehe_derivative_functor_specialization, + T_cherenkovFreeDirection != T_direction); + + private: + //! 
Internally used derivative functor + using InternalDerivativeFunctor + = differentiation::DerivativeFunctor; + + public: + /** Lower margin: we move by 1 along T_cherenkovFreeDirection and + * apply InternalDerivativeFunctor, add those up + */ + using LowerMargin = typename pmacc::math::CT::add< + typename pmacc::math::CT::make_BasisVector::type, + typename GetLowerMargin::type>::type; + + /** Upper margin: we move by 1 along T_cherenkovFreeDirection and + * apply InternalDerivativeFunctor, add those up + */ + using UpperMargin = typename pmacc::math::CT::add< + typename pmacc::math::CT::make_BasisVector::type, + typename GetUpperMargin::type>::type; + + /** Return derivative value at the given point + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + /* To obtain the following scheme, consider eq. (6) for x direction + * being Cherenkov-free and taking derivatives along y, z. + * Then in eq. (11) delta_y = delta_z = 0, beta_yz = beta_zy = 0, + * so only 3 terms are left in the derivative expression. 
+ * It is implemented generically for any T_cherenkovFreeDirection + * and T_direction that are not equal to one another + */ + constexpr float_X beta = 0.125_X; + constexpr float_X alpha = 1.0_X - 2.0_X * beta; + InternalDerivativeFunctor forwardDerivative + = differentiation::makeDerivativeFunctor(); + auto const upperNeighbor + = pmacc::math::basisVector, T_cherenkovFreeDirection>(); + return alpha * forwardDerivative(data) + beta * forwardDerivative(data.shift(upperNeighbor)) + + beta * forwardDerivative(data.shift(-upperNeighbor)); + } + }; + + } // namespace lehe + } // namespace maxwellSolver + + namespace differentiation + { + namespace traits + { + /** Functor type trait specialization for the Lehe solver derivative + * + * @tparam T_cherenkovFreeDirection direction to remove numerical Cherenkov + * radiation in, 0 = x, 1 = y, 2 = z + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor, T_direction> + : pmacc::meta::accessors::Identity< + maxwellSolver::lehe::DerivativeFunctor> + { + }; + + } // namespace traits + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.def b/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.def index 65520fdd3d..80e3b049c7 100644 --- a/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.def +++ b/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.def @@ -1,4 +1,5 @@ -/* Copyright 2013-2020 Axel Huebl, Remi Lehe +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe, + * Sergei Bastrakov * * This file is part of PIConGPU.
* @@ -19,39 +20,63 @@ #pragma once -#include "picongpu/fields/MaxwellSolver/Lehe/Curl.def" -#include "picongpu/fields/MaxwellSolver/Yee/Yee.def" #include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" +#include "picongpu/fields/differentiation/Curl.def" +#include "picongpu/fields/MaxwellSolver/Lehe/Derivative.def" +#include "picongpu/fields/MaxwellSolver/Yee/Yee.def" + +#include namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ + namespace fields + { + namespace maxwellSolver + { + namespace lehe + { + /* Note: Directions are kept as global names for compatibility with the + * previously used solver interface + */ + + //! Remove numerical Cherenkov radiation along x + constexpr uint32_t CherenkovFreeDirection_X = 0; + + //! Remove numerical Cherenkov radiation along y + constexpr uint32_t CherenkovFreeDirection_Y = 1; + + //! Remove numerical Cherenkov radiation along z + constexpr uint32_t CherenkovFreeDirection_Z = 2; + + /** Curl to be applied for the E field + * + * @tparam T_cherenkovFreeDirection direction to remove numerical Cherenkov + * radiation in, 0 = x, 1 = y, 2 = z + */ + template + using CurlE = differentiation::Curl>; + + } // namespace lehe + + /** modified Yee solver + * + * Reference: R. Lehe et al + * Phys. Rev. ST Accel. Beams 16, 021301 (2013) + * + * @tparam T_cherenkovFreeDir the direction (axis) which should be free of Cherenkov radiation, + * 0 = x, 1 = y, 2 = z + */ + template< + typename T_CurrentInterpolation = currentInterpolation::None, + uint32_t T_cherenkovFreeDir = lehe::CherenkovFreeDirection_Y> + using Lehe + = ::picongpu::fields::maxwellSolver::Yee>; + + /* We need no definition of margins, because the Yee solver uses its curl + * classes to define margins + */ - /** modified Yee solver - * - * Reference: R. Lehe et al - * Phys. Rev. ST Accel.
Beams 16, 021301 (2013) - * - * @tparam T_CherenkovFreeDir the direction (axis) which should be free of cherenkov radiation - * valid types: lehe::CherenkovFreeDirection_Y, lehe::CherenkovFreeDirection_Y - */ - template< - typename T_CurrentInterpolation = currentInterpolation::None, - typename T_CherenkovFreeDir = lehe::CherenkovFreeDirection_Y - > - using Lehe = ::picongpu::fields::maxwellSolver::Yee< - T_CurrentInterpolation, - lehe::CurlE< T_CherenkovFreeDir > - >; - - /* we need no definition of margins, because the YeeSolver uses its curl - * classes to define margins - */ - -} // namespace maxwellSolver -} // namespace fields + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.hpp b/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.hpp index 7134f433c7..47f18c1c80 100644 --- a/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.hpp +++ b/include/picongpu/fields/MaxwellSolver/Lehe/Lehe.hpp @@ -1,4 +1,5 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe, + * Sergei Bastrakov * * This file is part of PIConGPU. * @@ -17,40 +18,31 @@ * If not, see . 
*/ - - #pragma once -#include "picongpu/fields/MaxwellSolver/Lehe/Lehe.def" -#include "picongpu/fields/MaxwellSolver/Lehe/Curl.hpp" #include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/MaxwellSolver/Lehe/Lehe.def" +#include "picongpu/fields/MaxwellSolver/Lehe/Derivative.hpp" + +#include + namespace pmacc { -namespace traits -{ - template< - typename T_CurrentInterpolation, - typename T_CherenkovFreeDir - > - struct StringProperties< - ::picongpu::fields::maxwellSolver::Lehe< - T_CurrentInterpolation, - T_CherenkovFreeDir - > - > + namespace traits { - static StringProperty get() + template + struct StringProperties<::picongpu::fields::maxwellSolver::Lehe> { - auto propList = - ::picongpu::fields::maxwellSolver::Lehe< - T_CurrentInterpolation, - T_CherenkovFreeDir - >::getStringProperties(); - // overwrite the name of the yee solver (inherit all other properties) - propList["name"].value = "Lehe"; - return propList; - } - }; -} // namespace traits + static StringProperty get() + { + auto propList = ::picongpu::fields::maxwellSolver::Lehe:: + getStringProperties(); + // overwrite the name of the Yee solver (inherit all other properties) + propList["name"].value = "Lehe"; + return propList; + } + }; + + } // namespace traits } // namespace pmacc diff --git a/include/picongpu/fields/MaxwellSolver/LehePML/LehePML.def b/include/picongpu/fields/MaxwellSolver/LehePML/LehePML.def new file mode 100644 index 0000000000..10410b3390 --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/LehePML/LehePML.def @@ -0,0 +1,56 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe, + * Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" +#include "picongpu/fields/MaxwellSolver/Lehe/Lehe.def" +#include "picongpu/fields/MaxwellSolver/YeePML/YeePML.def" + +#include + + +namespace picongpu +{ + namespace fields + { + namespace maxwellSolver + { + /** modified Yee solver with PML absorber + * + * Reference: R. Lehe et al + * Phys. Rev. ST Accel. Beams 16, 021301 (2013) + * + * @tparam T_cherenkovFreeDir the direction (axis) which should be free of Cherenkov radiation, + * 0 = x, 1 = y, 2 = z + */ + template< + typename T_CurrentInterpolation = currentInterpolation::None, + uint32_t T_cherenkovFreeDir = lehe::CherenkovFreeDirection_Y> + using LehePML + = ::picongpu::fields::maxwellSolver::YeePML>; + + /* We need no definition of margins, because the YeePML solver uses its curl + * classes to define margins + */ + + } // namespace maxwellSolver + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/LehePML/LehePML.hpp b/include/picongpu/fields/MaxwellSolver/LehePML/LehePML.hpp new file mode 100644 index 0000000000..dcb55b674b --- /dev/null +++ b/include/picongpu/fields/MaxwellSolver/LehePML/LehePML.hpp @@ -0,0 +1,48 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Remi Lehe, + * Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version.
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/MaxwellSolver/Lehe/Lehe.hpp" +#include "picongpu/fields/MaxwellSolver/LehePML/LehePML.def" + +#include + + +namespace pmacc +{ + namespace traits + { + template + struct StringProperties<::picongpu::fields::maxwellSolver::LehePML> + { + static StringProperty get() + { + auto propList = ::picongpu::fields::maxwellSolver:: + LehePML::getStringProperties(); + // overwrite the name of the solver (inherit all other properties) + propList["name"].value = "Lehe"; + return propList; + } + }; + + } // namespace traits +} // namespace pmacc diff --git a/include/picongpu/fields/MaxwellSolver/None/None.def b/include/picongpu/fields/MaxwellSolver/None/None.def index 6c7402cb82..0036d3f63c 100644 --- a/include/picongpu/fields/MaxwellSolver/None/None.def +++ b/include/picongpu/fields/MaxwellSolver/None/None.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -25,45 +25,31 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ - - template< typename T_CurrentInterpolation = currentInterpolation::None > - class None; - -} // namespace maxwellSolver -} // namespace fields - -namespace traits -{ - - template< typename T_CurrentInterpolation > - struct GetMargin< - picongpu::fields::maxwellSolver::None< T_CurrentInterpolation >, - FIELD_B - > + namespace fields { - using LowerMargin = typename pmacc::math::CT::make_Int< - simDim, - 0 - >::type; - using UpperMargin = LowerMargin; - }; + namespace maxwellSolver + { + template + class None; - template< typename T_CurrentInterpolation > - struct GetMargin< - picongpu::fields::maxwellSolver::None< T_CurrentInterpolation >, - FIELD_E - > - { - using LowerMargin = typename pmacc::math::CT::make_Int< - simDim, - 0 - >::type; - using UpperMargin = LowerMargin; - }; + } // namespace maxwellSolver + } // namespace fields -} // namespace traits + namespace traits + { + template + struct GetMargin, FIELD_B> + { + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = LowerMargin; + }; + + template + struct GetMargin, FIELD_E> + { + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = LowerMargin; + }; + + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/None/None.hpp b/include/picongpu/fields/MaxwellSolver/None/None.hpp index 4335a5341f..b3384cec95 100644 --- a/include/picongpu/fields/MaxwellSolver/None/None.hpp +++ b/include/picongpu/fields/MaxwellSolver/None/None.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -28,66 +28,66 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace none -{ - /** Check Yee grid and time conditions - * - * This is a workaround that the condition check is only - * triggered if the current used solver is `NoSolver` - */ - template - struct ConditionCheck - { - }; - - template - struct ConditionCheck< - None< T_CurrentInterpolation > , - T_Dummy - > + namespace fields { - /* Courant-Friedrichs-Levy-Condition for Yee Field Solver: */ - PMACC_CASSERT_MSG(Courant_Friedrichs_Levy_condition_failure____check_your_grid_param_file, - (SPEED_OF_LIGHT*SPEED_OF_LIGHT*DELTA_T*DELTA_T*INV_CELL2_SUM)<=1.0); - }; -} // namespace none - - template< typename T_CurrentInterpolation > - class None : private none::ConditionCheck< None< T_CurrentInterpolation> > - { - private: - typedef MappingDesc::SuperCellSize SuperCellSize; - - public: - using CellType = cellType::Yee; - using CurrentInterpolation = T_CurrentInterpolation; - - None(MappingDesc) + namespace maxwellSolver { - - } - - void update_beforeCurrent(uint32_t) - { - - } - - void update_afterCurrent(uint32_t) - { - - } - - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "none" ); - return propList; - } - }; - -} // namespace maxwellSolver -} // namespace fields + namespace none + { + /** Check Yee grid and time conditions + * + * This is a workaround so that the condition check is only + * triggered if the currently used solver is `None` + */ + template + struct ConditionCheck + { + }; + + template + struct ConditionCheck, T_Dummy> + { + /* Courant-Friedrichs-Levy-Condition for Yee Field Solver: + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE*) != 0` is always true and defers the evaluation.
+ */ + PMACC_CASSERT_MSG( + Courant_Friedrichs_Levy_condition_failure____check_your_grid_param_file, + (SPEED_OF_LIGHT * SPEED_OF_LIGHT * DELTA_T * DELTA_T * INV_CELL2_SUM) <= 1.0 + && sizeof(T_Dummy*) != 0); + }; + } // namespace none + + template + class None : private none::ConditionCheck> + { + private: + typedef MappingDesc::SuperCellSize SuperCellSize; + + public: + using CellType = cellType::Yee; + using CurrentInterpolation = T_CurrentInterpolation; + + None(MappingDesc) + { + } + + void update_beforeCurrent(uint32_t) + { + } + + void update_afterCurrent(uint32_t) + { + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "none"); + return propList; + } + }; + + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Solvers.def b/include/picongpu/fields/MaxwellSolver/Solvers.def index 815871126c..1f37df1af2 100644 --- a/include/picongpu/fields/MaxwellSolver/Solvers.def +++ b/include/picongpu/fields/MaxwellSolver/Solvers.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -17,16 +17,12 @@ * If not, see . 
*/ - - #pragma once #include "picongpu/fields/MaxwellSolver/None/None.def" #include "picongpu/fields/MaxwellSolver/Yee/Yee.def" #include "picongpu/fields/MaxwellSolver/YeePML/YeePML.def" -#if (SIMDIM==3) #include "picongpu/fields/MaxwellSolver/Lehe/Lehe.def" -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.def" -#endif -#endif +#include "picongpu/fields/MaxwellSolver/LehePML/LehePML.def" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.def" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.def" diff --git a/include/picongpu/fields/MaxwellSolver/Solvers.hpp b/include/picongpu/fields/MaxwellSolver/Solvers.hpp index 8994dbf417..4b1608c390 100644 --- a/include/picongpu/fields/MaxwellSolver/Solvers.hpp +++ b/include/picongpu/fields/MaxwellSolver/Solvers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -17,16 +17,12 @@ * If not, see . 
*/ - - #pragma once #include "picongpu/fields/MaxwellSolver/None/None.hpp" #include "picongpu/fields/MaxwellSolver/Yee/Yee.hpp" #include "picongpu/fields/MaxwellSolver/YeePML/YeePML.hpp" -#if (SIMDIM==3) #include "picongpu/fields/MaxwellSolver/Lehe/Lehe.hpp" -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/fields/MaxwellSolver/DirSplitting/DirSplitting.hpp" -#endif -#endif +#include "picongpu/fields/MaxwellSolver/LehePML/LehePML.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTD/ArbitraryOrderFDTD.hpp" +#include "picongpu/fields/MaxwellSolver/ArbitraryOrderFDTDPML/ArbitraryOrderFDTDPML.hpp" diff --git a/include/picongpu/fields/MaxwellSolver/Yee/Curl.def b/include/picongpu/fields/MaxwellSolver/Yee/Curl.def deleted file mode 100644 index f5bca11df4..0000000000 --- a/include/picongpu/fields/MaxwellSolver/Yee/Curl.def +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/algorithms/DifferenceToUpper.def" -#include "picongpu/algorithms/DifferenceToLower.def" - -#include - - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ -namespace yee -{ - - template< typename Difference > - struct Curl; - - using CurlLeft = Curl< DifferenceToLower< simDim > >; - using CurlRight = Curl< DifferenceToUpper< simDim > >; - -} // namespace yee -} // namespace maxwellSolver -} // namespace fields -} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Yee/Curl.hpp b/include/picongpu/fields/MaxwellSolver/Yee/Curl.hpp deleted file mode 100644 index 5023c37f58..0000000000 --- a/include/picongpu/fields/MaxwellSolver/Yee/Curl.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/algorithms/DifferenceToUpper.hpp" -#include "picongpu/algorithms/DifferenceToLower.hpp" -#include "picongpu/fields/MaxwellSolver/Yee/Curl.def" - -#include - - -namespace picongpu -{ -namespace fields -{ -namespace maxwellSolver -{ -namespace yee -{ - - template< typename T_Difference > - struct Curl - { - using Difference = T_Difference; - using LowerMargin = typename Difference::OffsetOrigin; - using UpperMargin = typename Difference::OffsetEnd; - - template - HDINLINE typename Memory::ValueType operator()( Memory const & mem ) const - { - const typename Difference::template GetDifference< 0 > Dx; - const typename Difference::template GetDifference< 1 > Dy; - const typename Difference::template GetDifference< 2 > Dz; - - return float3_X( - Dy( mem ).z() - Dz( mem ).y(), - Dz( mem ).x() - Dx( mem ).z(), - Dx( mem ).y() - Dy( mem ).x() - ); - } - }; - -} // namespace yee -} // namespace maxwellSolver -} // namespace fields -} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Yee/Yee.def b/include/picongpu/fields/MaxwellSolver/Yee/Yee.def index 0274616cca..f9442aba06 100644 --- a/include/picongpu/fields/MaxwellSolver/Yee/Yee.def +++ b/include/picongpu/fields/MaxwellSolver/Yee/Yee.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -19,64 +19,30 @@ #pragma once -#include "picongpu/fields/MaxwellSolver/Yee/Curl.def" #include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" +#include "picongpu/fields/differentiation/Curl.def" +#include "picongpu/fields/differentiation/Derivative.def" namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ - - template< - typename T_CurrentInterpolation = currentInterpolation::None, - typename CurlE = yee::CurlRight, - typename CurlB = yee::CurlLeft - > - class Yee; - -} // namespace maxwellSolver -} // namespace fields - -namespace traits -{ - - template< - typename T_CurrentInterpolation, - class CurlE, - class CurlB - > - struct GetMargin< - picongpu::fields::maxwellSolver::Yee< - T_CurrentInterpolation, - CurlE, - CurlB - >, FIELD_B - > + namespace fields { - using LowerMargin = typename CurlB::LowerMargin; - using UpperMargin = typename CurlB::UpperMargin; - }; - - template< - typename T_CurrentInterpolation, - class CurlE, - class CurlB - > - struct GetMargin< - picongpu::fields::maxwellSolver::Yee< - T_CurrentInterpolation, - CurlE, - CurlB - >, - FIELD_E - > - { - using LowerMargin = typename CurlE::LowerMargin; - using UpperMargin = typename CurlE::UpperMargin; - }; - -} //namespace traits + namespace maxwellSolver + { + namespace yee + { + using CurlLeft = differentiation::Curl; + using CurlRight = differentiation::Curl; + + } // namespace yee + + template< + typename T_CurrentInterpolation = currentInterpolation::None, + typename CurlE = yee::CurlRight, + typename CurlB = yee::CurlLeft> + class Yee; + + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Yee/Yee.hpp b/include/picongpu/fields/MaxwellSolver/Yee/Yee.hpp index cf09534251..699454f1c8 100644 --- a/include/picongpu/fields/MaxwellSolver/Yee/Yee.hpp +++ b/include/picongpu/fields/MaxwellSolver/Yee/Yee.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, 
Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -21,13 +21,14 @@ #include "picongpu/simulation_defines.hpp" #include "picongpu/fields/MaxwellSolver/Yee/Yee.def" -#include "picongpu/fields/MaxwellSolver/Yee/Curl.hpp" #include "picongpu/fields/absorber/ExponentialDamping.hpp" #include "picongpu/fields/FieldE.hpp" #include "picongpu/fields/FieldB.hpp" #include "picongpu/fields/MaxwellSolver/Yee/Yee.kernel" #include "picongpu/fields/cellType/Yee.hpp" #include "picongpu/fields/LaserPhysics.hpp" +#include "picongpu/fields/differentiation/Curl.hpp" +#include "picongpu/traits/GetMargin.hpp" #include #include @@ -37,135 +38,135 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ - - template< - typename T_CurrentInterpolation, - class CurlE, - class CurlB - > - class Yee + namespace fields { - private: - typedef MappingDesc::SuperCellSize SuperCellSize; - - - std::shared_ptr< FieldE > fieldE; - std::shared_ptr< FieldB > fieldB; - MappingDesc m_cellDescription; - - template - void updateE() - { - /* Courant-Friedrichs-Levy-Condition for Yee Field Solver: */ - PMACC_CASSERT_MSG(Courant_Friedrichs_Levy_condition_failure____check_your_grid_param_file, - (SPEED_OF_LIGHT*SPEED_OF_LIGHT*DELTA_T*DELTA_T*INV_CELL2_SUM)<=1.0); - - typedef SuperCellDescription< - SuperCellSize, - typename CurlB::LowerMargin, - typename CurlB::UpperMargin - > BlockArea; - - AreaMapping mapper(m_cellDescription); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL(yee::KernelUpdateE< numWorkers, BlockArea >{ }) - ( mapper.getGridDim(), numWorkers )( - CurlB( ), - this->fieldE->getDeviceDataBox(), - this->fieldB->getDeviceDataBox(), - mapper - ); - } - - template - void updateBHalf() + namespace maxwellSolver { - typedef SuperCellDescription< - SuperCellSize, - typename CurlE::LowerMargin, - typename 
CurlE::UpperMargin - > BlockArea; - - AreaMapping mapper(m_cellDescription); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL(yee::KernelUpdateBHalf< numWorkers, BlockArea >{ }) - ( mapper.getGridDim(), numWorkers )( - CurlE( ), - this->fieldB->getDeviceDataBox(), - this->fieldE->getDeviceDataBox(), - mapper - ); - } - - public: - - using CellType = cellType::Yee; - using CurrentInterpolation = T_CurrentInterpolation; - - Yee(MappingDesc cellDescription) : m_cellDescription(cellDescription) + template + class Yee + { + private: + typedef MappingDesc::SuperCellSize SuperCellSize; + + + std::shared_ptr fieldE; + std::shared_ptr fieldB; + MappingDesc m_cellDescription; + + template + void updateE() + { + /* Courant-Friedrichs-Levy-Condition for Yee Field Solver: + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE*) != 0` is always true and defers the evaluation. 
+ */ + PMACC_CASSERT_MSG( + Courant_Friedrichs_Levy_condition_failure____check_your_grid_param_file, + (SPEED_OF_LIGHT * SPEED_OF_LIGHT * DELTA_T * DELTA_T * INV_CELL2_SUM) <= 1.0 + && sizeof(T_CurrentInterpolation*) != 0); + + typedef SuperCellDescription< + SuperCellSize, + typename traits::GetLowerMargin::type, + typename traits::GetUpperMargin::type> + BlockArea; + + AreaMapping mapper(m_cellDescription); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(yee::KernelUpdateE{}) + (mapper.getGridDim(), + numWorkers)(CurlB(), this->fieldE->getDeviceDataBox(), this->fieldB->getDeviceDataBox(), mapper); + } + + template + void updateBHalf() + { + typedef SuperCellDescription< + SuperCellSize, + typename CurlE::LowerMargin, + typename CurlE::UpperMargin> + BlockArea; + + AreaMapping mapper(m_cellDescription); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(yee::KernelUpdateBHalf{}) + (mapper.getGridDim(), + numWorkers)(CurlE(), this->fieldB->getDeviceDataBox(), this->fieldE->getDeviceDataBox(), mapper); + } + + public: + using CellType = cellType::Yee; + using CurrentInterpolation = T_CurrentInterpolation; + + Yee(MappingDesc cellDescription) : m_cellDescription(cellDescription) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + this->fieldE = dc.get(FieldE::getName(), true); + this->fieldB = dc.get(FieldB::getName(), true); + } + + void update_beforeCurrent(uint32_t) + { + updateBHalf(); + EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); + + updateE(); + __setTransactionEvent(eRfieldB); + updateE(); + } + + void update_afterCurrent(uint32_t currentStep) + { + using Absorber = absorber::ExponentialDamping; + Absorber::run(currentStep, this->m_cellDescription, this->fieldE->getDeviceDataBox()); + if(laserProfiles::Selected::INIT_TIME > float_X(0.0)) + LaserPhysics{}(currentStep); + + EventTask eRfieldE = 
fieldE->asyncCommunication(__getTransactionEvent()); + + updateBHalf(); + __setTransactionEvent(eRfieldE); + updateBHalf(); + + Absorber::run(currentStep, this->m_cellDescription, fieldB->getDeviceDataBox()); + + EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(eRfieldB); + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Yee"); + return propList; + } + }; + + } // namespace maxwellSolver + } // namespace fields + + namespace traits + { + template + struct GetMargin, FIELD_B> { - DataConnector &dc = Environment<>::get().DataConnector(); + using LowerMargin = typename CurlB::LowerMargin; + using UpperMargin = typename CurlB::UpperMargin; + }; - this->fieldE = dc.get< FieldE >( FieldE::getName(), true ); - this->fieldB = dc.get< FieldB >( FieldB::getName(), true ); - } - - void update_beforeCurrent(uint32_t) + template + struct GetMargin, FIELD_E> { - updateBHalf < CORE+BORDER >(); - EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); - - updateE(); - __setTransactionEvent(eRfieldB); - updateE(); - } + using LowerMargin = typename CurlE::LowerMargin; + using UpperMargin = typename CurlE::UpperMargin; + }; - void update_afterCurrent(uint32_t currentStep) - { - using Absorber = absorber::ExponentialDamping; - Absorber::run( - currentStep, - this->m_cellDescription, - this->fieldE->getDeviceDataBox() - ); - if (laserProfiles::Selected::INIT_TIME > float_X(0.0)) - LaserPhysics{}(currentStep); - - EventTask eRfieldE = fieldE->asyncCommunication(__getTransactionEvent()); - - updateBHalf < CORE> (); - __setTransactionEvent(eRfieldE); - updateBHalf < BORDER > (); - - Absorber::run( - currentStep, - this->m_cellDescription, - fieldB->getDeviceDataBox() - ); - - EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(eRfieldB); - } - - static pmacc::traits::StringProperty getStringProperties() - { 
- pmacc::traits::StringProperty propList( "name", "Yee" ); - return propList; - } - }; - -} // namespace maxwellSolver -} // namespace fields -} // picongpu + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/Yee/Yee.kernel b/include/picongpu/fields/MaxwellSolver/Yee/Yee.kernel index ca90edb75d..82365a5515 100644 --- a/include/picongpu/fields/MaxwellSolver/Yee/Yee.kernel +++ b/include/picongpu/fields/MaxwellSolver/Yee/Yee.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten * * This file is part of PIConGPU. * @@ -28,212 +28,148 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yee -{ - using namespace pmacc; - - /** compute electric field - * - * @tparam T_numWorkers number of workers - * @tparam T_BlockDescription field (electric and magnetic) domain description - */ - template< - uint32_t T_workers, - typename T_BlockDescription - > - struct KernelUpdateE + namespace fields { - /** update electric field - * - * @tparam T_Curl curl functor type - * @tparam T_EBox pmacc::DataBox, electric field box type - * @tparam T_BBox pmacc::DataBox, magnetic field box type - * @tparam T_Mapping mapper functor type - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param curl functor to calculate the electric field, interface must be - * `operator()(T_BBox)` - * @param fieldE electric field iterator - * @param fieldB magnetic field iterator - * @param mapper functor to map a block to a supercell - */ - template< - typename T_Curl, - typename T_EBox, - typename T_BBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_Curl const curl, - T_EBox fieldE, - T_BBox const fieldB, - T_Mapping mapper - ) const + namespace maxwellSolver { - using namespace mappings::threads; - - constexpr uint32_t 
cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_workers; - - uint32_t const workerIdx = threadIdx.x; - - auto cachedB = CachedBox::create< - 0u, - typename T_BBox::ValueType - >( - acc, - T_BlockDescription( ) - ); - - nvidia::functors::Assign assign; - DataSpace< simDim > const block( mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) ); - DataSpace< simDim > const blockCell = block * MappingDesc::SuperCellSize::toRT( ); - - auto fieldBBlock = fieldB.shift( blockCell ); - - ThreadCollective< - T_BlockDescription, - numWorkers - > collective( workerIdx ); - - collective( - acc, - assign, - cachedB, - fieldBBlock - ); - - __syncthreads(); - - constexpr float_X c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - constexpr float_X dt = DELTA_T; - - ForEachIdx< - IdxConfig< - cellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + namespace yee + { + using namespace pmacc; + + /** compute electric field + * + * @tparam T_numWorkers number of workers + * @tparam T_BlockDescription field (electric and magnetic) domain description + */ + template + struct KernelUpdateE { - /* cell index within the superCell */ - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - - fieldE( blockCell + cellIdx ) += curl( cachedB.shift( cellIdx ) ) * c2 * dt; - } - ); - } - }; - - /** compute magnetic field - * - * @tparam T_numWorkers number of workers - * @tparam T_BlockDescription field (electric and magnetic) domain description - */ - template< - uint32_t T_workers, - typename T_BlockDescription - > - struct KernelUpdateBHalf - { - /** update magnetic field - * - * @tparam T_Curl curl functor type - * @tparam T_EBox pmacc::DataBox, electric field box type - * @tparam T_BBox pmacc::DataBox, magnetic field box type - * @tparam T_Mapping mapper functor type - * @tparam T_Acc alpaka accelerator type - * - * @param 
acc alpaka accelerator - * @param curl functor to calculate the electric field, interface must be - * `operator()(T_EBox)` - * @param fieldB magnetic field iterator - * @param fieldE electric field iterator - * @param mapper functor to map a block to a supercell - */ - template< - typename T_Curl, - typename T_EBox, - typename T_BBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_Curl const curl, - T_BBox fieldB, - T_EBox const fieldE, - T_Mapping mapper - ) const - { - using namespace mappings::threads; - - constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_workers; - - uint32_t const workerIdx = threadIdx.x; - - auto cachedE = CachedBox::create< - 0u, - typename T_EBox::ValueType - >( - acc, - T_BlockDescription( ) - ); - - nvidia::functors::Assign assign; - DataSpace< simDim > const block( mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) ); - DataSpace< simDim > const blockCell = block * MappingDesc::SuperCellSize::toRT( ); - - auto fieldEBlock = fieldE.shift( blockCell ); - - ThreadCollective< - T_BlockDescription, - numWorkers - > collective( workerIdx ); - - collective( - acc, - assign, - cachedE, - fieldEBlock - ); - - __syncthreads(); - - constexpr float_X dt = DELTA_T; - - ForEachIdx< - IdxConfig< - cellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + /** update electric field + * + * @tparam T_Curl curl functor type + * @tparam T_EBox pmacc::DataBox, electric field box type + * @tparam T_BBox pmacc::DataBox, magnetic field box type + * @tparam T_Mapping mapper functor type + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param curl functor to calculate the electric field, interface must be + * `operator()(T_BBox)` + * @param fieldE electric field iterator + * @param fieldB magnetic field iterator + * @param mapper functor to 
map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_Curl const curl, + T_EBox fieldE, + T_BBox const fieldB, + T_Mapping mapper) const + { + using namespace mappings::threads; + + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_workers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + auto cachedB = CachedBox::create<0u, typename T_BBox::ValueType>(acc, T_BlockDescription()); + + nvidia::functors::Assign assign; + DataSpace const block( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + DataSpace const blockCell = block * MappingDesc::SuperCellSize::toRT(); + + auto fieldBBlock = fieldB.shift(blockCell); + + ThreadCollective collective(workerIdx); + + collective(acc, assign, cachedB, fieldBBlock); + + cupla::__syncthreads(acc); + + constexpr float_X c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; + constexpr float_X dt = DELTA_T; + + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + /* cell index within the superCell */ + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + + fieldE(blockCell + cellIdx) += curl(cachedB.shift(cellIdx)) * c2 * dt; + }); + } + }; + + /** compute magnetic field + * + * @tparam T_numWorkers number of workers + * @tparam T_BlockDescription field (electric and magnetic) domain description + */ + template + struct KernelUpdateBHalf { - /* cell index within the superCell */ - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - - fieldB( blockCell + cellIdx ) -= curl( cachedE.shift( cellIdx ) ) * float_X( 0.5 ) * dt; - } - ); - } - }; - -} // namespace yee -} // namespace maxwellSolver -} // namespace fields + /** update magnetic field + * + * @tparam T_Curl curl functor type + * @tparam T_EBox pmacc::DataBox, electric field box type + * @tparam T_BBox pmacc::DataBox, magnetic field box type + * @tparam T_Mapping 
mapper functor type + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param curl functor to calculate the electric field, interface must be + * `operator()(T_EBox)` + * @param fieldB magnetic field iterator + * @param fieldE electric field iterator + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_Curl const curl, + T_BBox fieldB, + T_EBox const fieldE, + T_Mapping mapper) const + { + using namespace mappings::threads; + + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_workers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + auto cachedE = CachedBox::create<0u, typename T_EBox::ValueType>(acc, T_BlockDescription()); + + nvidia::functors::Assign assign; + DataSpace const block( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + DataSpace const blockCell = block * MappingDesc::SuperCellSize::toRT(); + + auto fieldEBlock = fieldE.shift(blockCell); + + ThreadCollective collective(workerIdx); + + collective(acc, assign, cachedE, fieldEBlock); + + cupla::__syncthreads(acc); + + constexpr float_X dt = DELTA_T; + + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + /* cell index within the superCell */ + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + + fieldB(blockCell + cellIdx) -= curl(cachedE.shift(cellIdx)) * float_X(0.5) * dt; + }); + } + }; + + } // namespace yee + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/YeePML/Field.hpp b/include/picongpu/fields/MaxwellSolver/YeePML/Field.hpp index b27e0d6144..e9b6759a8c 100644 --- a/include/picongpu/fields/MaxwellSolver/YeePML/Field.hpp +++ b/include/picongpu/fields/MaxwellSolver/YeePML/Field.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* 
Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -44,507 +44,434 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - //! Additional node values for E or B in PML - struct NodeValues + namespace fields { - - /* The first letter corresponds to x, y, z field components, - * the second to transverse directions for the component - */ - float_X xy, xz, yx, yz, zx, zy; - - //! Number of components per node value - static constexpr int numComponents = 6; - - /** Construct node values - * - * @param initialValue initial value for all components - */ - HDINLINE NodeValues( float_X const initialValue = 0._X ); - - /** Construction for compatibility with pmacc vectors - * - * @param initialValue initial value for all components - */ - HDINLINE static const NodeValues create( float_X const initialValue ); - - /** Element access for compatibility with pmacc vectors - * - * This is a utility for checkpointing and does not need a device - * version. For performance considerations does not check that the index - * is valid and relies on the components being stored in order, without - * padding. - * - * @param idx index less than 6 - */ - float_X & operator[ ]( uint32_t const idx ); - - /** Const element access for compatibility with pmacc vectors - * - * This is a utility for checkpointing and does not need a device - * version. For performance considerations does not check that the index - * is valid and relies on the components being stored in order, without - * padding. - * - * @param idx index less than 6 - */ - float_X const & operator[ ]( uint32_t const idx ) const; - - }; - - /** Data box type used for PML fields in kernels - * - * Only stores data in the PML area using the given 1d data box. - * Access is provided via a simDim-dimensional index, same as for other - * grid values. 
- * - * @tparam T_DataBox1d underlying 1d data box type - */ - template< typename T_DataBox1d > - class OuterLayerBox - { - public: - - //! Underlying data box type - using DataBox = T_DataBox1d; - - //! Element type - using ValueType = typename DataBox::ValueType; - - //! Grid index type to be used for access - using Idx = pmacc::DataSpace< simDim >; - - /** Create an outer layer box - * - * Only stores data in the PML area using the given 1d data box. - * Access is provided via a simDim-dimensional index, same as for other - * grid values. - * - * @param gridLayout grid layout, as for normal fields - * @param globalThickness global PML thickness - * @param box underlying data box, preallocated to fit all data - * the constructed OuterLayerBox does not own the box memory, - * so can only be used before the box is reallocated - */ - OuterLayerBox( - GridLayout< simDim > const & gridLayout, - Thickness const & globalThickness, - DataBox box - ); - - /** Constant element access by a simDim-dimensional index - * - * @param idx grid index - */ - HDINLINE ValueType const & operator( )( Idx const & idx ) const; - - /** Element access by a simDim-dimensional index - * - * @param idx grid index - */ - HDINLINE ValueType & operator( )( Idx const & idx ); - - private: - - /** Convert a simDim-dimensional index to a linear one - * - * @param idxWithGuard grid index with guard - */ - HDINLINE int getLinearIdx( Idx const & idxWithGuard ) const; - - //! A single Cartesial layer that is part of the outer layer box - class Layer + namespace maxwellSolver { - public: - - /** Create a layer - * - * @param beginIdx first index - * @param endIdx index right after the last - */ - HDINLINE Layer( - Idx const & beginIdx = Idx::create( 0 ), - Idx const & endIdx = Idx::create( 0 ) - ); - - /** Check if the layer contains given index - * - * @param idx grid index without guard - */ - HDINLINE bool contains( Idx const & idx ) const; - - //! 
Get the simDim-dimensional volume of the layer - HDINLINE int getVolume( ) const; - - /** Get a linear index inside a layer - * - * Same as in pmacc::DataBox, x is minor and z is major. - * - * @param idx grid index without guard - */ - HDINLINE int getLinearIdx( Idx const & idx ) const; - - private: - - //! First index of the layer - Idx beginIdx; - - //! Size of the layer - Idx size; - - //! simDim-dimensional volume of the layer - int volume; - - }; - - //! Number of layers: a positive and a negative one for each axis - static constexpr auto numLayers = 2 * simDim; - - /** Cartesian layers constituting the outer layer - * - * The ordering inside the array is z-y-x for 3d and y-x for 2d. - * However, it should not be relevant since the layers do not intersect, - * and logically it represents a set of layers - */ - Layer layers[ numLayers ]; - - //! Data box, does not own memory - DataBox box; - - //! Guard size - Idx const guardSize; - - }; - - /** Base class for implementation inheritance in classes for the - * electromagnetic fields in PML - * - * Stores field values on host and device and provides data synchronization - * between them. - * - * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and - * ISimulationData. - */ - class Field : - public SimulationFieldHelper< MappingDesc >, - public ISimulationData + namespace yeePML + { + //! Additional node values for E or B in PML + struct NodeValues + { + /* The first letter corresponds to x, y, z field components, + * the second to transverse directions for the component + */ + float_X xy, xz, yx, yz, zx, zy; + + //! 
Number of components per node value + static constexpr int numComponents = 6; + + /** Construct node values + * + * @param initialValue initial value for all components + */ + HDINLINE NodeValues(float_X const initialValue = 0._X); + + /** Construction for compatibility with pmacc vectors + * + * @param initialValue initial value for all components + */ + HDINLINE static const NodeValues create(float_X const initialValue); + + /** Element access for compatibility with pmacc vectors + * + * This is a utility for checkpointing and does not need a device + * version. For performance considerations does not check that the index + * is valid and relies on the components being stored in order, without + * padding. + * + * @param idx index less than 6 + */ + float_X& operator[](uint32_t const idx); + + /** Const element access for compatibility with pmacc vectors + * + * This is a utility for checkpointing and does not need a device + * version. For performance considerations does not check that the index + * is valid and relies on the components being stored in order, without + * padding. + * + * @param idx index less than 6 + */ + float_X const& operator[](uint32_t const idx) const; + }; + + /** Data box type used for PML fields in kernels + * + * Only stores data in the PML area using the given 1d data box. + * Access is provided via a simDim-dimensional index, same as for other + * grid values. + * + * @tparam T_DataBox1d underlying 1d data box type + */ + template + class OuterLayerBox + { + public: + //! Underlying data box type + using DataBox = T_DataBox1d; + + //! Element type + using ValueType = typename DataBox::ValueType; + + //! Grid index type to be used for access + using Idx = pmacc::DataSpace; + + /** Create an outer layer box + * + * Only stores data in the PML area using the given 1d data box. + * Access is provided via a simDim-dimensional index, same as for other + * grid values. 
+ * + * @param gridLayout grid layout, as for normal fields + * @param globalThickness global PML thickness + * @param box underlying data box, preallocated to fit all data + * the constructed OuterLayerBox does not own the box memory, + * so can only be used before the box is reallocated + */ + OuterLayerBox(GridLayout const& gridLayout, Thickness const& globalThickness, DataBox box); + + /** Constant element access by a simDim-dimensional index + * + * @param idx grid index + */ + HDINLINE ValueType const& operator()(Idx const& idx) const; + + /** Element access by a simDim-dimensional index + * + * @param idx grid index + */ + HDINLINE ValueType& operator()(Idx const& idx); + + private: + /** Convert a simDim-dimensional index to a linear one + * + * @param idxWithGuard grid index with guard + */ + HDINLINE int getLinearIdx(Idx const& idxWithGuard) const; + + //! A single Cartesial layer that is part of the outer layer box + class Layer + { + public: + /** Create a layer + * + * @param beginIdx first index + * @param endIdx index right after the last + */ + HDINLINE Layer(Idx const& beginIdx = Idx::create(0), Idx const& endIdx = Idx::create(0)); + + /** Check if the layer contains given index + * + * @param idx grid index without guard + */ + HDINLINE bool contains(Idx const& idx) const; + + //! Get the simDim-dimensional volume of the layer + HDINLINE int getVolume() const; + + /** Get a linear index inside a layer + * + * Same as in pmacc::DataBox, x is minor and z is major. + * + * @param idx grid index without guard + */ + HDINLINE int getLinearIdx(Idx const& idx) const; + + private: + //! First index of the layer + Idx beginIdx; + + //! Size of the layer + Idx size; + + //! simDim-dimensional volume of the layer + int volume; + }; + + //! 
Number of layers: a positive and a negative one for each axis + static constexpr auto numLayers = 2 * simDim; + + /** Cartesian layers constituting the outer layer + * + * The ordering inside the array is z-y-x for 3d and y-x for 2d. + * However, it should not be relevant since the layers do not intersect, + * and logically it represents a set of layers + */ + Layer layers[numLayers]; + + //! Data box, does not own memory + DataBox box; + + //! Guard size + Idx const guardSize; + }; + + /** Base class for implementation inheritance in classes for the + * electromagnetic fields in PML + * + * Stores field values on host and device and provides data synchronization + * between them. + * + * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and + * ISimulationData. + */ + class Field + : public SimulationFieldHelper + , public ISimulationData + { + public: + //! Type of each field value + using ValueType = NodeValues; + + //! Number of components of ValueType, for serialization + static constexpr int numComponents = NodeValues::numComponents; + + //! Unit type of field components + using UnitValueType = pmacc::math::Vector; + + /** Type of host-device buffer for field values + * + * The buffer is logically 1d, but technically multidimentional + * for easier coupling to output utilities. + */ + using Buffer = pmacc::GridBuffer; + + /** Type of data box for field values on host and device + * + * The data box is logically 1d, but technically multidimentional + * for easier coupling to output utilities. + */ + using DataBoxType = pmacc::DataBox>; + + //! Data box type used for PML fields in kernels + using OuterLayerBoxType = OuterLayerBox>; + + //! Size of supercell + using SuperCellSize = MappingDesc::SuperCellSize; + + /** Create a field + * + * @param cellDescription mapping for kernels + * @param globalThickness global PML thickness + */ + HINLINE Field(MappingDesc const& cellDescription, Thickness const& globalThickness); + + //! 
Get a reference to the host-device buffer for the field values + HINLINE Buffer& getGridBuffer(); + + //! Get the grid layout + HINLINE pmacc::GridLayout getGridLayout(); + + //! Get the host data box for the field values + HINLINE DataBoxType getHostDataBox(); + + //! Get the device data box for the field values + HINLINE DataBoxType getDeviceDataBox(); + + //! Get the device outer layer data box for the field values + HINLINE OuterLayerBoxType getDeviceOuterLayerBox(); + + /** Start asynchronous communication of field values + * + * @param serialEvent event to depend on + */ + HINLINE virtual EventTask asyncCommunication(EventTask serialEvent); + + /** Reset the host-device buffer for field values + * + * @param currentStep index of time iteration + */ + HINLINE void reset(uint32_t currentStep) override; + + //! Synchronize device data with host data + HINLINE void syncToDevice() override; + + //! Synchronize host data with device data + HINLINE void synchronize() override; + + private: + //! Host-device buffer for field values + std::unique_ptr data; + + //! Grid layout for normal (non-PML) fields + pmacc::GridLayout gridLayout; + + // PML global thickness + Thickness globalThickness; + }; + + //! Data box type used for PML fields in kernels + using FieldBox = Field::OuterLayerBoxType; + + /** Representation of the additinal electric field components in PML + * + * Stores field values on host and device and provides data synchronization + * between them. + * + * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and + * ISimulationData. + */ + class FieldE : public Field + { + public: + /** Create a field + * + * @param cellDescription mapping for kernels + * @param globalThickness global PML thickness + */ + HINLINE FieldE(MappingDesc const& cellDescription, Thickness const& globalThickness) + : Field(cellDescription, globalThickness) + { + } + + //! Get id + HINLINE SimulationDataId getUniqueId() + { + return getName(); + } + + //! 
Get units of field components + HDINLINE static UnitValueType getUnit() + { + return UnitValueType::create(UNIT_EFIELD); + } + + /** Get unit representation as powers of the 7 base measures + * + * Characterizing the record's unit in SI + * (length L, mass M, time T, electric current I, + * thermodynamic temperature theta, amount of substance N, + * luminous intensity J) + */ + HINLINE static std::vector getUnitDimension() + { + return picongpu::FieldE::getUnitDimension(); + } + + //! Get text name + HINLINE static std::string getName() + { + return "Convolutional PML E"; + } + }; + + /** Representation of the additinal magnetic field components in PML + * + * Stores field values on host and device and provides data synchronization + * between them. + * + * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and + * ISimulationData. + */ + class FieldB : public Field + { + public: + /** Create a field + * + * @param cellDescription mapping for kernels + * @param globalThickness global PML thickness + */ + HINLINE FieldB(MappingDesc const& cellDescription, Thickness const& globalThickness) + : Field(cellDescription, globalThickness) + { + } + + //! Get id + HINLINE SimulationDataId getUniqueId() + { + return getName(); + } + + //! Get units of field components + HDINLINE static UnitValueType getUnit() + { + return UnitValueType::create(UNIT_BFIELD); + } + + /** Get unit representation as powers of the 7 base measures + * + * Characterizing the record's unit in SI + * (length L, mass M, time T, electric current I, + * thermodynamic temperature theta, amount of substance N, + * luminous intensity J) + */ + HINLINE static std::vector getUnitDimension() + { + return picongpu::FieldB::getUnitDimension(); + } + + //! Get text name + HINLINE static std::string getName() + { + return "Convolutional PML B"; + } + }; + + } // namespace yeePML + } // namespace maxwellSolver + } // namespace fields + + namespace traits { - public: - - //! 
Type of each field value - using ValueType = NodeValues; - - //! Number of components of ValueType, for serialization - static constexpr int numComponents = NodeValues::numComponents; - - //! Unit type of field components - using UnitValueType = pmacc::math::Vector< float_64, numComponents >; - - /** Type of host-device buffer for field values - * - * The buffer is logically 1d, but technically multidimentional - * for easier coupling to output utilities. - */ - using Buffer = pmacc::GridBuffer< - ValueType, - simDim - >; - - /** Type of data box for field values on host and device - * - * The data box is logically 1d, but technically multidimentional - * for easier coupling to output utilities. - */ - using DataBoxType = pmacc::DataBox< - pmacc::PitchedBox< - ValueType, - simDim - > - >; - - //! Data box type used for PML fields in kernels - using OuterLayerBoxType = OuterLayerBox< - pmacc::DataBoxDim1Access< DataBoxType > - >; - - //! Size of supercell - using SuperCellSize = MappingDesc::SuperCellSize ; - - /** Create a field + /** Field position traits for checkpointing * - * @param cellDescription mapping for kernels - * @param globalThickness global PML thickness + * PML fields do not fit well, for now just copy the normal fields. + * Specialize only for Yee cell type, as this is the only one supported. */ - HINLINE Field( - MappingDesc const & cellDescription, - Thickness const & globalThickness - ); - - //! Get a reference to the host-device buffer for the field values - HINLINE Buffer & getGridBuffer( ); - - //! Get the grid layout - HINLINE pmacc::GridLayout< simDim > getGridLayout( ); - - //! Get the host data box for the field values - HINLINE DataBoxType getHostDataBox( ); - - //! Get the device data box for the field values - HINLINE DataBoxType getDeviceDataBox( ); - - //! 
Get the device outer layer data box for the field values - HINLINE OuterLayerBoxType getDeviceOuterLayerBox( ); - - /** Start asynchronous communication of field values - * - * @param serialEvent event to depend on - */ - HINLINE virtual EventTask asyncCommunication( EventTask serialEvent ); - - /** Reset the host-device buffer for field values - * - * @param currentStep index of time iteration - */ - HINLINE void reset( uint32_t currentStep ) override; - - //! Synchronize device data with host data - HINLINE void syncToDevice( ) override; - - //! Synchronize host data with device data - HINLINE void synchronize( ) override; - - private: - - //! Host-device buffer for field values - std::unique_ptr< Buffer > data; - - //! Grid layout for normal (non-PML) fields - pmacc::GridLayout< simDim > gridLayout; - - // PML global thickness - Thickness globalThickness; - - }; - - //! Data box type used for PML fields in kernels - using FieldBox = Field::OuterLayerBoxType; - - /** Representation of the additinal electric field components in PML - * - * Stores field values on host and device and provides data synchronization - * between them. - * - * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and - * ISimulationData. - */ - class FieldE : public Field - { - public: - - /** Create a field - * - * @param cellDescription mapping for kernels - * @param globalThickness global PML thickness - */ - HINLINE FieldE( - MappingDesc const & cellDescription, - Thickness const & globalThickness - ): - Field( - cellDescription, - globalThickness - ) - { - } - - //! Get id - HINLINE SimulationDataId getUniqueId( ) - { - return getName( ); - } - - //! 
Get units of field components - HDINLINE static UnitValueType getUnit( ) + template + struct FieldPosition + : FieldPosition { - return UnitValueType::create( UNIT_EFIELD ); - } + }; - /** Get unit representation as powers of the 7 base measures + /** Field position traits for checkpointing * - * Characterizing the record's unit in SI - * (length L, mass M, time T, electric current I, - * thermodynamic temperature theta, amount of substance N, - * luminous intensity J) + * PML fields do not fit well, for now just copy the normal fields. + * Specialize only for Yee cell type, as this is the only one supported. */ - HINLINE static std::vector< float_64 > getUnitDimension( ) + template + struct FieldPosition + : FieldPosition { - return picongpu::FieldE::getUnitDimension( ); - } - - //! Get text name - HINLINE static std::string getName( ) - { - return "Convolutional PML E"; - } - - }; - - /** Representation of the additinal magnetic field components in PML - * - * Stores field values on host and device and provides data synchronization - * between them. - * - * Implements interfaces defined by SimulationFieldHelper< MappingDesc > and - * ISimulationData. - */ - class FieldB : public Field - { - public: + }; - /** Create a field - * - * @param cellDescription mapping for kernels - * @param globalThickness global PML thickness + /** Field domain boundness trait for output and checkpointing: + * PML fields are not domain-bound */ - HINLINE FieldB( - MappingDesc const & cellDescription, - Thickness const & globalThickness - ): - Field( - cellDescription, - globalThickness - ) - { - } - - //! Get id - HINLINE SimulationDataId getUniqueId( ) + template<> + struct IsFieldDomainBound : std::false_type { - return getName( ); - } - - //! 
Get units of field components - HDINLINE static UnitValueType getUnit( ) - { - return UnitValueType::create( UNIT_BFIELD ); - } + }; - /** Get unit representation as powers of the 7 base measures - * - * Characterizing the record's unit in SI - * (length L, mass M, time T, electric current I, - * thermodynamic temperature theta, amount of substance N, - * luminous intensity J) + /** Field domain boundness trait for output and checkpointing: + * PML fields are not domain-bound */ - HINLINE static std::vector< float_64 > getUnitDimension( ) - { - return picongpu::FieldB::getUnitDimension( ); - } - - //! Get text name - HINLINE static std::string getName( ) + template<> + struct IsFieldDomainBound : std::false_type { - return "Convolutional PML B"; - } - - }; - -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields - -namespace traits -{ - - /** Field position traits for checkpointing - * - * PML fields do not fit well, for now just copy the normal fields. - * Specialize only for Yee cell type, as this is the only one supported. - */ - template< uint32_t T_dim > - struct FieldPosition< - fields::cellType::Yee, - fields::maxwellSolver::yeePML::FieldE, - T_dim - > : FieldPosition< - fields::cellType::Yee, - FieldE, - T_dim - > - { - }; - - /** Field position traits for checkpointing - * - * PML fields do not fit well, for now just copy the normal fields. - * Specialize only for Yee cell type, as this is the only one supported. 
- */ - template< uint32_t T_dim > - struct FieldPosition< - fields::cellType::Yee, - fields::maxwellSolver::yeePML::FieldB, - T_dim - > : FieldPosition< - fields::cellType::Yee, - FieldB, - T_dim - > - { - }; - - /** Field domain boundness trait for output and checkpointing: - * PML fields are not domain-bound - */ - template< > - struct IsFieldDomainBound< fields::maxwellSolver::yeePML::FieldE > : - std::false_type - { - }; - - /** Field domain boundness trait for output and checkpointing: - * PML fields are not domain-bound - */ - template< > - struct IsFieldDomainBound< fields::maxwellSolver::yeePML::FieldB > : - std::false_type - { - }; + }; -} // namespace traits + } // namespace traits } // namespace picongpu namespace pmacc { -namespace traits -{ - - //! Node value traits for checkpointing - template< > - struct GetComponentsType< - picongpu::fields::maxwellSolver::yeePML::NodeValues, - false - > - { - typedef picongpu::float_X type; - }; - - //! Node value traits for checkpointing - template< > - struct GetNComponents< - picongpu::fields::maxwellSolver::yeePML::NodeValues, - false - > + namespace traits { - static constexpr uint32_t value = - picongpu::fields::maxwellSolver::yeePML::NodeValues::numComponents; - }; + //! Node value traits for checkpointing + template<> + struct GetComponentsType + { + typedef picongpu::float_X type; + }; + + //! 
Node value traits for checkpointing + template<> + struct GetNComponents + { + static constexpr uint32_t value = picongpu::fields::maxwellSolver::yeePML::NodeValues::numComponents; + }; -} // namespace traits + } // namespace traits } // namespace pmacc diff --git a/include/picongpu/fields/MaxwellSolver/YeePML/Field.tpp b/include/picongpu/fields/MaxwellSolver/YeePML/Field.tpp index 63d98e0ff8..a80ecb4689 100644 --- a/include/picongpu/fields/MaxwellSolver/YeePML/Field.tpp +++ b/include/picongpu/fields/MaxwellSolver/YeePML/Field.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch, Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. @@ -31,315 +31,266 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - namespace detail + namespace fields { - - /** Construct an simDim-dimensional index out of 3 components. 
- * - * For 2d z is ignored - * - * @param x x component - * @param y y component - * @param z z component - */ - HDINLINE pmacc::DataSpace< simDim > makeIdx( - int const x, - int const y, - int const z - ) - { - auto const idx = pmacc::DataSpace< 3 >{ x, y, z }; - pmacc::DataSpace< simDim > result; - for( uint32_t dim = 0u; dim < simDim; dim++ ) - result[ dim ] = idx[ dim ]; - return result; - } - - /** Get linear size of the outer layer box - * - * @param gridLayout grid layout, as for normal fields - * @param globalThickness global PML thickness - */ - HDINLINE int getOuterLayerBoxLinearSize( - GridLayout< simDim > const & gridLayout, - Thickness const & globalThickness - ) + namespace maxwellSolver { - // All sizes are without guard, since Pml is only on the internal area - auto const gridDataSpace = gridLayout.getDataSpaceWithoutGuarding( ); - auto const nonPmlDataSpace = gridDataSpace - - ( globalThickness.positiveBorder + globalThickness.negativeBorder ); - auto const numGridCells = gridDataSpace.productOfComponents( ); - auto const numNonPmlCells = nonPmlDataSpace.productOfComponents( ); - return numGridCells - numNonPmlCells; - } - - } // namespace detail - - HDINLINE NodeValues::NodeValues( float_X const initialValue /* = 0._X */ ): - xy( initialValue ), - xz( initialValue ), - yx( initialValue ), - yz( initialValue ), - zx( initialValue ), - zy( initialValue ) - { - } - - HDINLINE const NodeValues NodeValues::create( - float_X const initialValue - ) - { - return NodeValues{ initialValue }; - } - - float_X & NodeValues::operator[ ]( uint32_t const idx ) - { - // Here it is safe to call the const version - auto constThis = const_cast< NodeValues const * >( this ); - return const_cast< float_X & >( ( *constThis )[ idx ] ); - } - - float_X const & NodeValues::operator[ ]( uint32_t const idx ) const - { - return *( &xy + idx ); - } - - template< typename T_Value > - OuterLayerBox< T_Value >::OuterLayerBox( - GridLayout< simDim > const & gridLayout, - 
Thickness const & globalThickness, - DataBox box - ): - guardSize( gridLayout.getGuard( ) ), - box( box ) - { - auto const negativeSize = globalThickness.negativeBorder; - auto const positiveSize = globalThickness.positiveBorder; - /* The region of interest is grid without guard, - * which consists of PML and internal area - */ - auto const gridSize = gridLayout.getDataSpaceWithoutGuarding( ); - auto const positiveBegin = gridSize - positiveSize; - - // Note: since this should compile for 2d, .z( ) can't be used - using detail::makeIdx; - int layerIdx = 0; - if( simDim == 3 ) - { - auto const negativeZLayer = Layer{ - makeIdx( 0, 0, 0 ), - makeIdx( gridSize[ 0 ], gridSize[ 1 ], negativeSize[ 2 ] ) - }; - layers[ layerIdx++ ] = negativeZLayer; - auto const positiveZLayer = Layer{ - makeIdx( 0, 0, positiveBegin[ 2 ] ), - makeIdx( gridSize[ 0 ], gridSize[ 1 ], gridSize[ 2 ] ) - }; - layers[ layerIdx++ ] = positiveZLayer; - } - - auto const negativeYLayer = Layer{ - makeIdx( 0, 0, negativeSize[ 2 ] ), - makeIdx( gridSize[ 0 ], negativeSize[ 1 ], positiveBegin[ 2 ] ) - }; - layers[ layerIdx++ ] = negativeYLayer; - auto const positiveYLayer = Layer{ - makeIdx( 0, positiveBegin[ 1 ], negativeSize[ 2 ] ), - makeIdx( gridSize[ 0 ], gridSize[ 1 ], positiveBegin[ 2 ] ) - }; - layers[ layerIdx++ ] = positiveYLayer; - - auto const negativeXLayer = Layer{ - makeIdx( 0, negativeSize[ 1 ], negativeSize[ 2 ] ), - makeIdx( negativeSize[ 0 ], positiveBegin[ 1 ], positiveBegin[ 2 ] ) - }; - layers[ layerIdx++ ] = negativeXLayer; - auto const positiveXLayer = Layer{ - makeIdx( positiveBegin[ 0 ], negativeSize[ 1 ], negativeSize[ 2 ] ), - makeIdx( gridSize[ 0 ], positiveBegin[ 1 ], positiveBegin[ 2 ] ) - }; - layers[ layerIdx++ ] = positiveXLayer; - } - - template< typename T_Value > - HDINLINE typename OuterLayerBox< T_Value >::ValueType const & - OuterLayerBox< T_Value >::operator( )( Idx const & idx ) const - { - return box( - getLinearIdx( idx ) - ); - } - - template< typename 
T_Value > - HDINLINE typename OuterLayerBox< T_Value >::ValueType & - OuterLayerBox< T_Value >::operator( )( Idx const & idx ) - { - return box( - getLinearIdx( idx ) - ); - } - - template< typename T_Value > - HDINLINE int OuterLayerBox< T_Value >::getLinearIdx( - Idx const & idxWithGuard - ) const - { - /* Each PML layer provide a contiguous 1d index range. - * The resulting index is a sum of the baseIdx representing the total - * size of all previous layers and an index inside the current layer. - */ - auto const idx = idxWithGuard - guardSize; - int currentLayerBeginIdx = 0; - int result = -1; - for( Layer const & layer : layers ) - if( layer.contains( idx ) ) + namespace yeePML { - /* Note: here we could have returned the result directly, - * but chose to have a single return for potential - * performance gains on GPU. The break is not required, - * since each valid index belonds to exactly one layer. - */ - result = currentLayerBeginIdx + layer.getLinearIdx( idx ); - break; - } - else - currentLayerBeginIdx += layer.getVolume( ); - return result; - } - - template< typename T_Value > - HDINLINE OuterLayerBox< T_Value >::Layer::Layer( - Idx const & beginIdx, - Idx const & endIdx - ): - beginIdx{ beginIdx }, - size{ endIdx - beginIdx }, - volume{ size.productOfComponents( ) } - { - } - - template< typename T_Value > - HDINLINE bool OuterLayerBox< T_Value >::Layer::contains( - Idx const & idx - ) const - { - for( uint32_t dim = 0u; dim < simDim; dim++ ) - if( ( idx[ dim ] < beginIdx[ dim ] ) || - ( idx[ dim ] >= beginIdx[ dim ] + size[ dim ] ) ) - return false; - return true; - } - - template< typename T_Value > - HDINLINE int OuterLayerBox< T_Value >::Layer::getVolume( ) const - { - return volume; - } - - template< typename T_Value > - HDINLINE int OuterLayerBox< T_Value >::Layer::getLinearIdx( - Idx const & idx - ) const - { - // Convert to 3d zero-based index, for 2d keep .z( ) == 0 - pmacc::DataSpace< 3 > zeroBasedIdx{ 0, 0, 0 }; - for( uint32_t dim = 0u; dim 
< simDim; dim++ ) - zeroBasedIdx[ dim ] = idx[ dim ] - beginIdx[ dim ]; - return zeroBasedIdx.x( ) + zeroBasedIdx.y( ) * size.x( ) + - zeroBasedIdx.z( ) * size.y( ) * size.x( ); - } - - Field::Field( - MappingDesc const & cellDescription, - Thickness const & globalThickness ) : - SimulationFieldHelper< MappingDesc >( cellDescription ), - gridLayout( cellDescription.getGridLayout( ) ), - globalThickness( globalThickness ) - { - /* Create a simDim-dimentional buffer - * with size = linearSize x 1 [x 1 for 3d] - */ - auto size = pmacc::DataSpace< simDim >::create( 1 ); - size[ 0 ] = detail::getOuterLayerBoxLinearSize( - gridLayout, - globalThickness - ); - auto const guardSize = pmacc::DataSpace< simDim >::create( 0 ); - auto const layout = pmacc::GridLayout< simDim >( - size, - guardSize - ); - data.reset( - new Buffer( layout ) - ); - } - - Field::Buffer & Field::getGridBuffer( ) - { - return *data; - } - - pmacc::GridLayout< simDim > Field::getGridLayout( ) - { - return data->getGridLayout( ); - } - - Field::DataBoxType Field::getHostDataBox( ) - { - return data->getHostBuffer( ).getDataBox( ); - } - - Field::DataBoxType Field::getDeviceDataBox( ) - { - return data->getDeviceBuffer( ).getDataBox( ); - } - - Field::OuterLayerBoxType Field::getDeviceOuterLayerBox( ) - { - auto const boxWrapper1d = pmacc::DataBoxDim1Access< DataBoxType >{ - getDeviceDataBox( ), - data->getGridLayout( ).getDataSpace( ) - }; - /* Note: the outer layer box type just provides access to data, - * it does not own or make copy of the data (nor is that required) - */ - return OuterLayerBoxType{ - gridLayout, - globalThickness, - boxWrapper1d - }; - } - - EventTask Field::asyncCommunication( EventTask serialEvent ) - { - return data->asyncCommunication( serialEvent ); - } - - void Field::reset( uint32_t ) - { - data->getHostBuffer( ).reset( true ); - data->getDeviceBuffer( ).reset( false ); - } - - void Field::syncToDevice( ) - { - data->hostToDevice( ); - } - - void Field::synchronize( ) - { 
- data->deviceToHost( ); - } - -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields + namespace detail + { + /** Construct an simDim-dimensional index out of 3 components. + * + * For 2d z is ignored + * + * @param x x component + * @param y y component + * @param z z component + */ + HDINLINE pmacc::DataSpace makeIdx(int const x, int const y, int const z) + { + auto const idx = pmacc::DataSpace<3>{x, y, z}; + pmacc::DataSpace result; + for(uint32_t dim = 0u; dim < simDim; dim++) + result[dim] = idx[dim]; + return result; + } + + /** Get linear size of the outer layer box + * + * @param gridLayout grid layout, as for normal fields + * @param globalThickness global PML thickness + */ + HDINLINE int getOuterLayerBoxLinearSize( + GridLayout const& gridLayout, + Thickness const& globalThickness) + { + // All sizes are without guard, since Pml is only on the internal area + auto const gridDataSpace = gridLayout.getDataSpaceWithoutGuarding(); + auto const nonPmlDataSpace + = gridDataSpace - (globalThickness.positiveBorder + globalThickness.negativeBorder); + auto const numGridCells = gridDataSpace.productOfComponents(); + auto const numNonPmlCells = nonPmlDataSpace.productOfComponents(); + return numGridCells - numNonPmlCells; + } + + } // namespace detail + + HDINLINE NodeValues::NodeValues(float_X const initialValue /* = 0._X */) + : xy(initialValue) + , xz(initialValue) + , yx(initialValue) + , yz(initialValue) + , zx(initialValue) + , zy(initialValue) + { + } + + HDINLINE const NodeValues NodeValues::create(float_X const initialValue) + { + return NodeValues{initialValue}; + } + + float_X& NodeValues::operator[](uint32_t const idx) + { + // Here it is safe to call the const version + auto constThis = const_cast(this); + return const_cast((*constThis)[idx]); + } + + float_X const& NodeValues::operator[](uint32_t const idx) const + { + return *(&xy + idx); + } + + template + OuterLayerBox::OuterLayerBox( + GridLayout const& gridLayout, + 
Thickness const& globalThickness, + DataBox box) + : guardSize(gridLayout.getGuard()) + , box(box) + { + auto const negativeSize = globalThickness.negativeBorder; + auto const positiveSize = globalThickness.positiveBorder; + /* The region of interest is grid without guard, + * which consists of PML and internal area + */ + auto const gridSize = gridLayout.getDataSpaceWithoutGuarding(); + auto const positiveBegin = gridSize - positiveSize; + + // Note: since this should compile for 2d, .z( ) can't be used + using detail::makeIdx; + int layerIdx = 0; + if(simDim == 3) + { + auto const negativeZLayer + = Layer{makeIdx(0, 0, 0), makeIdx(gridSize[0], gridSize[1], negativeSize[2])}; + layers[layerIdx++] = negativeZLayer; + auto const positiveZLayer + = Layer{makeIdx(0, 0, positiveBegin[2]), makeIdx(gridSize[0], gridSize[1], gridSize[2])}; + layers[layerIdx++] = positiveZLayer; + } + + auto const negativeYLayer = Layer{ + makeIdx(0, 0, negativeSize[2]), + makeIdx(gridSize[0], negativeSize[1], positiveBegin[2])}; + layers[layerIdx++] = negativeYLayer; + auto const positiveYLayer = Layer{ + makeIdx(0, positiveBegin[1], negativeSize[2]), + makeIdx(gridSize[0], gridSize[1], positiveBegin[2])}; + layers[layerIdx++] = positiveYLayer; + + auto const negativeXLayer = Layer{ + makeIdx(0, negativeSize[1], negativeSize[2]), + makeIdx(negativeSize[0], positiveBegin[1], positiveBegin[2])}; + layers[layerIdx++] = negativeXLayer; + auto const positiveXLayer = Layer{ + makeIdx(positiveBegin[0], negativeSize[1], negativeSize[2]), + makeIdx(gridSize[0], positiveBegin[1], positiveBegin[2])}; + layers[layerIdx++] = positiveXLayer; + } + + template + HDINLINE typename OuterLayerBox::ValueType const& OuterLayerBox::operator()( + Idx const& idx) const + { + return box(getLinearIdx(idx)); + } + + template + HDINLINE typename OuterLayerBox::ValueType& OuterLayerBox::operator()(Idx const& idx) + { + return box(getLinearIdx(idx)); + } + + template + HDINLINE int OuterLayerBox::getLinearIdx(Idx 
const& idxWithGuard) const + { + /* Each PML layer provide a contiguous 1d index range. + * The resulting index is a sum of the baseIdx representing the total + * size of all previous layers and an index inside the current layer. + */ + auto const idx = idxWithGuard - guardSize; + int currentLayerBeginIdx = 0; + int result = -1; + for(Layer const& layer : layers) + if(layer.contains(idx)) + { + /* Note: here we could have returned the result directly, + * but chose to have a single return for potential + * performance gains on GPU. The break is not required, + * since each valid index belonds to exactly one layer. + */ + result = currentLayerBeginIdx + layer.getLinearIdx(idx); + break; + } + else + currentLayerBeginIdx += layer.getVolume(); + return result; + } + + template + HDINLINE OuterLayerBox::Layer::Layer(Idx const& beginIdx, Idx const& endIdx) + : beginIdx{beginIdx} + , size{endIdx - beginIdx} + , volume{size.productOfComponents()} + { + } + + template + HDINLINE bool OuterLayerBox::Layer::contains(Idx const& idx) const + { + for(uint32_t dim = 0u; dim < simDim; dim++) + if((idx[dim] < beginIdx[dim]) || (idx[dim] >= beginIdx[dim] + size[dim])) + return false; + return true; + } + + template + HDINLINE int OuterLayerBox::Layer::getVolume() const + { + return volume; + } + + template + HDINLINE int OuterLayerBox::Layer::getLinearIdx(Idx const& idx) const + { + // Convert to 3d zero-based index, for 2d keep .z( ) == 0 + pmacc::DataSpace<3> zeroBasedIdx{0, 0, 0}; + for(uint32_t dim = 0u; dim < simDim; dim++) + zeroBasedIdx[dim] = idx[dim] - beginIdx[dim]; + return zeroBasedIdx.x() + zeroBasedIdx.y() * size.x() + zeroBasedIdx.z() * size.y() * size.x(); + } + + Field::Field(MappingDesc const& cellDescription, Thickness const& globalThickness) + : SimulationFieldHelper(cellDescription) + , gridLayout(cellDescription.getGridLayout()) + , globalThickness(globalThickness) + { + /* Create a simDim-dimentional buffer + * with size = linearSize x 1 [x 1 for 3d] + */ + 
auto size = pmacc::DataSpace::create(1); + size[0] = detail::getOuterLayerBoxLinearSize(gridLayout, globalThickness); + auto const guardSize = pmacc::DataSpace::create(0); + auto const layout = pmacc::GridLayout(size, guardSize); + data.reset(new Buffer(layout)); + } + + Field::Buffer& Field::getGridBuffer() + { + return *data; + } + + pmacc::GridLayout Field::getGridLayout() + { + return data->getGridLayout(); + } + + Field::DataBoxType Field::getHostDataBox() + { + return data->getHostBuffer().getDataBox(); + } + + Field::DataBoxType Field::getDeviceDataBox() + { + return data->getDeviceBuffer().getDataBox(); + } + + Field::OuterLayerBoxType Field::getDeviceOuterLayerBox() + { + auto const boxWrapper1d = pmacc::DataBoxDim1Access{ + getDeviceDataBox(), + data->getGridLayout().getDataSpace()}; + /* Note: the outer layer box type just provides access to data, + * it does not own or make copy of the data (nor is that required) + */ + return OuterLayerBoxType{gridLayout, globalThickness, boxWrapper1d}; + } + + EventTask Field::asyncCommunication(EventTask serialEvent) + { + return data->asyncCommunication(serialEvent); + } + + void Field::reset(uint32_t) + { + data->getHostBuffer().reset(true); + data->getDeviceBuffer().reset(false); + } + + void Field::syncToDevice() + { + data->hostToDevice(); + } + + void Field::synchronize() + { + data->deviceToHost(); + } + + } // namespace yeePML + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/YeePML/Parameters.hpp b/include/picongpu/fields/MaxwellSolver/YeePML/Parameters.hpp index 4daa44d34d..6deba68a9e 100644 --- a/include/picongpu/fields/MaxwellSolver/YeePML/Parameters.hpp +++ b/include/picongpu/fields/MaxwellSolver/YeePML/Parameters.hpp @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 Sergei Bastrakov +/* Copyright 2019-2021 Sergei Bastrakov * * This file is part of PIConGPU. 
* @@ -29,94 +29,89 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - /** Parameters of PML, except thickness - * - * A detailed description and recommended ranges are given in pml.param, - * normalizations and unit conversions in pml.unitless. - */ - struct Parameters + namespace fields { - /** Max value of artificial electric conductivity - * - * Components correspond to directions. Normalized, so that - * normalizedSigma = sigma / eps0 = sigma* / mue0. - * Unit: 1/unit_time in PIC units - */ - floatD_X normalizedSigmaMax; + namespace maxwellSolver + { + namespace yeePML + { + /** Parameters of PML, except thickness + * + * A detailed description and recommended ranges are given in pml.param, + * normalizations and unit conversions in pml.unitless. + */ + struct Parameters + { + /** Max value of artificial electric conductivity + * + * Components correspond to directions. Normalized, so that + * normalizedSigma = sigma / eps0 = sigma* / mue0. + * Unit: 1/unit_time in PIC units + */ + floatD_X normalizedSigmaMax; - /** Order of polynomial growth of sigma and kappa - * - * The growth is from PML internal boundary to the external boundary. - * Sigma grows from 0, kappa from 1, both to their max values. - */ - float_X sigmaKappaGradingOrder; + /** Order of polynomial growth of sigma and kappa + * + * The growth is from PML internal boundary to the external boundary. + * Sigma grows from 0, kappa from 1, both to their max values. + */ + float_X sigmaKappaGradingOrder; - /** Max value of coordinate stretching coefficient - * - * Unitless. - */ - floatD_X kappaMax; + /** Max value of coordinate stretching coefficient + * + * Unitless. + */ + floatD_X kappaMax; - /** Max value of complex frequency shift - * - * Components correspond to directions. Normalized by eps0. - * Unit: 1/unit_time in PIC units - */ - floatD_X normalizedAlphaMax; + /** Max value of complex frequency shift + * + * Components correspond to directions. 
Normalized by eps0. + * Unit: 1/unit_time in PIC units + */ + floatD_X normalizedAlphaMax; - /** Order of polynomial growth of alpha - * - * The growth is from PML external boundary to the internal boundary. - * Grows from 0 to the max value. - */ - float_X alphaGradingOrder; - }; + /** Order of polynomial growth of alpha + * + * The growth is from PML external boundary to the internal boundary. + * Grows from 0 to the max value. + */ + float_X alphaGradingOrder; + }; - //! Thickness of PML at each border, in number of cells - struct Thickness - { - //! Negative border is at the local domain sides minimum in coordinates - DataSpace< simDim > negativeBorder; - //! Positive border is at the local domain sides maximum in coordinates - DataSpace< simDim > positiveBorder; + //! Thickness of PML at each border, in number of cells + struct Thickness + { + //! Negative border is at the local domain sides minimum in coordinates + DataSpace negativeBorder; + //! Positive border is at the local domain sides maximum in coordinates + DataSpace positiveBorder; - /** Element access with indexing used in the .param file - * - * This is only for initialization convenience and so does not have - * a device version. Since this is not performance-critical at all, - * do range checks on parameters. 
- * - * @param axis 0 = x, 1 = y, 2 = z - * @param direction 0 = negative, 1 = positive - */ - int & operator()( uint32_t const axis, uint32_t const direction ) - { - if( axis >= simDim ) - throw std::out_of_range( - "In Thickness::operator() the axis = " + - std::to_string( axis ) + " is invalid" - ); - if( direction == 0 ) - return negativeBorder[ axis ]; - else - if( direction == 1 ) - return positiveBorder[ axis ]; - else - throw std::out_of_range( - "In Thickness::operator() the direction = " + - std::to_string( direction ) + " is invalid" - ); - } - }; + /** Element access with indexing used in the .param file + * + * This is only for initialization convenience and so does not have + * a device version. Since this is not performance-critical at all, + * do range checks on parameters. + * + * @param axis 0 = x, 1 = y, 2 = z + * @param direction 0 = negative, 1 = positive + */ + int& operator()(uint32_t const axis, uint32_t const direction) + { + if(axis >= simDim) + throw std::out_of_range( + "In Thickness::operator() the axis = " + std::to_string(axis) + " is invalid"); + if(direction == 0) + return negativeBorder[axis]; + else if(direction == 1) + return positiveBorder[axis]; + else + throw std::out_of_range( + "In Thickness::operator() the direction = " + std::to_string(direction) + + " is invalid"); + } + }; -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields + } // namespace yeePML + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.def b/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.def index 2baf4c786b..8447344789 100644 --- a/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.def +++ b/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, * Sergei Bastrakov * * This file is part of PIConGPU. 
@@ -20,74 +20,38 @@ #pragma once -#include "picongpu/fields/MaxwellSolver/Yee/Curl.def" #include "picongpu/fields/MaxwellSolver/Yee/Yee.def" #include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ - - template< - typename T_CurrentInterpolation = currentInterpolation::None, - typename T_CurlE = yee::CurlRight, - typename T_CurlB = yee::CurlLeft - > - class YeePML; - -} // namespace maxwellSolver -} // namespace fields - -namespace traits -{ - - template< - typename T_CurrentInterpolation, - typename T_CurlE, - typename T_CurlB - > - struct GetMargin< - picongpu::fields::maxwellSolver::YeePML< - T_CurrentInterpolation, - T_CurlE, - T_CurlB - >, FIELD_B - > : public GetMargin< - picongpu::fields::maxwellSolver::Yee< - T_CurrentInterpolation, - T_CurlE, - T_CurlB - >, - FIELD_B - > + namespace fields { - }; - - template< - typename T_CurrentInterpolation, - typename T_CurlE, - typename T_CurlB - > - struct GetMargin< - picongpu::fields::maxwellSolver::YeePML< - T_CurrentInterpolation, - T_CurlE, - T_CurlB - >, FIELD_E - > : public GetMargin< - picongpu::fields::maxwellSolver::Yee< - T_CurrentInterpolation, - T_CurlE, - T_CurlB - >, - FIELD_E - > + namespace maxwellSolver + { + template< + typename T_CurrentInterpolation = currentInterpolation::None, + typename T_CurlE = yee::CurlRight, + typename T_CurlB = yee::CurlLeft> + class YeePML; + + } // namespace maxwellSolver + } // namespace fields + + namespace traits { - }; - -} //namespace traits + template + struct GetMargin, FIELD_B> + : public GetMargin, FIELD_B> + { + }; + + template + struct GetMargin, FIELD_E> + : public GetMargin, FIELD_E> + { + }; + + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.hpp b/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.hpp index 0f1fede572..7131ec46e6 100644 --- a/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.hpp +++ 
b/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.hpp @@ -1,5 +1,5 @@ -/* Copyright 2019-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, - * Sergei Bastrakov +/* Copyright 2019-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, + * Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. * @@ -26,6 +26,7 @@ #include "picongpu/fields/MaxwellSolver/YeePML/Parameters.hpp" #include "picongpu/fields/MaxwellSolver/YeePML/YeePML.kernel" #include "picongpu/fields/cellType/Yee.hpp" +#include "picongpu/traits/GetMargin.hpp" #include @@ -35,419 +36,433 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ - - /* Note: the yeePML namespace is only used for details and not the YeePML - * itself in order to be consistent with other field solvers. - */ - namespace yeePML - { - namespace detail + namespace fields { - - /** Implementation of Yee + PML solver updates of E and B - * - * The original paper on this approach is J.A. Roden, S.D. Gedney. - * Convolution PML (CPML): An efficient FDTD implementation of the - * CFS - PML for arbitrary media. Microwave and optical technology - * letters. 27 (5), 334-339 (2000). - * https://doi.org/10.1002/1098-2760(20001205)27:5%3C334::AID-MOP14%3E3.0.CO;2-A - * Our implementation is based on a more detailed description in section - * 7.9 of the book A. Taflove, S.C. Hagness. Computational - * Electrodynamics. The Finite-Difference Time-Domain Method. Third - * Edition. Artech house, Boston (2005), referred to as - * [Taflove, Hagness]. - * - * @tparam T_CurlE functor to compute curl of E - * @tparam T_CurlB functor to compute curl of B - */ - template< - typename T_CurlE, - typename T_CurlB - > - class Solver + namespace maxwellSolver { - public: - - using CurlE = T_CurlE; - using CurlB = T_CurlB; - - Solver( MappingDesc const cellDescription ) : - cellDescription{ cellDescription } - { - initParameters( ); - initFields( ); - } - - //! 
Get a reference to field E - picongpu::FieldE & getFieldE( ) - { - return *( fieldE.get( ) ); - } - - //! Get a reference to field B - picongpu::FieldB & getFieldB( ) + /* Note: the yeePML namespace is only used for details and not the YeePML + * itself in order to be consistent with other field solvers. + */ + namespace yeePML { - return *( fieldB.get( ) ); - } - - /** Propagate B values in the given area by half a time step + namespace detail + { + /** Implementation of Yee + PML solver updates of E and B + * + * The original paper on this approach is J.A. Roden, S.D. Gedney. + * Convolution PML (CPML): An efficient FDTD implementation of the + * CFS - PML for arbitrary media. Microwave and optical technology + * letters. 27 (5), 334-339 (2000). + * https://doi.org/10.1002/1098-2760(20001205)27:5%3C334::AID-MOP14%3E3.0.CO;2-A + * Our implementation is based on a more detailed description in section + * 7.9 of the book A. Taflove, S.C. Hagness. Computational + * Electrodynamics. The Finite-Difference Time-Domain Method. Third + * Edition. Artech house, Boston (2005), referred to as + * [Taflove, Hagness]. + * + * @tparam T_CurlE functor to compute curl of E + * @tparam T_CurlB functor to compute curl of B + */ + template + class Solver + { + public: + using CurlE = T_CurlE; + using CurlB = T_CurlB; + + Solver(MappingDesc const cellDescription) : cellDescription{cellDescription} + { + initParameters(); + initFields(); + } + + //! Get a reference to field E + picongpu::FieldE& getFieldE() + { + return *(fieldE.get()); + } + + //! Get a reference to field B + picongpu::FieldB& getFieldB() + { + return *(fieldB.get()); + } + + /** Propagate B values in the given area by the first half of a time step + * + * This operation propagates grid values of field B by dt/2 and prepares the internal state of + * convolutional components so that calling updateBSecondHalf() afterwards competes the update. 
+ * + * @tparam T_Area area to apply updates to, the curl must be applicable to all points; + * normally CORE, BORDER, or CORE + BORDER + * + * @param currentStep index of the current time iteration + */ + template + void updateBFirstHalf(uint32_t const currentStep) + { + updateBHalf(currentStep, true); + } + + /** Propagate B values in the given area by the second half of a time step + * + * This operation propagates grid values of field B by dt/2 and relies on the internal state of + * convolutional components set up by a prior call to updateBFirstHalf(). After this call is + * completed, the convolutional components are in the state to call updateBFirstHalf() for the + * next time step. + * + * @tparam T_Area area to apply updates to, the curl must be applicable to all points; + * normally CORE, BORDER, or CORE + BORDER + * + * @param currentStep index of the current time iteration + */ + template + void updateBSecondHalf(uint32_t const currentStep) + { + updateBHalf(currentStep, false); + } + + /** Propagate E values in the given area by a time step. + * + * @tparam T_Area area to apply updates to, the curl must be + * applicable to all points; normally CORE, BORDER, or CORE + BORDER + * + * @param currentStep index of the current time iteration + */ + template + void updateE(uint32_t currentStep) + { + /* Courant-Friedrichs-Levy-Condition for Yee Field Solver: + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE*) != 0` is always true and defers the evaluation. + */ + PMACC_CASSERT_MSG( + Courant_Friedrichs_Levy_condition_failure____check_your_grid_param_file, + (SPEED_OF_LIGHT * SPEED_OF_LIGHT * DELTA_T * DELTA_T * INV_CELL2_SUM) <= 1.0 + && sizeof(T_CurlE*) != 0); + + constexpr auto numWorkers = getNumWorkers(); + using Kernel = yeePML::KernelUpdateE>; + AreaMapper mapper{cellDescription}; + // Note: optimization considerations same as in updateBHalf( ). 
+ PMACC_KERNEL(Kernel{}) + (mapper.getGridDim(), numWorkers)( + mapper, + getLocalParameters(mapper, currentStep), + CurlB(), + fieldB->getDeviceDataBox(), + fieldE->getDeviceDataBox(), + psiE->getDeviceOuterLayerBox()); + } + + private: + // Helper types for configuring kernels + template + using BlockDescription = pmacc::SuperCellDescription< + SuperCellSize, + typename traits::GetLowerMargin::type, + typename traits::GetUpperMargin::type>; + template + using AreaMapper = pmacc::AreaMapping; + + // Yee solver data + std::shared_ptr fieldE; + std::shared_ptr fieldB; + MappingDesc cellDescription; + + /* PML convolutional field data, defined as in [Taflove, Hagness], + * eq. (7.105a,b), and similar for other components + */ + std::shared_ptr psiE; + std::shared_ptr psiB; + + /** Thickness in terms of the global domain. + * + * We store only global thickness, as the local one can change + * during the simulation and so has to be recomputed for each time + * step. PML must be fully contained in a single layer of local + * domains near the global simulation area boundary. (Note that + * the domains of this layer might be changing, e.g. due to moving + * window.) There are no other limitations on PML thickness. In + * particular, it is independent of the BORDER area size. 
+ */ + Thickness globalSize; + Parameters parameters; + + /** Propagate B values in the given area by half a time step + * + * @tparam T_Area area to apply updates to, the curl must be + * applicable to all points; normally CORE, BORDER, or CORE + BORDER + * + * @param currentStep index of the current time iteration + * @param updatePsiB whether convolutional magnetic fields need to be updated, or are + * up-to-date + */ + template + void updateBHalf(uint32_t const currentStep, bool const updatePsiB) + { + constexpr auto numWorkers = getNumWorkers(); + using Kernel = yeePML::KernelUpdateBHalf>; + AreaMapper mapper{cellDescription}; + /* Note: here it is possible to first check if PML is enabled + * in the local domain at all, and otherwise optimize by calling + * the normal Yee update kernel. We do not do that, as this + * would be fragile with respect to future separation of PML + * into a plugin. + */ + PMACC_KERNEL(Kernel{}) + (mapper.getGridDim(), numWorkers)( + mapper, + getLocalParameters(mapper, currentStep), + CurlE(), + fieldE->getDeviceDataBox(), + updatePsiB, + fieldB->getDeviceDataBox(), + psiB->getDeviceOuterLayerBox()); + } + + void initParameters() + { + namespace pml = maxwellSolver::Pml; + + globalSize = getGlobalThickness(); + parameters.sigmaKappaGradingOrder = pml::SIGMA_KAPPA_GRADING_ORDER; + parameters.alphaGradingOrder = pml::ALPHA_GRADING_ORDER; + for(uint32_t dim = 0u; dim < simDim; dim++) + { + parameters.normalizedSigmaMax[dim] = pml::NORMALIZED_SIGMA_MAX[dim]; + parameters.kappaMax[dim] = pml::KAPPA_MAX[dim]; + parameters.normalizedAlphaMax[dim] = pml::NORMALIZED_ALPHA_MAX[dim]; + } + } + + Thickness getGlobalThickness() const + { + Thickness globalThickness; + for(uint32_t axis = 0u; axis < simDim; axis++) + for(auto direction = 0; direction < 2; direction++) + globalThickness(axis, direction) = absorber::getGlobalThickness()(axis, direction); + return globalThickness; + } + + void initFields() + { + /* Split fields are created here 
(and not with normal E and B) + * in order to not waste memory in case PML is not used. + */ + DataConnector& dc = Environment<>::get().DataConnector(); + fieldE = dc.get(picongpu::FieldE::getName(), true); + fieldB = dc.get(picongpu::FieldB::getName(), true); + psiE = std::make_shared(cellDescription, globalSize); + psiB = std::make_shared(cellDescription, globalSize); + dc.share(psiE); + dc.share(psiB); + } + + template + yeePML::LocalParameters getLocalParameters( + AreaMapper& mapper, + uint32_t const currentStep) const + { + Thickness localThickness = getLocalThickness(currentStep); + checkLocalThickness(localThickness); + return yeePML::LocalParameters( + parameters, + localThickness, + mapper.getGridSuperCells() * SuperCellSize::toRT(), + mapper.getGuardingSuperCells() * SuperCellSize::toRT()); + } + + /** + * Get PML thickness for the local domain at the current time step. + * It depends on the current step because of the moving window. + */ + Thickness getLocalThickness(uint32_t const currentStep) const + { + /* The logic of the following checks is the same as in + * absorber::ExponentialDamping::run( ), to disable the absorber + * at a border we set the corresponding thickness to 0. + */ + auto& movingWindow = MovingWindow::getInstance(); + auto const numSlides = movingWindow.getSlideCounter(currentStep); + auto const numExchanges = NumberOfExchanges::value; + auto const communicationMask + = Environment::get().GridController().getCommunicationMask(); + Thickness localThickness = globalSize; + for(uint32_t exchange = 1u; exchange < numExchanges; ++exchange) + { + /* Here we are only interested in the positive and negative + * directions for x, y, z axes and not the "diagonal" ones. 
+ * So skip other directions except left, right, top, bottom, + * back, front + */ + if(FRONT % exchange != 0) + continue; + + // Transform exchange into a pair of axis and direction + uint32_t axis = 0; + if(exchange >= BOTTOM && exchange <= TOP) + axis = 1; + if(exchange >= BACK) + axis = 2; + uint32_t direction = exchange % 2; + + // No PML at the borders between two local domains + bool hasNeighbour = communicationMask.isSet(exchange); + if(hasNeighbour) + localThickness(axis, direction) = 0; + + // Disable PML during laser initialization + if(fields::laserProfiles::Selected::initPlaneY == 0) + { + bool isLaserInitializationOver + = (currentStep * DELTA_T) >= fields::laserProfiles::Selected::INIT_TIME; + if(numSlides == 0 && !isLaserInitializationOver && exchange == TOP) + localThickness(axis, direction) = 0; + } + + // Disable PML at the far side of the moving window + if(movingWindow.isSlidingWindowActive(currentStep) && exchange == BOTTOM) + localThickness(axis, direction) = 0; + } + return localThickness; + } + + //! Verify that PML fits the local domain + void checkLocalThickness(Thickness const localThickness) const + { + auto const localDomain = Environment::get().SubGrid().getLocalDomain(); + auto const localPMLSize = localThickness.negativeBorder + localThickness.positiveBorder; + auto pmlFitsDomain = true; + for(uint32_t dim = 0u; dim < simDim; dim++) + if(localPMLSize[dim] > localDomain.size[dim]) + pmlFitsDomain = false; + if(!pmlFitsDomain) + throw std::out_of_range("Requested PML size exceeds the local domain"); + } + + //! 
Get number of workers for kernels + static constexpr uint32_t getNumWorkers() + { + return pmacc::traits::GetNumWorkers< + pmacc::math::CT::volume::type::value>::value; + } + }; + + } // namespace detail + } // namespace yeePML + + /** Yee field solver with perfectly matched layer (PML) absorber * - * @tparam T_Area area to apply updates to, the curl must be - * applicable to all points; normally CORE, BORDER, or CORE + BORDER + * Absorption is done using convolutional perfectly matched layer (CPML), + * implemented according to [Taflove, Hagness]. * - * @param currentStep index of the current time iteration - */ - template< uint32_t T_Area > - void updateBHalf( uint32_t const currentStep ) - { - constexpr auto numWorkers = getNumWorkers( ); - using Kernel = yeePML::KernelUpdateBHalf< - numWorkers, - BlockDescription< CurlE > - >; - AreaMapper< T_Area > mapper{ cellDescription }; - /* Note: here it is possible to first check if PML is enabled - * in the local domain at all, and otherwise optimize by calling - * the normal Yee update kernel. We do not do that, as this - * would be fragile with respect to future separation of PML - * into a plugin. - */ - PMACC_KERNEL( Kernel{ } ) - ( mapper.getGridDim( ), numWorkers )( - mapper, - getLocalParameters( mapper, currentStep ), - CurlE( ), - fieldE->getDeviceDataBox( ), - fieldB->getDeviceDataBox( ), - psiB->getDeviceOuterLayerBox( ) - ); - } - - /** Propagate E values in the given area by a time step. + * This class template is a public interface to be used, e.g. in .param + * files and is compatible with other field solvers. Parameters of PML + * are taken from pml.param, pml.unitless. * - * @tparam T_Area area to apply updates to, the curl must be - * applicable to all points; normally CORE, BORDER, or CORE + BORDER + * Enabling this solver results in more memory being used on a device: + * 12 additional scalar field values per each grid cell of a local domain. 
+ * Another limitation is not full persistency with checkpointing: the + * additional values are not saved and so set to 0 after loading a + * checkpoint (which in some cases still provides proper absorption, but + * it is not guaranteed and results will differ due to checkpointing). * - * @param currentStep index of the current time iteration - */ - template< uint32_t T_Area > - void updateE( uint32_t currentStep ) - { - /* Courant-Friedrichs-Levy-Condition for Yee Field Solver: */ - PMACC_CASSERT_MSG(Courant_Friedrichs_Levy_condition_failure____check_your_grid_param_file, - (SPEED_OF_LIGHT*SPEED_OF_LIGHT*DELTA_T*DELTA_T*INV_CELL2_SUM)<=1.0); - - constexpr auto numWorkers = getNumWorkers( ); - using Kernel = yeePML::KernelUpdateE< - numWorkers, - BlockDescription< CurlB > - >; - AreaMapper< T_Area > mapper{ cellDescription }; - // Note: optimization considerations same as in updateBHalf( ). - PMACC_KERNEL( Kernel{ } ) - ( mapper.getGridDim( ), numWorkers )( - mapper, - getLocalParameters( mapper, currentStep ), - CurlB( ), - fieldB->getDeviceDataBox( ), - fieldE->getDeviceDataBox( ), - psiE->getDeviceOuterLayerBox( ) - ); - } - - private: - - // Helper types for configuring kernels - template< typename T_Curl > - using BlockDescription = pmacc::SuperCellDescription< - SuperCellSize, - typename T_Curl::LowerMargin, - typename T_Curl::UpperMargin - >; - template< uint32_t T_Area > - using AreaMapper = pmacc::AreaMapping< - T_Area, - MappingDesc - >; - - // Yee solver data - std::shared_ptr< picongpu::FieldE > fieldE; - std::shared_ptr< picongpu::FieldB > fieldB; - MappingDesc cellDescription; - - /* PML convolutional field data, defined as in [Taflove, Hagness], - * eq. (7.105a,b), and similar for other components - */ - std::shared_ptr< yeePML::FieldE > psiE; - std::shared_ptr< yeePML::FieldB > psiB; - - /** Thickness in terms of the global domain. + * This class template implements the general flow of CORE and BORDER field + * updates and communication. 
The numerical schemes to perform the updates + * are implemented by yeePML::detail::Solver. * - * We store only global thickness, as the local one can change - * during the simulation and so has to be recomputed for each time - * step. PML must be fully contained in a single layer of local - * domains near the global simulation area boundary. (Note that - * the domains of this layer might be changing, e.g. due to moving - * window.) There are no other limitations on PML thickness. In - * particular, it is independent of the BORDER area size. + * @tparam T_CurrentInterpolation current interpolation functor + * @tparam T_CurlE functor to compute curl of E + * @tparam T_CurlB functor to compute curl of B */ - Thickness globalSize; - Parameters parameters; - - void initParameters( ) + template + class YeePML { - globalSize = getGlobalThickness( ); - parameters.sigmaKappaGradingOrder = SIGMA_KAPPA_GRADING_ORDER; - parameters.alphaGradingOrder = ALPHA_GRADING_ORDER; - for( uint32_t dim = 0u; dim < simDim; dim++ ) + public: + // Types required by field solver interface + using CellType = cellType::Yee; + using CurrentInterpolation = T_CurrentInterpolation; + using CurlE = T_CurlE; + using CurlB = T_CurlB; + + YeePML(MappingDesc const cellDescription) : solver(cellDescription) { - parameters.normalizedSigmaMax[ dim ] = NORMALIZED_SIGMA_MAX[ dim ]; - parameters.kappaMax[ dim ] = KAPPA_MAX[ dim ]; - parameters.normalizedAlphaMax[ dim ] = NORMALIZED_ALPHA_MAX[ dim ]; } - } - Thickness getGlobalThickness( ) const - { - Thickness globalThickness; - for( uint32_t axis = 0u; axis < simDim; axis++ ) - for( auto direction = 0; direction < 2; direction++ ) - globalThickness( axis, direction ) = absorber::numCells[ axis ][ direction ]; - return globalThickness; - } - - void initFields( ) - { - /* Split fields are created here (and not with normal E and B) - * in order to not waste memory in case PML is not used. + /** Perform the first part of E and B propagation by a time step. 
+ * + * Together with update_afterCurrent( ) forms the full propagation. + * + * @param currentStep index of the current time iteration */ - DataConnector & dc = Environment<>::get( ).DataConnector( ); - fieldE = dc.get< picongpu::FieldE >( - picongpu::FieldE::getName( ), - true - ); - fieldB = dc.get< picongpu::FieldB >( - picongpu::FieldB::getName( ), - true - ); - psiE = std::make_shared< yeePML::FieldE >( - cellDescription, - globalSize - ); - psiB = std::make_shared< yeePML::FieldB >( - cellDescription, - globalSize - ); - dc.share( psiE ); - dc.share( psiB ); - } - - template< uint32_t T_Area > - yeePML::LocalParameters getLocalParameters( - AreaMapper< T_Area > & mapper, - uint32_t const currentStep - ) const - { - Thickness localThickness = getLocalThickness( currentStep ); - checkLocalThickness( localThickness ); - return yeePML::LocalParameters( - parameters, - localThickness, - mapper.getGridSuperCells( ) * SuperCellSize::toRT( ), - mapper.getGuardingSuperCells( ) * SuperCellSize::toRT( ) - ); - } - - /** - * Get PML thickness for the local domain at the current time step. - * It depends on the current step because of the moving window. - */ - Thickness getLocalThickness( uint32_t const currentStep ) const - { - /* The logic of the following checks is the same as in - * absorber::ExponentialDamping::run( ), to disable the absorber - * at a border we set the corresponding thickness to 0. 
- */ - auto & movingWindow = MovingWindow::getInstance( ); - auto const numSlides = movingWindow.getSlideCounter( currentStep ); - auto const numExchanges = NumberOfExchanges< simDim >::value; - auto const communicationMask = Environment< simDim >::get( ).GridController( ).getCommunicationMask( ); - Thickness localThickness = globalSize; - for( uint32_t exchange = 1u; exchange < numExchanges; ++exchange ) + void update_beforeCurrent(uint32_t const currentStep) { - /* Here we are only interested in the positive and negative - * directions for x, y, z axes and not the "diagonal" ones. - * So skip other directions except left, right, top, bottom, - * back, front + /* These steps are the same as in the Yee solver, PML updates are done as part of methods of + * solver. Note that here we do the second half of updating B, thus completing the first half + * started in a call to update_afterCurrent() at the previous time step. This splitting of B update + * is standard for Yee-type field solvers in PIC codes due to particle pushers normally requiring E + * and B values defined at the same time while the field solver operates with time-staggered + * fields. However, while the standard Yee solver in vacuum is linear in a way of two consecutive + * updates by dt/2 being equal to one update by dt, this is not true for the convolutional field + * updates in PML. Thus, for PML we have to distinguish between the updates by dt/2 by introducing + * first and second halves of the update. This distinction only concerns the convolutional field B + * data used inside the PML, and not the full fields used by the rest of the code. In the very + * first time step of a simulation we start with the second half right away, but this is no + * problem, since the only meaningful initial conditions in the PML area are zero for the + * to-be-absorbed components. 
*/ - if( FRONT % exchange != 0 ) - continue; - - // Transform exchange into a pair of axis and direction - uint32_t axis = 0; - if( exchange >= BOTTOM && exchange <= TOP ) - axis = 1; - if( exchange >= BACK ) - axis = 2; - uint32_t direction = exchange % 2; - - // No PML at the borders between two local domains - bool hasNeighbour = communicationMask.isSet( exchange ); - if( hasNeighbour ) - localThickness( axis, direction ) = 0; - - // Disable PML during laser initialization - if( fields::laserProfiles::Selected::initPlaneY == 0 ) - { - bool isLaserInitializationOver = - (currentStep * DELTA_T) >= fields::laserProfiles::Selected::INIT_TIME; - if( numSlides == 0 && !isLaserInitializationOver && exchange == TOP ) - localThickness( axis, direction ) = 0; - } - - // Disable PML at the far side of the moving window - if( movingWindow.isSlidingWindowActive( currentStep ) && exchange == BOTTOM ) - localThickness( axis, direction ) = 0; - } - return localThickness; - } + solver.template updateBSecondHalf(currentStep); + auto& fieldB = solver.getFieldB(); + EventTask eRfieldB = fieldB.asyncCommunication(__getTransactionEvent()); - //! Verify that PML fits the local domain - void checkLocalThickness( Thickness const localThickness ) const - { - auto const localDomain = Environment< simDim >::get( ).SubGrid( ).getLocalDomain( ); - auto const localPMLSize = localThickness.negativeBorder + localThickness.positiveBorder; - auto pmlFitsDomain = true; - for( uint32_t dim = 0u; dim < simDim; dim++ ) - if( localPMLSize[ dim ] > localDomain.size[ dim ] ) - pmlFitsDomain = false; - if( !pmlFitsDomain ) - throw std::out_of_range( "Requested PML size exceeds the local domain" ); - } - - //! 
Get number of workers for kernels - static constexpr uint32_t getNumWorkers( ) - { - return pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - } - - }; - - } // namespace detail - } // namespace yeePML - - /** Yee field solver with perfectly matched layer (PML) absorber - * - * Absorption is done using convolutional perfectly matched layer (CPML), - * implemented according to [Taflove, Hagness]. - * - * This class template is a public interface to be used, e.g. in .param - * files and is compatible with other field solvers. Parameters of PML - * are taken from pml.param, pml.unitless. - * - * Enabling this solver results in more memory being used on a device: - * 12 additional scalar field values per each grid cell of a local domain. - * Another limitation is not full persistency with checkpointing: the - * additional values are not saved and so set to 0 after loading a - * checkpoint (which in some cases still provides proper absorption, but - * it is not guaranteed and results will differ due to checkpointing). - * - * This class template implements the general flow of CORE and BORDER field - * updates and communication. The numerical schemes to perform the updates - * are implemented by yeePML::detail::Solver. - * - * @tparam T_CurrentInterpolation current interpolation functor - * @tparam T_CurlE functor to compute curl of E - * @tparam T_CurlB functor to compute curl of B - */ - template< - typename T_CurrentInterpolation, - typename T_CurlE, - typename T_CurlB - > - class YeePML - { - public: - - // Types required by field solver interface - using CellType = cellType::Yee; - using CurrentInterpolation = T_CurrentInterpolation; - using CurlE = T_CurlE; - using CurlB = T_CurlB; - - YeePML( MappingDesc const cellDescription ) : - solver( cellDescription ) - { - } - - /** Perform the first part of E and B propagation by a time step. - * - * Together with update_afterCurrent( ) forms the full propagation. 
- * - * @param currentStep index of the current time iteration - */ - void update_beforeCurrent( uint32_t const currentStep ) - { - /* These steps are the same as in the Yee solver, - * PML updates are done as part of solver.updateE( ), - * solver.updateBHalf( ) - */ - solver.template updateBHalf < CORE + BORDER >( currentStep ); - auto & fieldB = solver.getFieldB( ); - EventTask eRfieldB = fieldB.asyncCommunication( __getTransactionEvent( ) ); - - solver.template updateE< CORE >( currentStep ); - __setTransactionEvent( eRfieldB ); - solver.template updateE< BORDER >( currentStep ); - } - - /** Perform the last part of E and B propagation by a time step - * - * Together with update_beforeCurrent( ) forms the full propagation. - * - * @param currentStep index of the current time iteration - */ - void update_afterCurrent( uint32_t const currentStep ) - { - /* These steps are the same as in the Yee solver, - * except the Fabsorber::ExponentialDamping::run( ) is not called, - * PML updates are done as part of solver.updateBHalf( ). - */ - if( laserProfiles::Selected::INIT_TIME > 0.0_X ) - LaserPhysics{ }( currentStep ); - - auto & fieldE = solver.getFieldE( ); - EventTask eRfieldE = fieldE.asyncCommunication( __getTransactionEvent( ) ); + solver.template updateE(currentStep); + __setTransactionEvent(eRfieldB); + solver.template updateE(currentStep); + } - solver.template updateBHalf< CORE >( currentStep ); - __setTransactionEvent( eRfieldE ); - solver.template updateBHalf< BORDER >( currentStep ); + /** Perform the last part of E and B propagation by a time step + * + * Together with update_beforeCurrent( ) forms the full propagation. + * + * @param currentStep index of the current time iteration + */ + void update_afterCurrent(uint32_t const currentStep) + { + /* These steps are the same as in the Yee solver, except the Fabsorber::ExponentialDamping::run( ) + * is not called, PML updates are done as part of calls to methods of solver. 
As explained in more + * detail in comments inside update_beforeCurrent(), here we start a new step of updating B in + * terms of the time-staggered Yee grid. And so this is the first half of B update, to be completed + * in a call to update_beforeCurrent() on the next time step. + */ + if(laserProfiles::Selected::INIT_TIME > 0.0_X) + LaserPhysics{}(currentStep); - auto & fieldB = solver.getFieldB( ); - EventTask eRfieldB = fieldB.asyncCommunication( __getTransactionEvent( ) ); - __setTransactionEvent( eRfieldB ); - } + auto& fieldE = solver.getFieldE(); + EventTask eRfieldE = fieldE.asyncCommunication(__getTransactionEvent()); - static pmacc::traits::StringProperty getStringProperties( ) - { - pmacc::traits::StringProperty propList( "name", "YeePML" ); - return propList; - } + solver.template updateBFirstHalf(currentStep); + __setTransactionEvent(eRfieldE); + solver.template updateBFirstHalf(currentStep); - private: + auto& fieldB = solver.getFieldB(); + EventTask eRfieldB = fieldB.asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(eRfieldB); + } - yeePML::detail::Solver< CurlE, CurlB > solver; + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Yee"); + return propList; + } - }; + private: + yeePML::detail::Solver solver; + }; -} // namespace maxwellSolver -} // namespace fields + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu #include "picongpu/fields/MaxwellSolver/YeePML/Field.tpp" diff --git a/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.kernel b/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.kernel index dcc5fff27b..d04eae9614 100644 --- a/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.kernel +++ b/include/picongpu/fields/MaxwellSolver/YeePML/YeePML.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, * Sergei 
Bastrakov * * This file is part of PIConGPU. @@ -33,551 +33,434 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - //! Parameters of PML for the local domain - struct LocalParameters : public Parameters - { - /** PML size in cells, stored as floats to avoid type casts later, - * negative and positive borders defined the same way as for Thickness - */ - floatD_X const negativeBorderSize; - floatD_X const positiveBorderSize; - - //! Local domain characteristics, including guard cells - DataSpace< simDim > const numLocalDomainCells; - DataSpace< simDim > const numGuardCells; - - LocalParameters( - Parameters const parameters, - Thickness const localThickness, - DataSpace< simDim > const numLocalDomainCells, - DataSpace< simDim > const numGuardCells - ): - Parameters( parameters ), - negativeBorderSize( precisionCast< float_X >( localThickness.negativeBorder ) ), - positiveBorderSize( precisionCast< float_X >( localThickness.positiveBorder ) ), - numLocalDomainCells( numLocalDomainCells ), - numGuardCells( numGuardCells ) - { - } - }; - - namespace detail + namespace fields { - - /** Get relative depth of a given cell for 1D. - * - * This function operates with a 1D slice of domain and PML. - * index == numGuardCells corresponds to the external negative PML - * border, and index == numLocalDomainCells - numGuardCells - 1 - * corresponds to the external positive PML border. - * For the internal area result is 0, for points in PML the depth - * scales from 0 at the internal border to 1 at the external border. - * Index and local domain size include the guard. - * - * @param cellIdx cell index including the guard, can be fractional, - * e.g. 
for halves of cells - * @param numPMLCellsNegative number of PML cells at the negative border - * @param numPMLCellsPositive number of PML cells at the positive border - * @param numLocalDomainCells number of cells of the local domain - * including the guard - * @param numGuardCells number of guard cells at each side - * @return relative depth, value between 0 and 1 - */ - DINLINE float_X getRelativeDepth( - float_X const cellIdx, - float_X const numPMLCellsNegative, - float_X const numPMLCellsPositive, - uint32_t const numLocalDomainCells, - uint32_t const numGuardCells - ) + namespace maxwellSolver { - auto zeroBasedIdx = cellIdx - numGuardCells; - auto const isInLeftPML = ( zeroBasedIdx < numPMLCellsNegative ); - if( isInLeftPML ) - return ( numPMLCellsNegative - zeroBasedIdx ) / numPMLCellsNegative; - else + namespace yeePML { - auto zeroBasedRightPMLStart = numLocalDomainCells - - 2 * numGuardCells - numPMLCellsPositive; - auto const isInRightPML = ( zeroBasedIdx > zeroBasedRightPMLStart ); - if( isInRightPML ) - return ( zeroBasedIdx - zeroBasedRightPMLStart ) / numPMLCellsPositive; - } - return 0._X; - } - - /** Get absorption parameters: sigma, kappa and alpha at a given cell - * - * Apply polynomial grading, as described in pml.param. - * - * @param cellIdx cell index including the guard, can be fractional, - * e.g. 
for halves of cells - * @param parameters parameters of PML in the local domain - * @param[out] normalizedSigma value of normalized sigma at the cell - * @param[out] kappa value of normalized kappa at the cell - * @param[out] normalizedAlpha value of normalized alpha at the cell - */ - DINLINE void getAbsorptionParameters( - floatD_X const cellIdx, - LocalParameters const parameters, - float3_X & normalizedSigma, - float3_X & kappa, - float3_X & normalizedAlpha - ) - { - // initialize with values for non-PML area - normalizedSigma = float3_X::create( 0._X ); - kappa = float3_X::create( 1._X ); - normalizedAlpha = float3_X::create( 0._X ); - for( uint32_t dim = 0u; dim < simDim; dim++ ) - { - auto const relativeDepth = getRelativeDepth( - cellIdx[ dim ], - parameters.negativeBorderSize[ dim ], - parameters.positiveBorderSize[ dim ], - parameters.numLocalDomainCells[ dim ], - parameters.numGuardCells[ dim ] - ); - // Since normally most points are not in PML, avoid costly - // computing in this case - if( relativeDepth != 0._X ) + //! Parameters of PML for the local domain + struct LocalParameters : public Parameters { - /* Grading done according to [Taflove, Hagness], eq. (7.60a, b). - * Note: here we use a general expression, it is possible - * to specialize for sigmaKappaGradingOrder = 2, 3, or 4, - * but currently seems not worth it. - */ - auto const sigmaKappaGradingCoeff = math::pow( - relativeDepth, - parameters.sigmaKappaGradingOrder - ); - normalizedSigma[ dim ] = parameters.normalizedSigmaMax[ dim ] * - sigmaKappaGradingCoeff; - kappa[ dim ] = 1._X + ( parameters.kappaMax[ dim ] - 1._X ) * - sigmaKappaGradingCoeff; - /* Grading done according to [Taflove, Hagness], eq. 
(7.79), - * note that this code is only correct when relativeDepth != 0 + /** PML size in cells, stored as floats to avoid type casts later, + * negative and positive borders defined the same way as for Thickness */ - auto const alphaGradingCoeff = math::pow( - 1._X - relativeDepth, - parameters.alphaGradingOrder - ); - normalizedAlpha[ dim ] = parameters.normalizedAlphaMax[ dim ] * - alphaGradingCoeff; - } - } - } - - //! Coefficients for E or B updates at a particular point - struct Coefficients - { - //! Coordinate stretching coefficient - float3_X kappa; - - //! Damping coefficient, [Taflove, Hagness], eq. (7.102) - float3_X b; - - //! Spatial difference coefficient, [Taflove, Hagness], eq. (7.99) - float3_X c; - }; - - /** Get coefficients for E or B updates at a given cell - * - * Apply polynomial grading, as described in pml.param. - * Due to normalizations, the same way of computing coefficients applies - * to E and B updates. - * - * @param cellIdx cell index including the guard, can be fractional, - * e.g. for halves of cells - * @param parameters parameters of PML in the local domain - * @param dt value of time step to propagate by - * @result an instance of Coefficients with computed values - */ - DINLINE Coefficients getCoefficients( - floatD_X const cellIdx, - LocalParameters const parameters, - float_X const dt - ) - { - Coefficients coeffs; - float3_X normalizedSigma, normalizedAlpha; - getAbsorptionParameters( - cellIdx, - parameters, - normalizedSigma, - coeffs.kappa, - normalizedAlpha - ); - - /* [Taflove, Hagness], eq. (7.102), normalizedSigma and - * normalizedAlpha are already divided by eps0 - */ - coeffs.b = math::exp( - -( normalizedSigma / coeffs.kappa + normalizedAlpha ) * dt - ); - /* [Taflove, Hagness], eq. 
(7.99), in our case both the numerator - * and the denominator are equally normalized - */ - coeffs.c = float3_X::create( 0._X ); - for ( uint32_t dim = 0u; dim < 3; dim++ ) - { - auto const denominator = coeffs.kappa[ dim ] * - ( normalizedSigma[ dim ] + normalizedAlpha[ dim ] * - coeffs.kappa[ dim ] ); - // Avoid the 0 / 0 uncertainty, in that case keep the value 0 - if( denominator ) - coeffs.c[ dim ] = normalizedSigma[ dim ] * - ( coeffs.b[ dim ] - 1.0_X ) / denominator; - } - return coeffs; - } - - /** Return if a point with given coefficients belongs to PML - * - * @param coeffs values of coefficients - * @result boolean value if a point with given coefficients belongs - * to PML - */ - DINLINE bool isInPML( Coefficients const coeffs ) - { - /* Each damping component is < 1 when absorption is enabled - * along this direction and == 1 otherwise. - * So a product is 1 in the internal area and < 1 in PML - */ - return coeffs.b.productOfComponents( ) != 1.0_X; - } - - } // namespace detail - - /** Functor to update the electric field by a time step - * - * @tparam T_numWorkers number of workers - * @tparam T_BlockDescription field (electric and magnetic) domain description - */ - template< - uint32_t T_numWorkers, - typename T_BlockDescription - > - struct KernelUpdateE - { - /** Update the electric field by a time step - * - * @tparam T_Acc alpaka accelerator type - * @tparam T_Mapping mapper functor type - * @tparam T_Curl curl functor type - * @tparam T_BBox pmacc::DataBox, magnetic field box type - * @tparam T_EBox pmacc::DataBox, electric field box type - * - * @param acc alpaka accelerator - * @param mapper functor to map a block to a supercell - * @param parameters PML parameters for a local domain - * @param curl functor to calculate the electric field, interface must be - * `operator( )( T_EBox )` - * @param fieldB magnetic field iterator - * @param fieldE electric field iterator - * @param fieldPsiE PML convolutional electric field iterator - */ - 
template< - typename T_Acc, - typename T_Mapping, - typename T_Curl, - typename T_BBox, - typename T_EBox - > - DINLINE void operator( )( - T_Acc const & acc, - T_Mapping const mapper, - LocalParameters const parameters, - T_Curl const curl, - T_BBox const fieldB, - T_EBox fieldE, - FieldBox fieldPsiE - ) const - { - /* Each block processes grid values in a supercell, - * the index includes guards, same as all indices in this kernel - */ - auto const blockBeginIdx = mapper.getSuperCellIndex( - DataSpace< simDim >( blockIdx ) - ) * MappingDesc::SuperCellSize::toRT( ); - - // Cache B values for the block - using namespace mappings::threads; - constexpr auto numWorkers = T_numWorkers; - auto const workerIdx = threadIdx.x; - nvidia::functors::Assign assign; - auto fieldBBlock = fieldB.shift( blockBeginIdx ); - ThreadCollective< - T_BlockDescription, - numWorkers - > collectiveCacheB( workerIdx ); - auto cachedB = CachedBox::create< - 0u, - typename T_BBox::ValueType - >( - acc, - T_BlockDescription( ) - ); - collectiveCacheB( - acc, - assign, - cachedB, - fieldBBlock - ); - __syncthreads( ); + floatD_X const negativeBorderSize; + floatD_X const positiveBorderSize; + + //! 
Local domain characteristics, including guard cells + DataSpace const numLocalDomainCells; + DataSpace const numGuardCells; + + LocalParameters( + Parameters const parameters, + Thickness const localThickness, + DataSpace const numLocalDomainCells, + DataSpace const numGuardCells) + : Parameters(parameters) + , negativeBorderSize(precisionCast(localThickness.negativeBorder)) + , positiveBorderSize(precisionCast(localThickness.positiveBorder)) + , numLocalDomainCells(numLocalDomainCells) + , numGuardCells(numGuardCells) + { + } + }; - // Threads process values of the supercell in parallel - constexpr auto numCellsPerSuperCell = - pmacc::math::CT::volume< SuperCellSize >::type::value; - ForEachIdx< - IdxConfig< - numCellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + namespace detail { - constexpr auto c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - constexpr auto dt = DELTA_T; + /** Get relative depth of a given cell for 1D. + * + * This function operates with a 1D slice of domain and PML. + * index == numGuardCells corresponds to the external negative PML + * border, and index == numLocalDomainCells - numGuardCells - 1 + * corresponds to the external positive PML border. + * For the internal area result is 0, for points in PML the depth + * scales from 0 at the internal border to 1 at the external border. + * Index and local domain size include the guard. + * + * @param cellIdx cell index including the guard, can be fractional, + * e.g. 
for halves of cells + * @param numPMLCellsNegative number of PML cells at the negative border + * @param numPMLCellsPositive number of PML cells at the positive border + * @param numLocalDomainCells number of cells of the local domain + * including the guard + * @param numGuardCells number of guard cells at each side + * @return relative depth, value between 0 and 1 + */ + DINLINE float_X getRelativeDepth( + float_X const cellIdx, + float_X const numPMLCellsNegative, + float_X const numPMLCellsPositive, + uint32_t const numLocalDomainCells, + uint32_t const numGuardCells) + { + auto zeroBasedIdx = cellIdx - numGuardCells; + auto const isInLeftPML = (zeroBasedIdx < numPMLCellsNegative); + if(isInLeftPML) + return (numPMLCellsNegative - zeroBasedIdx) / numPMLCellsNegative; + else + { + auto zeroBasedRightPMLStart + = numLocalDomainCells - 2 * numGuardCells - numPMLCellsPositive; + auto const isInRightPML = (zeroBasedIdx > zeroBasedRightPMLStart); + if(isInRightPML) + return (zeroBasedIdx - zeroBasedRightPMLStart) / numPMLCellsPositive; + } + return 0._X; + } - auto const idxInSuperCell = - DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - // grid index to process with the current thread - auto const idx = blockBeginIdx + idxInSuperCell; - // with the current Yee grid, no shift needed here - auto const pmlIdx = precisionCast< float_X >( idx ); - auto const coeffs = detail::getCoefficients( - pmlIdx, - parameters, - dt - ); + /** Get absorption parameters: sigma, kappa and alpha at a given cell + * + * Apply polynomial grading, as described in pml.param. + * + * @param cellIdx cell index including the guard, can be fractional, + * e.g. 
for halves of cells + * @param parameters parameters of PML in the local domain + * @param[out] normalizedSigma value of normalized sigma at the cell + * @param[out] kappa value of normalized kappa at the cell + * @param[out] normalizedAlpha value of normalized alpha at the cell + */ + DINLINE void getAbsorptionParameters( + floatD_X const cellIdx, + LocalParameters const parameters, + float3_X& normalizedSigma, + float3_X& kappa, + float3_X& normalizedAlpha) + { + // initialize with values for non-PML area + normalizedSigma = float3_X::create(0._X); + kappa = float3_X::create(1._X); + normalizedAlpha = float3_X::create(0._X); + for(uint32_t dim = 0u; dim < simDim; dim++) + { + auto const relativeDepth = getRelativeDepth( + cellIdx[dim], + parameters.negativeBorderSize[dim], + parameters.positiveBorderSize[dim], + parameters.numLocalDomainCells[dim], + parameters.numGuardCells[dim]); + // Since normally most points are not in PML, avoid costly + // computing in this case + if(relativeDepth != 0._X) + { + /* Grading done according to [Taflove, Hagness], eq. (7.60a, b). + * Note: here we use a general expression, it is possible + * to specialize for sigmaKappaGradingOrder = 2, 3, or 4, + * but currently seems not worth it. + */ + auto const sigmaKappaGradingCoeff + = math::pow(relativeDepth, parameters.sigmaKappaGradingOrder); + normalizedSigma[dim] = parameters.normalizedSigmaMax[dim] * sigmaKappaGradingCoeff; + kappa[dim] = 1._X + (parameters.kappaMax[dim] - 1._X) * sigmaKappaGradingCoeff; + /* Grading done according to [Taflove, Hagness], eq. (7.79), + * note that this code is only correct when relativeDepth != 0 + */ + auto const alphaGradingCoeff + = math::pow(1._X - relativeDepth, parameters.alphaGradingOrder); + normalizedAlpha[dim] = parameters.normalizedAlphaMax[dim] * alphaGradingCoeff; + } + } + } - if( detail::isInPML( coeffs ) ) + //! 
Coefficients for E or B updates at a particular point + struct Coefficients { - /* This precomputation of partial derivatives is done - * more for readability, rather than avoiding computing - * it twice - */ - using Difference = typename T_Curl::Difference; - const typename Difference::template GetDifference< 0 > Dx; - const typename Difference::template GetDifference< 1 > Dy; - const typename Difference::template GetDifference< 2 > Dz; - auto const localB = cachedB.shift( idxInSuperCell ); - auto const dBxDy = Dy( localB ).x( ); - auto const dBxDz = Dz( localB ).x( ); - auto const dByDx = Dx( localB ).y( ); - auto const dByDz = Dz( localB ).y( ); - auto const dBzDx = Dx( localB ).z( ); - auto const dBzDy = Dy( localB ).z( ); + //! Coordinate stretching coefficient + float3_X kappa; + + //! Damping coefficient, [Taflove, Hagness], eq. (7.102) + float3_X b; + + //! Spatial difference coefficient, [Taflove, Hagness], eq. (7.99) + float3_X c; + }; + + /** Get coefficients for E or B updates at a given cell + * + * Apply polynomial grading, as described in pml.param. + * Due to normalizations, the same way of computing coefficients applies + * to E and B updates. + * + * @param cellIdx cell index including the guard, can be fractional, + * e.g. for halves of cells + * @param parameters parameters of PML in the local domain + * @param dt value of time step to propagate by + * @result an instance of Coefficients with computed values + */ + DINLINE Coefficients + getCoefficients(floatD_X const cellIdx, LocalParameters const parameters, float_X const dt) + { + Coefficients coeffs; + float3_X normalizedSigma, normalizedAlpha; + getAbsorptionParameters(cellIdx, parameters, normalizedSigma, coeffs.kappa, normalizedAlpha); - /* Update convolutional fields using [Taflove, Hagness], - * eq. (7.105a,b) and similar for other components. - * For PIC the right-hand side uses B, not H. + /* [Taflove, Hagness], eq. 
(7.102), normalizedSigma and + * normalizedAlpha are already divided by eps0 */ - auto & psiE = fieldPsiE( idx ); - psiE.yx = coeffs.b.x( ) * psiE.yx + coeffs.c.x( ) * dBzDx; - psiE.zx = coeffs.b.x( ) * psiE.zx + coeffs.c.x( ) * dByDx; - psiE.xy = coeffs.b.y( ) * psiE.xy + coeffs.c.y( ) * dBzDy; - psiE.zy = coeffs.b.y( ) * psiE.zy + coeffs.c.y( ) * dBxDy; - psiE.xz = coeffs.b.z( ) * psiE.xz + coeffs.c.z( ) * dByDz; - psiE.yz = coeffs.b.z( ) * psiE.yz + coeffs.c.z( ) * dBxDz; - - /* [Taflove, Hagness], eq. (7.106) and similar for other - * components. Coefficients Ca, Cb as given in (7.107a,b) - * are general to account for materials, in addition to - * artificial PML absorbing medium. We do not have any - * real material, so in (7.107a,b) we have to use - * sigma(i + 1/2, j, k) = 0 (it is another sigma, - * unrelated to PML), eps(i + 1/2, j, k) = EPS0. Also, - * same as the Yee scheme in PIC, adjusted to use B, - * not H, on the right-hand side. + coeffs.b = math::exp(-(normalizedSigma / coeffs.kappa + normalizedAlpha) * dt); + /* [Taflove, Hagness], eq. 
(7.99), in our case both the numerator + * and the denominator are equally normalized */ - fieldE( idx ).x( ) += c2 * dt * (dBzDy / coeffs.kappa.y( ) - - dByDz / coeffs.kappa.z( ) + psiE.xy - psiE.xz ); - fieldE( idx ).y( ) += c2 * dt * (dBxDz / coeffs.kappa.z( ) - - dBzDx / coeffs.kappa.x( ) + psiE.yz - psiE.yx ); - fieldE( idx ).z( ) += c2 * dt * (dByDx / coeffs.kappa.x( ) - - dBxDy / coeffs.kappa.y( ) + psiE.zx - psiE.zy ); + coeffs.c = float3_X::create(0._X); + for(uint32_t dim = 0u; dim < 3; dim++) + { + auto const denominator = coeffs.kappa[dim] + * (normalizedSigma[dim] + normalizedAlpha[dim] * coeffs.kappa[dim]); + // Avoid the 0 / 0 uncertainty, in that case keep the value 0 + if(denominator) + coeffs.c[dim] = normalizedSigma[dim] * (coeffs.b[dim] - 1.0_X) / denominator; + } + return coeffs; } - else - // Normal Yee scheme update - fieldE( idx ) += curl( cachedB.shift( idxInSuperCell ) ) * c2 * dt; - } - ); - } - }; - /** Functor to update the magnetic field by half a time step - * - * @tparam T_numWorkers number of workers - * @tparam T_BlockDescription field (electric and magnetic) domain description - */ - template< - uint32_t T_numWorkers, - typename T_BlockDescription - > - struct KernelUpdateBHalf - { - /** Update the magnetic field by half a time step - * - * @tparam T_Acc alpaka accelerator type - * @tparam T_Mapping mapper functor type - * @tparam T_Curl curl functor type - * @tparam T_EBox pmacc::DataBox electric field box type - * @tparam T_BBox pmacc::DataBox magnetic field box type - * - * @param acc alpaka accelerator - * @param mapper functor to map a block to a supercell - * @param parameters PML parameters for a local domain - * @param curl functor to calculate the electric field, interface must be - * `operator( )( T_EBox )` - * @param fieldE electric field iterator - * @param fieldB magnetic field iterator - * @param fieldPsiB PML convolutional magnetic field iterator - */ - template< - typename T_Acc, - typename T_Mapping, - typename 
T_Curl, - typename T_EBox, - typename T_BBox - > - DINLINE void operator( )( - T_Acc const & acc, - T_Mapping const mapper, - LocalParameters const parameters, - T_Curl const curl, - T_EBox const fieldE, - T_BBox fieldB, - FieldBox fieldPsiB - ) const - { - /* Each block processes grid values in a supercell, - * the index includes guards, same as all indices in this kernel - */ - auto const blockBeginIdx = mapper.getSuperCellIndex( - DataSpace< simDim >( blockIdx ) - ) * MappingDesc::SuperCellSize::toRT( ); + /** Return if a point with given coefficients belongs to PML + * + * @param coeffs values of coefficients + * @result boolean value if a point with given coefficients belongs + * to PML + */ + DINLINE bool isInPML(Coefficients const coeffs) + { + /* Each damping component is < 1 when absorption is enabled + * along this direction and == 1 otherwise. + * So a product is 1 in the internal area and < 1 in PML + */ + return coeffs.b.productOfComponents() != 1.0_X; + } - // Cache E values for the block - using namespace mappings::threads; - constexpr auto numWorkers = T_numWorkers; - auto const workerIdx = threadIdx.x; - nvidia::functors::Assign assign; - auto fieldEBlock = fieldE.shift( blockBeginIdx ); - ThreadCollective< - T_BlockDescription, - numWorkers - > collectiveCacheE( workerIdx ); - auto cachedE = CachedBox::create< - 0u, - typename T_EBox::ValueType - >( - acc, - T_BlockDescription( ) - ); - collectiveCacheE( - acc, - assign, - cachedE, - fieldEBlock - ); - __syncthreads( ); + } // namespace detail - // Threads process values of the supercell in parallel - constexpr auto numCellsPerSuperCell = - pmacc::math::CT::volume< SuperCellSize >::type::value; - ForEachIdx< - IdxConfig< - numCellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + /** Functor to update the electric field by a time step + * + * @tparam T_numWorkers number of workers + * @tparam T_BlockDescription field (electric and magnetic) 
domain description + */ + template + struct KernelUpdateE { - constexpr auto halfDt = 0.5_X * DELTA_T; - auto const idxInSuperCell = - DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - // grid index to process with the current thread - auto const idx = blockBeginIdx + idxInSuperCell; - // with the current Yee grid, a half cell shift is needed here - auto const pmlIdx = floatD_X::create( 0.5_X ) + - precisionCast< float_X >( idx ); - auto const coeffs = detail::getCoefficients( - pmlIdx, - parameters, - halfDt - ); - - if( detail::isInPML( coeffs ) ) + /** Update the electric field by a time step + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_Mapping mapper functor type + * @tparam T_Curl curl functor type + * @tparam T_BBox pmacc::DataBox, magnetic field box type + * @tparam T_EBox pmacc::DataBox, electric field box type + * + * @param acc alpaka accelerator + * @param mapper functor to map a block to a supercell + * @param parameters PML parameters for a local domain + * @param curl functor to calculate the electric field, interface must be + * `operator( )( T_EBox )` + * @param fieldB magnetic field iterator + * @param fieldE electric field iterator + * @param fieldPsiE PML convolutional electric field iterator + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_Mapping const mapper, + LocalParameters const parameters, + T_Curl const curl, + T_BBox const fieldB, + T_EBox fieldE, + FieldBox fieldPsiE) const { - /* This precomputation of partial derivatives is done - * more for readability, rather than avoiding computing - * it twice - */ - using Difference = typename T_Curl::Difference; - const typename Difference::template GetDifference< 0 > Dx; - const typename Difference::template GetDifference< 1 > Dy; - const typename Difference::template GetDifference< 2 > Dz; - auto const localE = cachedE.shift( idxInSuperCell ); - auto const dExDy = Dy( localE ).x( ); - auto const dExDz = Dz( localE ).x( ); - auto 
const dEyDx = Dx( localE ).y( ); - auto const dEyDz = Dz( localE ).y( ); - auto const dEzDx = Dx( localE ).z( ); - auto const dEzDy = Dy( localE ).z( ); - - /* Update convolutional fields using [Taflove, Hagness], - * eq. (7.110a,b) and similar for other components. - * For PIC the left-hand side uses B, not H. + /* Each block processes grid values in a supercell, + * the index includes guards, same as all indices in this kernel */ - auto & psiB = fieldPsiB( idx ); - psiB.yx = coeffs.b.x( ) * psiB.yx + coeffs.c.x( ) * dEzDx; - psiB.zx = coeffs.b.x( ) * psiB.zx + coeffs.c.x( ) * dEyDx; - psiB.xy = coeffs.b.y( ) * psiB.xy + coeffs.c.y( ) * dEzDy; - psiB.zy = coeffs.b.y( ) * psiB.zy + coeffs.c.y( ) * dExDy; - psiB.xz = coeffs.b.z( ) * psiB.xz + coeffs.c.z( ) * dEyDz; - psiB.yz = coeffs.b.z( ) * psiB.yz + coeffs.c.z( ) * dExDz; - - /* [Taflove, Hagness], eq. (7.108) and similar for other - * components. Coefficients Da, Db as given in (7.109a,b) - * are general to account for materials, in addition to - * artificial PML absorbing medium. We do not have any - * real material, so in (7.109a,b) we have to use - * sigma*(i + 1/2, j, k) = 0 (it is another sigma*, - * unrelated to PML), mue(i + 1/2, j, k) = MUE0. Also, - * same as the Yee scheme in PIC, adjusted to use B, - * not H, on the left-hand side. 
- */ - fieldB( idx ).x( ) += halfDt * ( dEyDz / coeffs.kappa.z( ) - - dEzDy / coeffs.kappa.y( ) + psiB.xz - psiB.xy ); - fieldB( idx ).y( ) += halfDt * ( dEzDx / coeffs.kappa.x( ) - - dExDz / coeffs.kappa.z( ) + psiB.yx - psiB.yz ); - fieldB( idx ).z( ) += halfDt * ( dExDy / coeffs.kappa.y( ) - - dEyDx / coeffs.kappa.x( ) + psiB.zy - psiB.zx ); + auto const blockBeginIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))) + * MappingDesc::SuperCellSize::toRT(); + + // Cache B values for the block + using namespace mappings::threads; + constexpr auto numWorkers = T_numWorkers; + auto const workerIdx = cupla::threadIdx(acc).x; + nvidia::functors::Assign assign; + auto fieldBBlock = fieldB.shift(blockBeginIdx); + ThreadCollective collectiveCacheB(workerIdx); + auto cachedB = CachedBox::create<0u, typename T_BBox::ValueType>(acc, T_BlockDescription()); + collectiveCacheB(acc, assign, cachedB, fieldBBlock); + cupla::__syncthreads(acc); + + // Threads process values of the supercell in parallel + constexpr auto numCellsPerSuperCell = pmacc::math::CT::volume::type::value; + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + constexpr auto c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; + constexpr auto dt = DELTA_T; + + auto const idxInSuperCell + = DataSpaceOperations::template map(linearIdx); + // grid index to process with the current thread + auto const idx = blockBeginIdx + idxInSuperCell; + // with the current Yee grid, no shift needed here + auto const pmlIdx = precisionCast(idx); + auto const coeffs = detail::getCoefficients(pmlIdx, parameters, dt); + + if(detail::isInPML(coeffs)) + { + /* Update convolutional fields using [Taflove, Hagness], + * eq. (7.105a,b) and similar for other components. + * For PIC the right-hand side uses B, not H. + * + * Notation: dBdx = dB / dx, dBdx.y() = dBy / dx, etc. 
+ */ + auto const localB = cachedB.shift(idxInSuperCell); + auto const dBdx = curl.xDerivative(localB); + auto const dBdy = curl.yDerivative(localB); + auto const dBdz = curl.zDerivative(localB); + auto& psiE = fieldPsiE(idx); + psiE.yx = coeffs.b.x() * psiE.yx + coeffs.c.x() * dBdx.z(); + psiE.zx = coeffs.b.x() * psiE.zx + coeffs.c.x() * dBdx.y(); + psiE.xy = coeffs.b.y() * psiE.xy + coeffs.c.y() * dBdy.z(); + psiE.zy = coeffs.b.y() * psiE.zy + coeffs.c.y() * dBdy.x(); + psiE.xz = coeffs.b.z() * psiE.xz + coeffs.c.z() * dBdz.y(); + psiE.yz = coeffs.b.z() * psiE.yz + coeffs.c.z() * dBdz.x(); + + /* [Taflove, Hagness], eq. (7.106) and similar for other + * components. Coefficients Ca, Cb as given in (7.107a,b) + * are general to account for materials, in addition to + * artificial PML absorbing medium. We do not have any + * real material, so in (7.107a,b) we have to use + * sigma(i + 1/2, j, k) = 0 (it is another sigma, + * unrelated to PML), eps(i + 1/2, j, k) = EPS0. Also, + * same as the Yee scheme in PIC, adjusted to use B, + * not H, on the right-hand side. 
+ */ + fieldE(idx).x() += c2 * dt + * (dBdy.z() / coeffs.kappa.y() - dBdz.y() / coeffs.kappa.z() + psiE.xy - psiE.xz); + fieldE(idx).y() += c2 * dt + * (dBdz.x() / coeffs.kappa.z() - dBdx.z() / coeffs.kappa.x() + psiE.yz - psiE.yx); + fieldE(idx).z() += c2 * dt + * (dBdx.y() / coeffs.kappa.x() - dBdy.x() / coeffs.kappa.y() + psiE.zx - psiE.zy); + } + else + // Normal Yee scheme update + fieldE(idx) += curl(cachedB.shift(idxInSuperCell)) * c2 * dt; + }); + } + }; + + /** Functor to update the magnetic field by half a time step + * + * @tparam T_numWorkers number of workers + * @tparam T_BlockDescription field (electric and magnetic) domain description + */ + template + struct KernelUpdateBHalf + { + /** Update the magnetic field by half a time step + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_Mapping mapper functor type + * @tparam T_Curl curl functor type + * @tparam T_EBox pmacc::DataBox electric field box type + * @tparam T_BBox pmacc::DataBox magnetic field box type + * + * @param acc alpaka accelerator + * @param mapper functor to map a block to a supercell + * @param parameters PML parameters for a local domain + * @param curl functor to calculate the electric field, interface must be + * `operator( )( T_EBox )` + * @param fieldE electric field iterator + * @param updatePsiB whether convolutional magnetic fields need to be updated, or are up-to-date + * @param fieldB magnetic field iterator + * @param fieldPsiB PML convolutional magnetic field iterator + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_Mapping const mapper, + LocalParameters const parameters, + T_Curl const curl, + T_EBox const fieldE, + bool const updatePsiB, + T_BBox fieldB, + FieldBox fieldPsiB) const + { + /* Each block processes grid values in a supercell, + * the index includes guards, same as all indices in this kernel + */ + auto const blockBeginIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))) + * MappingDesc::SuperCellSize::toRT(); + + // 
Cache E values for the block + using namespace mappings::threads; + constexpr auto numWorkers = T_numWorkers; + auto const workerIdx = cupla::threadIdx(acc).x; + nvidia::functors::Assign assign; + auto fieldEBlock = fieldE.shift(blockBeginIdx); + ThreadCollective collectiveCacheE(workerIdx); + auto cachedE = CachedBox::create<0u, typename T_EBox::ValueType>(acc, T_BlockDescription()); + collectiveCacheE(acc, assign, cachedE, fieldEBlock); + cupla::__syncthreads(acc); + + // Threads process values of the supercell in parallel + constexpr auto numCellsPerSuperCell = pmacc::math::CT::volume::type::value; + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + constexpr auto dt = DELTA_T; + constexpr auto halfDt = 0.5_X * dt; + auto const idxInSuperCell + = DataSpaceOperations::template map(linearIdx); + // grid index to process with the current thread + auto const idx = blockBeginIdx + idxInSuperCell; + // with the current Yee grid, a half cell shift is needed here + auto const pmlIdx = floatD_X::create(0.5_X) + precisionCast(idx); + /* Note that convolutional fields are updated once per dt. So the coefficients are computed + * in this way, and whether the update has to be performed is controlled by a kernel caller + * with updatePsiB parameter. + */ + auto const coeffs = detail::getCoefficients(pmlIdx, parameters, dt); + + if(detail::isInPML(coeffs)) + { + /* Update convolutional fields using [Taflove, Hagness], + * eq. (7.110a,b) and similar for other components. + * For PIC the left-hand side uses B, not H. + * + * Notation: dEdx = dE / dx, dEdx.y() = dEy / dx, etc. 
+ */ + auto const localE = cachedE.shift(idxInSuperCell); + auto const dEdx = curl.xDerivative(localE); + auto const dEdy = curl.yDerivative(localE); + auto const dEdz = curl.zDerivative(localE); + auto& psiB = fieldPsiB(idx); + if(updatePsiB) + { + psiB.yx = coeffs.b.x() * psiB.yx + coeffs.c.x() * dEdx.z(); + psiB.zx = coeffs.b.x() * psiB.zx + coeffs.c.x() * dEdx.y(); + psiB.xy = coeffs.b.y() * psiB.xy + coeffs.c.y() * dEdy.z(); + psiB.zy = coeffs.b.y() * psiB.zy + coeffs.c.y() * dEdy.x(); + psiB.xz = coeffs.b.z() * psiB.xz + coeffs.c.z() * dEdz.y(); + psiB.yz = coeffs.b.z() * psiB.yz + coeffs.c.z() * dEdz.x(); + } + + /* [Taflove, Hagness], eq. (7.108) and similar for other + * components. Coefficients Da, Db as given in (7.109a,b) + * are general to account for materials, in addition to + * artificial PML absorbing medium. We do not have any + * real material, so in (7.109a,b) we have to use + * sigma*(i + 1/2, j, k) = 0 (it is another sigma*, + * unrelated to PML), mue(i + 1/2, j, k) = MUE0. Also, + * same as the Yee scheme in PIC, adjusted to use B, + * not H, on the left-hand side. 
+ */ + fieldB(idx).x() += halfDt + * (dEdz.y() / coeffs.kappa.z() - dEdy.z() / coeffs.kappa.y() + psiB.xz - psiB.xy); + fieldB(idx).y() += halfDt + * (dEdx.z() / coeffs.kappa.x() - dEdz.x() / coeffs.kappa.z() + psiB.yx - psiB.yz); + fieldB(idx).z() += halfDt + * (dEdy.x() / coeffs.kappa.y() - dEdx.y() / coeffs.kappa.x() + psiB.zy - psiB.zx); + } + else + // Normal Yee scheme update + fieldB(idx) -= curl(cachedE.shift(idxInSuperCell)) * halfDt; + }); } - else - // Normal Yee scheme update - fieldB( idx ) -= curl( cachedE.shift( idxInSuperCell ) ) * halfDt; - } - ); - } - }; + }; -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields + } // namespace yeePML + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/absorber/Absorber.hpp b/include/picongpu/fields/absorber/Absorber.hpp index 0495001a5a..09205338b3 100644 --- a/include/picongpu/fields/absorber/Absorber.hpp +++ b/include/picongpu/fields/absorber/Absorber.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Sergei Bastrakov +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. * @@ -31,208 +31,295 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ - - /** Forward declaration to avoid mutual including with YeePML.hpp - * - * @tparam T_CurrentInterpolation current interpolation functor - * @tparam T_CurlE functor to compute curl of E - * @tparam T_CurlB functor to compute curl of B - */ - template< - typename T_CurrentInterpolation, - typename T_CurlE, - typename T_CurlB - > - class YeePML; - -} // namespace maxwellSolver - -namespace absorber -{ - - //! 
Forward declaration to avoid mutual including with ExponentialDamping.hpp - class ExponentialDamping; - -namespace detail -{ - - /** Get string properties of the absorber - * - * @param name absorber name - */ - HINLINE pmacc::traits::StringProperty getStringProperties( - std::string const & name - ); - - /** Absorber wrapper - * - * Provides unified interface for the absorber information: - * size along the 6 boundaries and getStringProperties() implementation. - * Currently does not provide the computational part, only description. - * - * The general version uses exponential absorber settings since this is the - * default absorber. - * - * @tparam T_FieldSolver field solver - */ - template< typename T_FieldSolver > - struct Absorber + namespace fields { - //! Number of absorber cells along the min x boundary - static constexpr uint32_t xNegativeNumCells = ABSORBER_CELLS[ 0 ][ 0 ]; - - //! Number of absorber cells along the max x boundary - static constexpr uint32_t xPositiveNumCells = ABSORBER_CELLS[ 0 ][ 1 ]; - - //! Number of absorber cells along the min y boundary - static constexpr uint32_t yNegativeNumCells = ABSORBER_CELLS[ 1 ][ 0 ]; - - //! Number of absorber cells along the max y boundary - static constexpr uint32_t yPositiveNumCells = ABSORBER_CELLS[ 1 ][ 1 ]; - - //! Number of absorber cells along the min z boundary - static constexpr uint32_t zNegativeNumCells = ABSORBER_CELLS[ 2 ][ 0 ]; - - //! Number of cells along the max z boundary - static constexpr uint32_t zPositiveNumCells = ABSORBER_CELLS[ 2 ][ 1 ]; - - //! 
Get string properties of the absorber - static pmacc::traits::StringProperty getStringProperties() + namespace maxwellSolver { - return detail::getStringProperties( "exponential damping" ); - } - }; - - namespace pml = maxwellSolver::yeePML; - - /** Absorber wrapper - * - * Specialization for PML - * - * @tparam T_CurrentInterpolation current interpolation for YeePML - * @tparam T_CurlE curl E for YeePML - * @tparam T_CurlB curl B for YeePML - */ - template< - typename T_CurrentInterpolation, - typename T_CurlE, - typename T_CurlB - > - struct Absorber< - maxwellSolver::YeePML< - T_CurrentInterpolation, - T_CurlE, - T_CurlB - > - > - { - //! Number of absorber cells along the min x boundary - static constexpr uint32_t xNegativeNumCells = pml::NUM_CELLS[ 0 ][ 0 ]; - - //! Number of absorber cells along the max x boundary - static constexpr uint32_t xPositiveNumCells = pml::NUM_CELLS[ 0 ][ 1 ]; - - //! Number of absorber cells along the min y boundary - static constexpr uint32_t yNegativeNumCells = pml::NUM_CELLS[ 1 ][ 0 ]; - - //! Number of absorber cells along the max y boundary - static constexpr uint32_t yPositiveNumCells = pml::NUM_CELLS[ 1 ][ 1 ]; - - //! Number of absorber cells along the min z boundary - static constexpr uint32_t zNegativeNumCells = pml::NUM_CELLS[ 2 ][ 0 ]; - - //! Number of absorber cells along the max z boundary - static constexpr uint32_t zPositiveNumCells = pml::NUM_CELLS[ 2 ][ 1 ]; - - //! 
Get string properties of the absorber - static pmacc::traits::StringProperty getStringProperties() + /** Forward declaration to avoid mutual including with YeePML.hpp + * + * @tparam T_CurrentInterpolation current interpolation functor + * @tparam T_CurlE functor to compute curl of E + * @tparam T_CurlB functor to compute curl of B + */ + template + class YeePML; + + } // namespace maxwellSolver + + namespace absorber { - return detail::getStringProperties( "convolutional PML" ); - } - - }; - -} // namespace detail - - /** Absorber description implementing getStringProperties() - * - * To be used for writing absorber meta information, does not provide - * interface for running the absorber - */ - using Absorber = detail::Absorber< Solver >; - - /** Number of absorber cells along each boundary - * - * Is uniform for both PML and exponential damping absorbers. - * First index: 0 = x, 1 = y, 2 = z. - * Second index: 0 = negative (min coordinate), 1 = positive (max coordinate). - * Not for ODR-use. - */ - constexpr uint32_t numCells[ 3 ][ 2 ] = { - { Absorber::xNegativeNumCells, Absorber::xPositiveNumCells }, - { Absorber::yNegativeNumCells, Absorber::yPositiveNumCells }, - { Absorber::zNegativeNumCells, Absorber::zPositiveNumCells } - }; - -namespace detail -{ - - // Implementation has to be after numCells is defined - pmacc::traits::StringProperty getStringProperties( std::string const & name ) - { - pmacc::traits::StringProperty propList; - const DataSpace periodic = - Environment::get().EnvironmentController().getCommunicator().getPeriodic(); + //! 
Forward declaration to avoid mutual including with ExponentialDamping.hpp + class ExponentialDamping; - for( uint32_t i = 1; i < NumberOfExchanges::value; ++i ) - { - // for each planar direction: left right top bottom back front - if( FRONT % i == 0 ) + namespace detail { - const std::string directionName = ExchangeTypeNames()[i]; - const DataSpace relDir = Mask::getRelativeDirections(i); - - bool isPeriodic = false; - uint32_t axis = 0; // x(0) y(1) z(2) - uint32_t axisDir = 0; // negative (0), positive (1) - for( uint32_t d = 0; d < simDim; d++ ) + /** Get string properties of the absorber + * + * @param name absorber name + */ + HINLINE pmacc::traits::StringProperty getStringProperties(std::string const& name); + + /** Absorber wrapper + * + * Provides unified interface for the absorber information: + * size along the 6 boundaries and getStringProperties() implementation. + * Currently does not provide the computational part, only description. + * + * The general version uses exponential absorber settings since this is the + * default absorber. + * + * @tparam T_FieldSolver field solver + */ + template + struct Absorber { - if( relDir[d] * periodic[d] != 0 ) - isPeriodic = true; - if( relDir[d] != 0 ) - axis = d; + //! Number of absorber cells along the min x boundary + static constexpr uint32_t xNegativeNumCells = ABSORBER_CELLS[0][0]; + + //! Number of absorber cells along the max x boundary + static constexpr uint32_t xPositiveNumCells = ABSORBER_CELLS[0][1]; + + //! Number of absorber cells along the min y boundary + static constexpr uint32_t yNegativeNumCells = ABSORBER_CELLS[1][0]; + + //! Number of absorber cells along the max y boundary + static constexpr uint32_t yPositiveNumCells = ABSORBER_CELLS[1][1]; + + //! Number of absorber cells along the min z boundary + static constexpr uint32_t zNegativeNumCells = ABSORBER_CELLS[2][0]; + + //! 
Number of cells along the max z boundary + static constexpr uint32_t zPositiveNumCells = ABSORBER_CELLS[2][1]; + + //! Get string properties of the absorber + static pmacc::traits::StringProperty getStringProperties() + { + return detail::getStringProperties("exponential damping"); + } + }; + + namespace pml = maxwellSolver::Pml; + + /** Absorber wrapper + * + * Specialization for PML, works for both YeePML and LehePML + * + * @tparam T_CurrentInterpolation current interpolation for YeePML + * @tparam T_CurlE curl E for YeePML + * @tparam T_CurlB curl B for YeePML + */ + template + struct Absorber> + { + //! Number of absorber cells along the min x boundary + static constexpr uint32_t xNegativeNumCells = pml::NUM_CELLS[0][0]; + + //! Number of absorber cells along the max x boundary + static constexpr uint32_t xPositiveNumCells = pml::NUM_CELLS[0][1]; + + //! Number of absorber cells along the min y boundary + static constexpr uint32_t yNegativeNumCells = pml::NUM_CELLS[1][0]; + + //! Number of absorber cells along the max y boundary + static constexpr uint32_t yPositiveNumCells = pml::NUM_CELLS[1][1]; + + //! Number of absorber cells along the min z boundary + static constexpr uint32_t zNegativeNumCells = pml::NUM_CELLS[2][0]; + + //! Number of absorber cells along the max z boundary + static constexpr uint32_t zPositiveNumCells = pml::NUM_CELLS[2][1]; + + //! Get string properties of the absorber + static pmacc::traits::StringProperty getStringProperties() + { + return detail::getStringProperties("convolutional PML"); + } + }; + + } // namespace detail + + /** Absorber description implementing getStringProperties() + * + * To be used for writing absorber meta information, does not provide + * interface for running the absorber + */ + using Absorber = detail::Absorber; + + /** Number of absorber cells along each boundary + * + * Stores the global absorber thickness in case the absorbing boundary + * conditions are used along each boundary. 
Note that in case of periodic + * boundaries the corresponding values will be ignored. + * + * Is uniform for both PML and exponential damping absorbers. + * First index: 0 = x, 1 = y, 2 = z. + * Second index: 0 = negative (min coordinate), 1 = positive (max coordinate). + * Not for ODR-use. + */ + constexpr uint32_t numCells[3][2] + = {{Absorber::xNegativeNumCells, Absorber::xPositiveNumCells}, + {Absorber::yNegativeNumCells, Absorber::yPositiveNumCells}, + {Absorber::zNegativeNumCells, Absorber::zPositiveNumCells}}; + + //! Thickness of the absorbing layer + class Thickness + { + public: + //! Create a zero thickness + Thickness() + { + for(uint32_t axis = 0u; axis < 3u; axis++) + for(uint32_t direction = 0u; direction < 2u; direction++) + (*this)(axis, direction) = 0u; } - if( relDir[axis] > 0 ) - axisDir = 1; - std::string boundaryName = "open"; // absorbing boundary - if( isPeriodic ) - boundaryName = "periodic"; - - if( boundaryName == "open" ) + /** Get thickness for the given boundary + * + * @param axis axis, 0 = x, 1 = y, 2 = z + * @param direction direction, 0 = negative (min coordinate), + * 1 = positive (max coordinate) + */ + uint32_t operator()(uint32_t const axis, uint32_t const direction) const { - std::ostringstream boundaryParam; - boundaryParam << name + " over " - << numCells[axis][axisDir] << " cells"; - propList[directionName]["param"] = boundaryParam.str(); + return numCells[axis][direction]; } - else + + /** Get reference to thickness for the given boundary + * + * @param axis axis, 0 = x, 1 = y, 2 = z + * @param direction direction, 0 = negative (min coordinate), + * 1 = positive (max coordinate) + */ + uint32_t& operator()(uint32_t const axis, uint32_t const direction) { - propList[directionName]["param"] = "none"; + return numCells[axis][direction]; } - propList[directionName]["name"] = boundaryName; + private: + /** Number of absorber cells along each boundary + * + * First index: 0 = x, 1 = y, 2 = z. 
+ * Second index: 0 = negative (min coordinate), 1 = positive (max coordinate). + */ + uint32_t numCells[3][2]; + }; + + /** Get absorber thickness in number of cells for the global domain + * + * This function takes into account which boundaries are periodic and + * absorbing. + */ + inline Thickness getGlobalThickness() + { + Thickness thickness; + for(uint32_t axis = 0u; axis < 3u; axis++) + for(uint32_t direction = 0u; direction < 2u; direction++) + thickness(axis, direction) = numCells[axis][direction]; + const DataSpace isPeriodicBoundary + = Environment::get().EnvironmentController().getCommunicator().getPeriodic(); + for(uint32_t axis = 0u; axis < 3u; axis++) + if(isPeriodicBoundary[axis]) + { + thickness(axis, 0) = 0u; + thickness(axis, 1) = 0u; + } + return thickness; + } + + /** Get absorber thickness in number of cells for the current local domain + * + * This function takes into account the current domain decomposition and + * which boundaries are periodic and absorbing. + * + * Note that unlike getGlobalThickness() result which does not change + * throughout the simulation, the local thickness can change. Thus, + * the result of this function should not be reused on another time step, + * but rather the function called again. + */ + inline Thickness getLocalThickness() + { + Thickness thickness = getGlobalThickness(); + auto const numExchanges = NumberOfExchanges::value; + auto const communicationMask = Environment::get().GridController().getCommunicationMask(); + for(uint32_t exchange = 1u; exchange < numExchanges; exchange++) + { + /* Here we are only interested in the positive and negative + * directions for x, y, z axes and not the "diagonal" ones. 
+ * So skip other directions except left, right, top, bottom, + * back, front + */ + if(FRONT % exchange != 0) + continue; + + // Transform exchange into a pair of axis and direction + uint32_t axis = 0; + if(exchange >= BOTTOM && exchange <= TOP) + axis = 1; + if(exchange >= BACK) + axis = 2; + uint32_t direction = exchange % 2; + + // No absorber at the borders between two local domains + bool hasNeighbour = communicationMask.isSet(exchange); + if(hasNeighbour) + thickness(axis, direction) = 0u; + } + return thickness; } - } - return propList; - } -} // namespace detail + namespace detail + { + // Implementation has to be after numCells is defined + pmacc::traits::StringProperty getStringProperties(std::string const& name) + { + pmacc::traits::StringProperty propList; + const DataSpace periodic + = Environment::get().EnvironmentController().getCommunicator().getPeriodic(); + + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) + { + // for each planar direction: left right top bottom back front + if(FRONT % i == 0) + { + const std::string directionName = ExchangeTypeNames()[i]; + const DataSpace relDir = Mask::getRelativeDirections(i); + + bool isPeriodic = false; + uint32_t axis = 0; // x(0) y(1) z(2) + uint32_t axisDir = 0; // negative (0), positive (1) + for(uint32_t d = 0; d < simDim; d++) + { + if(relDir[d] * periodic[d] != 0) + isPeriodic = true; + if(relDir[d] != 0) + axis = d; + } + if(relDir[axis] > 0) + axisDir = 1; + + std::string boundaryName = "open"; // absorbing boundary + if(isPeriodic) + boundaryName = "periodic"; + + if(boundaryName == "open") + { + std::ostringstream boundaryParam; + boundaryParam << name + " over " << numCells[axis][axisDir] << " cells"; + propList[directionName]["param"] = boundaryParam.str(); + } + else + { + propList[directionName]["param"] = "none"; + } + + propList[directionName]["name"] = boundaryName; + } + } + return propList; + } + + } // namespace detail -} // namespace absorber -} // namespace fields + } // 
namespace absorber + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/absorber/ExponentialDamping.hpp b/include/picongpu/fields/absorber/ExponentialDamping.hpp index 392a413408..8fb34eab3f 100644 --- a/include/picongpu/fields/absorber/ExponentialDamping.hpp +++ b/include/picongpu/fields/absorber/ExponentialDamping.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -34,81 +34,74 @@ namespace picongpu { -namespace fields -{ -namespace absorber -{ - -class ExponentialDamping -{ -public: - - template - static void run(uint32_t currentStep, MappingDesc &cellDescription, BoxedMemory deviceBox) + namespace fields { - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - for (uint32_t i = 1; i < NumberOfExchanges::value; ++i) + namespace absorber { - /* only call for planes: left right top bottom back front*/ - if (FRONT % i == 0 && !(Environment::get().GridController().getCommunicationMask().isSet(i))) + class ExponentialDamping { - uint32_t direction = 0; /*set direction to X (default)*/ - if (i >= BOTTOM && i <= TOP) - direction = 1; /*set direction to Y*/ - if (i >= BACK) - direction = 2; /*set direction to Z*/ - - /* exchange mod 2 to find positive or negative direction - * positive direction = 1 - * negative direction = 0 - */ - uint32_t pos_or_neg = i % 2; - - uint32_t thickness = absorber::numCells[direction][pos_or_neg]; - float_X absorber_strength = ABSORBER_STRENGTH[direction][pos_or_neg]; - - if (thickness == 0) continue; /*if the absorber has no thickness we check the next side*/ - - /* allow to enable the absorber on the top side if the laser - * initialization plane in y direction is *not* in cell zero - */ - if (fields::laserProfiles::Selected::initPlaneY == 0) + public: + template + static void run(uint32_t currentStep, MappingDesc& cellDescription, BoxedMemory deviceBox) { - /* disable 
the absorber on top side if - * no slide was performed and - * laser init time is not over - */ - if (numSlides == 0 && ((currentStep * DELTA_T) <= fields::laserProfiles::Selected::INIT_TIME)) + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) { - /* disable absorber on top side */ - if (i == TOP) continue; + /* only call for planes: left right top bottom back front*/ + if(FRONT % i == 0 + && !(Environment::get().GridController().getCommunicationMask().isSet(i))) + { + uint32_t direction = 0; /*set direction to X (default)*/ + if(i >= BOTTOM && i <= TOP) + direction = 1; /*set direction to Y*/ + if(i >= BACK) + direction = 2; /*set direction to Z*/ + + /* exchange mod 2 to find positive or negative direction + * positive direction = 1 + * negative direction = 0 + */ + uint32_t pos_or_neg = i % 2; + + uint32_t thickness = absorber::numCells[direction][pos_or_neg]; + float_X absorber_strength = ABSORBER_STRENGTH[direction][pos_or_neg]; + + if(thickness == 0) + continue; /*if the absorber has no thickness we check the next side*/ + + /* allow to enable the absorber on the top side if the laser + * initialization plane in y direction is *not* in cell zero + */ + if(fields::laserProfiles::Selected::initPlaneY == 0) + { + /* disable the absorber on top side if + * no slide was performed and + * laser init time is not over + */ + if(numSlides == 0 + && ((currentStep * DELTA_T) <= fields::laserProfiles::Selected::INIT_TIME)) + { + /* disable absorber on top side */ + if(i == TOP) + continue; + } + } + + /* if sliding window is active we disable absorber on bottom side*/ + if(MovingWindow::getInstance().isSlidingWindowActive(currentStep) && i == BOTTOM) + continue; + + ExchangeMapping mapper(cellDescription, i); + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< + pmacc::math::CT::volume::type::value>::value; + + PMACC_KERNEL(KernelAbsorbBorder{}) + 
(mapper.getGridDim(), numWorkers)(deviceBox, thickness, absorber_strength, mapper); + } } } + }; - /* if sliding window is active we disable absorber on bottom side*/ - if (MovingWindow::getInstance().isSlidingWindowActive(currentStep) && i == BOTTOM) continue; - - ExchangeMapping mapper(cellDescription, i); - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelAbsorbBorder< numWorkers> {} )( - mapper.getGridDim(), - numWorkers - )( - deviceBox, - thickness, - absorber_strength, - mapper - ); - } - } - } - -}; - -} // namespace absorber -} // namespace fields + } // namespace absorber + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/absorber/ExponentialDamping.kernel b/include/picongpu/fields/absorber/ExponentialDamping.kernel index dd282dc48f..b31978aa33 100644 --- a/include/picongpu/fields/absorber/ExponentialDamping.kernel +++ b/include/picongpu/fields/absorber/ExponentialDamping.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -33,202 +33,153 @@ namespace picongpu { -namespace fields -{ -namespace absorber -{ -namespace detail -{ - - /** damp each field component at exchange the border - * - * @tparam T_NumWorkers, boost::mpl::integral_c number of workers - * @tparam T_Axis, boost::mpl::integral_c axis of the coordinate system - * (0 = x, 1 = y, 2 = z) - */ - template< - typename T_NumWorkers, - typename T_Axis - > - struct AbsorbInOneDirection + namespace fields { - /** absorb one direction - * - * The functor is only performed if `relExchangeDir[ T_Axis::value ] != 0`. 
- * - * @tparam T_BoxedMemory pmacc::DataBox, type of the field - * @tparam T_Mapping mapper functor type - * - * @param field field to manipulate - * @param thickness the thickness of the absorber area (in cells) - * @param absorberStrength strength of the absorber - * @param mapper functor to map a block to a supercell - * @param relExchangeDir relative direction for each dimension - * (-1 = negative; +1 = positive direction; 0 = direction not selected) - */ - template< - typename T_BoxedMemory, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_BoxedMemory & field, - uint32_t const thickness, - float_X const absorberStrength, - T_Mapping & mapper, - DataSpace< simDim > const & relExchangeDir - ) const + namespace absorber { - using namespace mappings::threads; - - constexpr int axis = T_Axis::value; - - // return if axis is not selected - if( relExchangeDir[ axis ] == 0 ) - return; - - using SuperCellSize = typename T_Mapping::SuperCellSize; - DataSpace< simDim > const superCellIdx( - mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) - ); - - constexpr uint32_t numWorkers = T_NumWorkers::value; - constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - - uint32_t const workerIdx = threadIdx.x; - - auto const numGuardSuperCells = mapper.getGuardingSuperCells(); - DataSpace< simDim > guardCells( numGuardSuperCells * SuperCellSize::toRT() ); - - // cell index of the supercell within the local domain (incl. 
the guards) - DataSpace< simDim > const localDomainCells = mapper.getGridSuperCells() * SuperCellSize::toRT(); - - using SuperCellDomCfg = IdxConfig< - cellsPerSuperCell, - numWorkers - >; - - ForEachIdx< - SuperCellDomCfg - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + namespace detail + { + /** damp each field component at exchange the border + * + * @tparam T_NumWorkers, boost::mpl::integral_c number of workers + * @tparam T_Axis, boost::mpl::integral_c axis of the coordinate system + * (0 = x, 1 = y, 2 = z) + */ + template + struct AbsorbInOneDirection { - /* cell index within the superCell */ - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >:: - template map< SuperCellSize >( linearIdx ); - - DataSpace< simDim > cell( superCellIdx * SuperCellSize::toRT( ) + cellIdx); - - - do + /** absorb one direction + * + * The functor is only performed if `relExchangeDir[ T_Axis::value ] != 0`. + * + * @tparam T_BoxedMemory pmacc::DataBox, type of the field + * @tparam T_Mapping mapper functor type + * + * @param field field to manipulate + * @param thickness the thickness of the absorber area (in cells) + * @param absorberStrength strength of the absorber + * @param mapper functor to map a block to a supercell + * @param relExchangeDir relative direction for each dimension + * (-1 = negative; +1 = positive direction; 0 = direction not selected) + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_BoxedMemory& field, + uint32_t const thickness, + float_X const absorberStrength, + T_Mapping& mapper, + DataSpace const& relExchangeDir) const { - cell[ axis ] += guardCells[ axis ] * -relExchangeDir[ axis ]; - int factor(0); - - if( relExchangeDir[ axis ] < 0 ) - { - factor = guardCells[ axis ] - cell[ axis ] + - thickness - 1; - } - else - { - factor = guardCells[ axis ] + cell[ axis ] - - localDomainCells[ axis ] + thickness; - } - - if( factor <= 0 ) - { - break; - } - else - { - float_X const a = math::exp( 
-absorberStrength * float_X( factor ) ); - field( cell ) = field( cell ) * a; - } - } while( true ); - } - ); - - } - }; + using namespace mappings::threads; + + constexpr int axis = T_Axis::value; + + // return if axis is not selected + if(relExchangeDir[axis] == 0) + return; + + using SuperCellSize = typename T_Mapping::SuperCellSize; + DataSpace const superCellIdx( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + + constexpr uint32_t numWorkers = T_NumWorkers::value; + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + auto const numGuardSuperCells = mapper.getGuardingSuperCells(); + DataSpace guardCells(numGuardSuperCells * SuperCellSize::toRT()); + + // cell index of the supercell within the local domain (incl. the guards) + DataSpace const localDomainCells = mapper.getGridSuperCells() * SuperCellSize::toRT(); + + using SuperCellDomCfg = IdxConfig; + + ForEachIdx{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + /* cell index within the superCell */ + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + + DataSpace cell(superCellIdx * SuperCellSize::toRT() + cellIdx); + + + do + { + cell[axis] += guardCells[axis] * -relExchangeDir[axis]; + int factor(0); + + if(relExchangeDir[axis] < 0) + { + factor = guardCells[axis] - cell[axis] + thickness - 1; + } + else + { + factor = guardCells[axis] + cell[axis] - localDomainCells[axis] + thickness; + } + + if(factor <= 0) + { + break; + } + else + { + float_X const a = math::exp(-absorberStrength * float_X(factor)); + field(cell) = field(cell) * a; + } + } while(true); + }); + } + }; + + } // namespace detail + + /** damp each field's components at the outer cells of the global domain + * + * Done for one direction per call. 
+ * + * @tparam T_numWorkers number of workers + */ + template + struct KernelAbsorbBorder + { + /** damp a field at the border + * + * @tparam T_BoxedMemory pmacc::DataBox, type of the field + * @tparam T_Mapping pmacc::ExchangeMapping, mapper functor type + * + * @param field filed to manipulate + * @param thickness the thickness of the absorber area (in cells) + * @param absorberStrength strength of the absorber (positive, exponential damping constant) + * @param mapper functor to map a block to a supercell, + * selects the direction of damping by the exchange type + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_BoxedMemory field, + uint32_t const thickness, + float_X const absorberStrength, + T_Mapping mapper) const + { + DataSpace const relExchangeDir + = Mask::getRelativeDirections(mapper.getExchangeType()); -} // namespace detail + /* create a sequence with int values [0;simDim) + * MakeSeq_t allows to use the result of mpl::range_c + * within the PMacc ForEach + */ + using SimulationDimensions = MakeSeq_t>; - /** damp each field's components at the outer cells of the global domain - * - * Done for one direction per call. 
- * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct KernelAbsorbBorder - { - /** damp a field at the border - * - * @tparam T_BoxedMemory pmacc::DataBox, type of the field - * @tparam T_Mapping pmacc::ExchangeMapping, mapper functor type - * - * @param field filed to manipulate - * @param thickness the thickness of the absorber area (in cells) - * @param absorberStrength strength of the absorber (positive, exponential damping constant) - * @param mapper functor to map a block to a supercell, - * selects the direction of damping by the exchange type - */ - template< - typename T_BoxedMemory, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_BoxedMemory field, - uint32_t const thickness, - float_X const absorberStrength, - T_Mapping mapper - ) const - { + meta::ForEach< + SimulationDimensions, + detail::AbsorbInOneDirection, boost::mpl::_1>> + absorbInAllDirections; - DataSpace< simDim > const relExchangeDir = - Mask::getRelativeDirections< simDim >( mapper.getExchangeType( ) ); + absorbInAllDirections(acc, field, thickness, absorberStrength, mapper, relExchangeDir); + } + }; - /* create a sequence with int values [0;simDim) - * MakeSeq_t allows to use the result of mpl::range_c - * within the PMacc ForEach - */ - using SimulationDimensions = MakeSeq_t< - boost::mpl::range_c< - int, - 0, - int( simDim ) - > - >; - - meta::ForEach< - SimulationDimensions, - detail::AbsorbInOneDirection< - boost::mpl::integral_c< - uint32_t, - T_numWorkers - >, - boost::mpl::_1 - > - > absorbInAllDirections; - - absorbInAllDirections( - acc, - field, - thickness, - absorberStrength, - mapper, - relExchangeDir - ); - } - }; - -} // namespace absorber -} // namespace fields + } // namespace absorber + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/background/cellwiseOperation.hpp b/include/picongpu/fields/background/cellwiseOperation.hpp index 
8186fc4f69..1ea3e10c95 100644 --- a/include/picongpu/fields/background/cellwiseOperation.hpp +++ b/include/picongpu/fields/background/cellwiseOperation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -32,166 +32,128 @@ namespace picongpu { -namespace cellwiseOperation -{ - - /** call a functor for each cell - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct KernelCellwiseOperation + namespace cellwiseOperation { - /** Kernel that calls T_OpFunctor and T_ValFunctor on each cell of a field - * - * performed code for each cell: - * @code{.cpp} - * opFunctor( acc, field, valFunctor( totalCellIdx, currentStep ) ); - * @endcode + /** call a functor for each cell * - * @tparam T_OpFunctor like assign, add, subtract, ... - * @tparam T_ValFunctor like "f(x,t)", "0.0", "readFromOtherField", ... - * @tparam T_FieldBox field type - * @tparam T_Mapping mapper which defines the working region - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param[in,out] field field to manipulate - * @param opFunctor binary operator used with the old and functor value - * (collective functors are not supported) - * @param valFunctor functor to execute (collective functors are not supported) - * @param totalDomainOffset offset to the local domain relative to the origin of the global domain - * @param currentStep simulation time step - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_OpFunctor, - typename T_ValFunctor, - typename T_FieldBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - T_FieldBox field, - T_OpFunctor opFunctor, - T_ValFunctor valFunctor, - DataSpace< simDim > const totalDomainOffset, - uint32_t const currentStep, - T_Mapping mapper - ) const - { - using namespace 
mappings::threads; - constexpr uint32_t cellsPerSupercell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorker = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - DataSpace< simDim > const block( mapper.getSuperCellIndex( DataSpace( blockIdx ) ) ); - DataSpace< simDim > const blockCell = block * SuperCellSize::toRT( ); - DataSpace< simDim > const guardCells = mapper.getGuardingSuperCells( ) * SuperCellSize::toRT( ); - - ForEachIdx< - IdxConfig< - cellsPerSupercell, - numWorker - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - // cell index within the superCell - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >:: - template map< SuperCellSize >( linearIdx ); - - opFunctor( - acc, - field( blockCell + cellIdx ), - valFunctor( - blockCell + cellIdx + totalDomainOffset - guardCells, - currentStep - ) - ); - } - ); - } - }; - - /** Call a functor on each cell of a field - * - * \tparam T_Area Where to compute on (CORE, BORDER, GUARD) - */ - template< uint32_t T_Area > - class CellwiseOperation - { - private: - - MappingDesc m_cellDescription; - - public: - CellwiseOperation( MappingDesc const cellDescription ) : - m_cellDescription( cellDescription ) + template + struct KernelCellwiseOperation { - } - - /** Functor call to execute the op/valFunctor on a given field + /** Kernel that calls T_OpFunctor and T_ValFunctor on each cell of a field + * + * performed code for each cell: + * @code{.cpp} + * opFunctor( acc, field, valFunctor( totalCellIdx, currentStep ) ); + * @endcode + * + * @tparam T_OpFunctor like assign, add, subtract, ... + * @tparam T_ValFunctor like "f(x,t)", "0.0", "readFromOtherField", ... 
+ * @tparam T_FieldBox field type + * @tparam T_Mapping mapper which defines the working region + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param[in,out] field field to manipulate + * @param opFunctor binary operator used with the old and functor value + * (collective functors are not supported) + * @param valFunctor functor to execute (collective functors are not supported) + * @param totalDomainOffset offset to the local domain relative to the origin of the global domain + * @param currentStep simulation time step + * @param mapper functor to map a block to a supercell + */ + template< + typename T_OpFunctor, + typename T_ValFunctor, + typename T_FieldBox, + typename T_Mapping, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_FieldBox field, + T_OpFunctor opFunctor, + T_ValFunctor valFunctor, + DataSpace const totalDomainOffset, + uint32_t const currentStep, + T_Mapping mapper) const + { + using namespace mappings::threads; + constexpr uint32_t cellsPerSupercell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorker = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + DataSpace const block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + DataSpace const blockCell = block * SuperCellSize::toRT(); + DataSpace const guardCells = mapper.getGuardingSuperCells() * SuperCellSize::toRT(); + + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + // cell index within the superCell + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + + opFunctor( + acc, + field(blockCell + cellIdx), + valFunctor(blockCell + cellIdx + totalDomainOffset - guardCells, currentStep)); + }); + } + }; + + /** Call a functor on each cell of a field * - * @tparam ValFunctor A Value-Producing functor for a given cell - * in time and space - * @tparam OpFunctor A manipulating functor like pmacc::nvidia::functors::add + * \tparam T_Area Where 
to compute on (CORE, BORDER, GUARD) */ - template< - typename T_Field, - typename T_OpFunctor, - typename T_ValFunctor - > - void - operator()( - T_Field field, - T_OpFunctor opFunctor, - T_ValFunctor valFunctor, - uint32_t const currentStep, - const bool enabled = true - ) const + template + class CellwiseOperation { - if( !enabled ) - return; - - SubGrid< simDim > const & subGrid = Environment< simDim >::get( ).SubGrid(); - // offset to the local domain relative to the origin of the global domain - DataSpace< simDim > totalDomainOffset( subGrid.getLocalDomain( ).offset ); - uint32_t const numSlides = MovingWindow::getInstance( ).getSlideCounter( currentStep ); - - /** Assumption: all GPUs have the same number of cells in - * y direction for sliding window + private: + MappingDesc m_cellDescription; + + public: + CellwiseOperation(MappingDesc const cellDescription) : m_cellDescription(cellDescription) + { + } + + /** Functor call to execute the op/valFunctor on a given field + * + * @tparam ValFunctor A Value-Producing functor for a given cell + * in time and space + * @tparam OpFunctor A manipulating functor like pmacc::nvidia::functors::add */ - totalDomainOffset.y( ) += numSlides * subGrid.getLocalDomain().size.y( ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - AreaMapping< - T_Area, - MappingDesc - > mapper( m_cellDescription ); - - PMACC_KERNEL( KernelCellwiseOperation< numWorkers >{ })( - mapper.getGridDim( ), - numWorkers - )( - field->getDeviceDataBox( ), - opFunctor, - valFunctor, - totalDomainOffset, - currentStep, - mapper - ); - } - }; - -} // namespace cellwiseOperation + template + void operator()( + T_Field field, + T_OpFunctor opFunctor, + T_ValFunctor valFunctor, + uint32_t const currentStep, + const bool enabled = true) const + { + if(!enabled) + return; + + SubGrid const& subGrid = Environment::get().SubGrid(); + // offset to the local domain relative to 
the origin of the global domain + DataSpace totalDomainOffset(subGrid.getLocalDomain().offset); + uint32_t const numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + + /** Assumption: all GPUs have the same number of cells in + * y direction for sliding window + */ + totalDomainOffset.y() += numSlides * subGrid.getLocalDomain().size.y(); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + AreaMapping mapper(m_cellDescription); + + PMACC_KERNEL(KernelCellwiseOperation{}) + (mapper.getGridDim(), + numWorkers)(field->getDeviceDataBox(), opFunctor, valFunctor, totalDomainOffset, currentStep, mapper); + } + }; + + } // namespace cellwiseOperation } // namespace picongpu diff --git a/include/picongpu/fields/background/templates/TWTS/BField.hpp b/include/picongpu/fields/background/templates/TWTS/BField.hpp index 1fe49f322d..11d8fc943f 100644 --- a/include/picongpu/fields/background/templates/TWTS/BField.hpp +++ b/include/picongpu/fields/background/templates/TWTS/BField.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus, Axel Huebl +/* Copyright 2014-2021 Alexander Debus, Axel Huebl * * This file is part of PIConGPU. * @@ -28,168 +28,159 @@ namespace picongpu { -/* Load pre-defined background field */ -namespace templates -{ -/* Traveling-wave Thomson scattering laser pulse */ -namespace twts -{ - -class BField -{ -public: - using float_T = float_X; - - enum PolarizationType + /* Load pre-defined background field */ + namespace templates { - /* The linear polarization of the TWTS laser is defined - * relative to the plane of the pulse front tilt (reference plane). - * - * Polarisation is normal to the reference plane. - * Use Ex-fields (and corresponding B-fields) in TWTS laser internal coordinate system. - */ - LINEAR_X = 1u, - /* Polarization lies within the reference plane. - * Use Ey-fields (and corresponding B-fields) in TWTS laser internal coordinate system. 
- */ - LINEAR_YZ = 2u, - }; - - /* Center of simulation volume in number of cells */ - PMACC_ALIGN(halfSimSize,DataSpace); - /* y-position of TWTS coordinate origin inside the simulation coordinates [meter] - * The other origin coordinates (x and z) default to globally centered values - * with respect to the simulation volume. - */ - PMACC_ALIGN(focus_y_SI, const float_64); - /* Laser wavelength [meter] */ - PMACC_ALIGN(wavelength_SI, const float_64); - /* TWTS laser pulse duration [second] */ - PMACC_ALIGN(pulselength_SI, const float_64); - /* line focus height of TWTS pulse [meter] */ - PMACC_ALIGN(w_x_SI, const float_64); - /* line focus width of TWTS pulse [meter] */ - PMACC_ALIGN(w_y_SI, const float_64); - /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ - PMACC_ALIGN(phi, const float_X); - /* Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */ - PMACC_ALIGN(phiPositive,float_X); - /* propagation speed of TWTS laser overlap - normalized to the speed of light. [Default: beta0 = 1.0] */ - PMACC_ALIGN(beta_0, const float_X); - /* If auto_tdelay=FALSE, then a user defined delay is used. [second] */ - PMACC_ALIGN(tdelay_user_SI, const float_64); - /* Make time step constant accessible to device. */ - PMACC_ALIGN(dt, const float_64); - /* Make length normalization constant accessible to device. */ - PMACC_ALIGN(unit_length, const float_64); - /* TWTS laser time delay */ - PMACC_ALIGN(tdelay,float_64); - /* Should the TWTS laser time delay be chosen automatically, such that - * the laser gradually enters the simulation volume? [Default: TRUE] - */ - PMACC_ALIGN(auto_tdelay, const bool); - /* Polarization of TWTS laser */ - PMACC_ALIGN(pol, const PolarizationType); - - /** Magnetic field of the TWTS laser - * - * \param focus_y_SI the distance to the laser focus in y-direction [m] - * \param wavelength_SI central wavelength [m] - * \param pulselength_SI sigma of std. 
gauss for intensity (E^2), - * pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)] - * \param w_x beam waist: distance from the axis where the pulse electric field - * decreases to its 1/e^2-th part at the focus position of the laser [m] - * \param w_y \see w_x - * \param phi interaction angle between TWTS laser propagation vector and - * the y-axis [rad, default = 90.*(PI/180.)] - * \param beta_0 propagation speed of overlap normalized to - * the speed of light [c, default = 1.0] - * \param tdelay_user manual time delay if auto_tdelay is false - * \param auto_tdelay calculate the time delay such that the TWTS pulse is not - * inside the simulation volume at simulation start timestep = 0 [default = true] - * \param pol determines the TWTS laser polarization, which is either normal or parallel - * to the laser pulse front tilt plane [ default= LINEAR_X , LINEAR_YZ ] - */ - HINLINE - BField( const float_64 focus_y_SI, - const float_64 wavelength_SI, - const float_64 pulselength_SI, - const float_64 w_x_SI, - const float_64 w_y_SI, - const float_X phi = 90.*(PI / 180.), - const float_X beta_0 = 1.0, - const float_64 tdelay_user_SI = 0.0, - const bool auto_tdelay = true, - const PolarizationType pol = LINEAR_X ); - - - /** Specify your background field B(r,t) here - * - * \param cellIdx The total cell id counted from the start at t=0 - * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const; - - /** Calculate the By(r,t) field, when electric field vector (Ex,0,0) - * is normal to the pulse-front-tilt plane (y,z) - * - * \param pos Spatial position of the target field. 
- * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE float_T - calcTWTSBy( const float3_64& pos, const float_64 time ) const; - - /** Calculate the Bz(r,t) field, when electric field vector (Ex,0,0) - * is normal to the pulse-front-tilt plane (y,z) - * - * \param pos Spatial position of the target field. - * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE float_T - calcTWTSBz_Ex( const float3_64& pos, const float_64 time ) const; - - /** Calculate the By(r,t) field, when electric field vector (0,Ey,0) - * lies within the pulse-front-tilt plane (y,z) - * - * \param pos Spatial position of the target field. - * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE float_T - calcTWTSBx( const float3_64& pos, const float_64 time ) const; - - /** Calculate the Bz(r,t) field here (electric field vector (0,Ey,0) - * lies within the pulse-front-tilt plane (y,z) - * - * \param pos Spatial position of the target field. - * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE float_T - calcTWTSBz_Ey( const float3_64& pos, const float_64 time ) const; - - /** Calculate the B-field vector of the TWTS laser in SI units. - * \tparam T_dim Specializes for the simulation dimension - * \param cellIdx The total cell id counted from the start at timestep 0 - * \return B-field vector of the rotated TWTS field in SI units */ - template - HDINLINE float3_X - getTWTSBfield_Normalized( - const pmacc::math::Vector& eFieldPositions_SI, - const float_64 time) const; - - /** Calculate the B-field vector of the "in-plane" polarized TWTS laser in SI units. 
- * \tparam T_dim Specializes for the simulation dimension - * \param cellIdx The total cell id counted from the start at timestep 0 - * \return B-field vector of the rotated TWTS field in SI units */ - template - HDINLINE float3_X - getTWTSBfield_Normalized_Ey( - const pmacc::math::Vector& eFieldPositions_SI, - const float_64 time) const; - -}; - -} /* namespace twts */ -} /* namespace templates */ + /* Traveling-wave Thomson scattering laser pulse */ + namespace twts + { + class BField + { + public: + using float_T = float_X; + + enum PolarizationType + { + /* The linear polarization of the TWTS laser is defined + * relative to the plane of the pulse front tilt (reference plane). + * + * Polarisation is normal to the reference plane. + * Use Ex-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_X = 1u, + /* Polarization lies within the reference plane. + * Use Ey-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_YZ = 2u, + }; + + /* Center of simulation volume in number of cells */ + PMACC_ALIGN(halfSimSize, DataSpace); + /* y-position of TWTS coordinate origin inside the simulation coordinates [meter] + * The other origin coordinates (x and z) default to globally centered values + * with respect to the simulation volume. + */ + PMACC_ALIGN(focus_y_SI, const float_64); + /* Laser wavelength [meter] */ + PMACC_ALIGN(wavelength_SI, const float_64); + /* TWTS laser pulse duration [second] */ + PMACC_ALIGN(pulselength_SI, const float_64); + /* line focus height of TWTS pulse [meter] */ + PMACC_ALIGN(w_x_SI, const float_64); + /* line focus width of TWTS pulse [meter] */ + PMACC_ALIGN(w_y_SI, const float_64); + /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ + PMACC_ALIGN(phi, const float_X); + /* Takes value 1.0 for phi > 0 and -1.0 for phi < 0. 
*/ + PMACC_ALIGN(phiPositive, float_X); + /* propagation speed of TWTS laser overlap + normalized to the speed of light. [Default: beta0 = 1.0] */ + PMACC_ALIGN(beta_0, const float_X); + /* If auto_tdelay=FALSE, then a user defined delay is used. [second] */ + PMACC_ALIGN(tdelay_user_SI, const float_64); + /* Make time step constant accessible to device. */ + PMACC_ALIGN(dt, const float_64); + /* Make length normalization constant accessible to device. */ + PMACC_ALIGN(unit_length, const float_64); + /* TWTS laser time delay */ + PMACC_ALIGN(tdelay, float_64); + /* Should the TWTS laser time delay be chosen automatically, such that + * the laser gradually enters the simulation volume? [Default: TRUE] + */ + PMACC_ALIGN(auto_tdelay, const bool); + /* Polarization of TWTS laser */ + PMACC_ALIGN(pol, const PolarizationType); + + /** Magnetic field of the TWTS laser + * + * \param focus_y_SI the distance to the laser focus in y-direction [m] + * \param wavelength_SI central wavelength [m] + * \param pulselength_SI sigma of std. 
gauss for intensity (E^2), + * pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)] + * \param w_x beam waist: distance from the axis where the pulse electric field + * decreases to its 1/e^2-th part at the focus position of the laser [m] + * \param w_y \see w_x + * \param phi interaction angle between TWTS laser propagation vector and + * the y-axis [rad, default = 90.*(PI/180.)] + * \param beta_0 propagation speed of overlap normalized to + * the speed of light [c, default = 1.0] + * \param tdelay_user manual time delay if auto_tdelay is false + * \param auto_tdelay calculate the time delay such that the TWTS pulse is not + * inside the simulation volume at simulation start timestep = 0 [default = true] + * \param pol determines the TWTS laser polarization, which is either normal or parallel + * to the laser pulse front tilt plane [ default= LINEAR_X , LINEAR_YZ ] + */ + HINLINE + BField( + const float_64 focus_y_SI, + const float_64 wavelength_SI, + const float_64 pulselength_SI, + const float_64 w_x_SI, + const float_64 w_y_SI, + const float_X phi = 90. * (PI / 180.), + const float_X beta_0 = 1.0, + const float_64 tdelay_user_SI = 0.0, + const bool auto_tdelay = true, + const PolarizationType pol = LINEAR_X); + + + /** Specify your background field B(r,t) here + * + * \param cellIdx The total cell id counted from the start at t=0 + * \param currentStep The current time step */ + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const; + + /** Calculate the By(r,t) field, when electric field vector (Ex,0,0) + * is normal to the pulse-front-tilt plane (y,z) + * + * \param pos Spatial position of the target field. 
+ * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBy(const float3_64& pos, const float_64 time) const; + + /** Calculate the Bz(r,t) field, when electric field vector (Ex,0,0) + * is normal to the pulse-front-tilt plane (y,z) + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBz_Ex(const float3_64& pos, const float_64 time) const; + + /** Calculate the By(r,t) field, when electric field vector (0,Ey,0) + * lies within the pulse-front-tilt plane (y,z) + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBx(const float3_64& pos, const float_64 time) const; + + /** Calculate the Bz(r,t) field here (electric field vector (0,Ey,0) + * lies within the pulse-front-tilt plane (y,z) + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBz_Ey(const float3_64& pos, const float_64 time) const; + + /** Calculate the B-field vector of the TWTS laser in SI units. + * \tparam T_dim Specializes for the simulation dimension + * \param cellIdx The total cell id counted from the start at timestep 0 + * \return B-field vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSBfield_Normalized( + const pmacc::math::Vector& eFieldPositions_SI, + const float_64 time) const; + + /** Calculate the B-field vector of the "in-plane" polarized TWTS laser in SI units. 
+ * \tparam T_dim Specializes for the simulation dimension + * \param cellIdx The total cell id counted from the start at timestep 0 + * \return B-field vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSBfield_Normalized_Ey( + const pmacc::math::Vector& eFieldPositions_SI, + const float_64 time) const; + }; + + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/BField.tpp b/include/picongpu/fields/background/templates/TWTS/BField.tpp index 89a29ba74a..b0dfdf918d 100644 --- a/include/picongpu/fields/background/templates/TWTS/BField.tpp +++ b/include/picongpu/fields/background/templates/TWTS/BField.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus, Axel Huebl +/* Copyright 2014-2021 Alexander Debus, Axel Huebl * * This file is part of PIConGPU. * @@ -37,695 +37,674 @@ namespace picongpu { -/** Load pre-defined background field */ -namespace templates -{ -/** Traveling-wave Thomson scattering laser pulse */ -namespace twts -{ - - HINLINE - BField::BField( const float_64 focus_y_SI, - const float_64 wavelength_SI, - const float_64 pulselength_SI, - const float_64 w_x_SI, - const float_64 w_y_SI, - const float_X phi, - const float_X beta_0, - const float_64 tdelay_user_SI, - const bool auto_tdelay, - const PolarizationType pol ) : - focus_y_SI(focus_y_SI), wavelength_SI(wavelength_SI), - pulselength_SI(pulselength_SI), w_x_SI(w_x_SI), - w_y_SI(w_y_SI), phi(phi), beta_0(beta_0), - tdelay_user_SI(tdelay_user_SI), dt(SI::DELTA_T_SI), - unit_length(UNIT_LENGTH), auto_tdelay(auto_tdelay), pol(pol), phiPositive( float_X(1.0) ) - { - /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done - * on host (see fieldBackground.param), this is no problem. 
- */ - const SubGrid& subGrid = Environment::get().SubGrid(); - halfSimSize = subGrid.getGlobalDomain().size / 2; - tdelay = detail::getInitialTimeDelay_SI(auto_tdelay, tdelay_user_SI, - halfSimSize, pulselength_SI, - focus_y_SI, phi, beta_0); - if ( phi < float_X(0.0) ) phiPositive = float_X(-1.0); - } - - template<> - HDINLINE float3_X - BField::getTWTSBfield_Normalized( - const pmacc::math::Vector& bFieldPositions_SI, - const float_64 time) const - { - typedef pmacc::math::Vector PosVecVec; - PosVecVec pos(PosVecVec::create( - float3_64::create(0.0) - )); - - for (uint32_t k = 0; k - HDINLINE float3_X - BField::getTWTSBfield_Normalized_Ey( - const pmacc::math::Vector& bFieldPositions_SI, - const float_64 time) const - { - typedef pmacc::math::Vector PosVecVec; - PosVecVec pos(PosVecVec::create( - float3_64::create(0.0) - )); - - for (uint32_t k = 0; k - HDINLINE float3_X - BField::getTWTSBfield_Normalized( - const pmacc::math::Vector& bFieldPositions_SI, - const float_64 time) const + /** Load pre-defined background field */ + namespace templates { - typedef pmacc::math::Vector PosVecVec; - PosVecVec pos(PosVecVec::create( - float3_64::create(0.0) - )); - - for (uint32_t k = 0; k z (Meaning: In 2D-sim, insert cell-coordinate x - * into TWTS field function coordinate z.) - * y --> y - * z --> -x (Since z=0 for 2D, we use the existing - * 3D TWTS-field-function and set x = -0) - * The transformed 3D coordinates are used to calculate the field components. - * Ex --> Ez (Meaning: Calculate Ex-component of existing 3D TWTS-field (calcTWTSEx) using - * transformed position vectors to obtain the corresponding Ez-component in 2D. - * Note: Swapping field component coordinates also alters the - * intra-cell position offset.) - * By --> By - * Bz --> -Bx (Yes, the sign is necessary.) - * - * An example of intra-cell position offsets is the staggered Yee-grid. - * - * This procedure is analogous to 3D case, but replace By --> By and Bz --> -Bx. 
Hence the - * grid cell offset for Bx has to be used instead of Bz. Mind the "-"-sign. - */ - - /* Calculate By-component with the intra-cell offset of a By-field */ - const float_64 By_By = calcTWTSBy(pos[1], time); - /* Calculate Bx-component with the intra-cell offset of a By-field */ - const float_64 Bx_By = -calcTWTSBz_Ex(pos[1], time); - /* Calculate By-component with the intra-cell offset of a Bx-field */ - const float_64 By_Bx = calcTWTSBy(pos[0], time); - /* Calculate Bx-component with the intra-cell offset of a Bx-field */ - const float_64 Bx_Bx = -calcTWTSBz_Ex(pos[0], time); - /* Since we rotated all position vectors before calling calcTWTSBy and calcTWTSBz_Ex, we - * need to back-rotate the resulting B-field vector. Now the rotation is done - * analogously in the (y,x)-plane. (Reverse of the position vector transformation.) - * - * RotationMatrix[-(PI / 2+phi)].(By,Bx) for rotating back the field vectors. - */ - const float_64 By_rot = -math::sin(phi)*By_By+math::cos(phi)*Bx_By; - const float_64 Bx_rot = -math::cos(phi)*By_Bx-math::sin(phi)*Bx_Bx; - - /* Finally, the B-field normalized to the peak amplitude. */ - return float3_X( float_X(Bx_rot), - float_X(By_rot), - float_X(0.0) ); - } - - template<> - HDINLINE float3_X - BField::getTWTSBfield_Normalized_Ey( - const pmacc::math::Vector& bFieldPositions_SI, - const float_64 time) const - { - typedef pmacc::math::Vector PosVecVec; - PosVecVec pos(PosVecVec::create( - float3_64::create(0.0) - )); - - for (uint32_t k = 0; k z (Meaning: In 2D-sim, insert cell-coordinate x - * into TWTS field function coordinate z.) - * y --> y - * z --> -x (Since z=0 for 2D, we use the existing - * 3D TWTS-field-function and set x = -0) - * Ex --> Ez (Meaning: Calculate Ex-component of existing 3D TWTS-field to obtain - * corresponding Ez-component in 2D. - * Note: the intra-cell position offset due to the staggered grid for Ez.) - * By --> By - * Bz --> -Bx (Yes, the sign is necessary.) 
- * - * This procedure is analogous to 3D case, but replace By --> By and Bz --> -Bx. Hence the - * grid cell offset for Bx has to be used instead of Bz. Mind the -sign. - */ - - /* Calculate Bx-component with the intra-cell offset of a By-field */ - const float_64 Bx_By = -calcTWTSBz_Ex(pos[1], time); - /* Calculate Bx-component with the intra-cell offset of a Bx-field */ - const float_64 Bx_Bx = -calcTWTSBz_Ex(pos[0], time); - - /* Since we rotated all position vectors before calling calcTWTSBz_Ex, we - * need to back-rotate the resulting B-field vector. Now the rotation is done - * analogously in the (y,x)-plane. (Reverse of the position vector transformation.) - * - * RotationMatrix[-(PI / 2+phi)].(By,Bx) - * for rotating back the field-vectors. - */ - const float_64 By_rot = +math::cos(phi)*Bx_By; - const float_64 Bx_rot = -math::sin(phi)*Bx_Bx; - - /* Finally, the B-field normalized to the peak amplitude. */ - return float3_X( float_X( Bx_rot ), - float_X( By_rot ), - float_X( calcTWTSBx(pos[2], time) ) ); - } - - HDINLINE float3_X - BField::operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const - { - const float_64 time_SI = float_64(currentStep) * dt - tdelay; - const traits::FieldPosition fieldPosB; - - const pmacc::math::Vector bFieldPositions_SI = - detail::getFieldPositions_SI(cellIdx, halfSimSize, - fieldPosB(), unit_length, focus_y_SI, phi); - /* Single TWTS-Pulse */ - switch (pol) + /** Traveling-wave Thomson scattering laser pulse */ + namespace twts { - case LINEAR_X : - return getTWTSBfield_Normalized(bFieldPositions_SI, time_SI); - - case LINEAR_YZ : - return getTWTSBfield_Normalized_Ey(bFieldPositions_SI, time_SI); - } - return getTWTSBfield_Normalized(bFieldPositions_SI, time_SI); // defensive default - } - - /** Calculate the By(r,t) field here - * - * \param pos Spatial position of the target field. 
- * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE BField::float_T - BField::calcTWTSBy( const float3_64& pos, const float_64 time ) const - { - using complex_T = pmacc::math::Complex< float_T >; - using complex_64 = pmacc::math::Complex< float_64 >; - /* Unit of speed */ - const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; - /* Unit of time */ - const float_64 UNIT_TIME = SI::DELTA_T_SI; - /* Unit of length */ - const float_64 UNIT_LENGTH = UNIT_TIME*UNIT_SPEED; - - /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ - const float_T beta0 = float_T(beta_0); - /* If phi < 0 the formulas below are not directly applicable. - * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the - * z-axis of the coordinate system in this function. - */ - const float_T phiReal = float_T( math::abs(phi) ); - const float_T alphaTilt = math::atan2(float_T(1.0)-beta0*math::cos(phiReal), - beta0*math::sin(phiReal)); - /* Definition of the laser pulse front tilt angle for the laser field below. - * - * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the - * local "phi_T" not equal in value to the object member "phiReal" or "phi"? - * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system - * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence - * the dispersion will (although physically correct) be slightly off the ideal TWTS - * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for - * scenarios close to beta0 = 1. - */ - const float_T phiT = float_T(2.0)*alphaTilt; - - /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for - * documentation purposes. 
- * const float_T eta = float_T(PI/2) - (phiReal - alphaTilt); - */ - - const float_T cspeed = float_T( SI::SPEED_OF_LIGHT_SI / UNIT_SPEED ); - const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); - const float_T om0 = float_T(2.0*PI*cspeed / lambda0); - /* factor 2 in tauG arises from definition convention in laser formula */ - const float_T tauG = float_T(pulselength_SI*2.0 / UNIT_TIME); - /* w0 is wx here --> w0 could be replaced by wx */ - const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); - const float_T rho0 = float_T(PI*w0*w0 / lambda0); - /* wy is width of TWTS pulse */ - const float_T wy = float_T(w_y_SI / UNIT_LENGTH); - const float_T k = float_T(2.0*PI / lambda0); - /* If phi < 0 the entire pulse is rotated by 180 deg around the - * z-axis of the coordinate system without also changing - * the orientation of the resulting field vectors. - */ - const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); - const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); - const float_T z = float_T(pos.z() / UNIT_LENGTH); - const float_T t = float_T(time / UNIT_TIME); - - /* Shortcuts for speeding up the field calculation. */ - const float_T sinPhi = math::sin(phiT); - const float_T cosPhi = math::cos(phiT); - const float_T cosPhi2 = math::cos(phiT / 2.0); - const float_T tanPhi2 = math::tan(phiT / 2.0); - - /* The "helpVar" variables decrease the nesting level of the evaluated expressions and - * thus help with formal code verification through manual code inspection. 
- */ - const complex_T helpVar1 = rho0 + complex_T(0,1)*y*cosPhi + complex_T(0,1)*z*sinPhi; - const complex_T helpVar2 = cspeed*om0*tauG*tauG + complex_T(0,2) - *(-z - y*math::tan(float_T(PI / 2)-phiT))*tanPhi2*tanPhi2; - const complex_T helpVar3 = complex_T(0,1)*rho0 - y*cosPhi - z*sinPhi; - - const complex_T helpVar4 = float_T(-1.0)*( - cspeed*cspeed*k*om0*tauG*tauG*wy*wy*x*x - + float_T(2.0)*cspeed*cspeed*om0*t*t*wy*wy*rho0 - - complex_T(0,2)*cspeed*cspeed*om0*om0*t*tauG*tauG*wy*wy*rho0 - + float_T(2.0)*cspeed*cspeed*om0*tauG*tauG*y*y*rho0 - - float_T(4.0)*cspeed*om0*t*wy*wy*z*rho0 - + complex_T(0,2)*cspeed*om0*om0*tauG*tauG*wy*wy*z*rho0 - + float_T(2.0)*om0*wy*wy*z*z*rho0 - + float_T(4.0)*cspeed*om0*t*wy*wy*y*rho0*tanPhi2 - - float_T(4.0)*om0*wy*wy*y*z*rho0*tanPhi2 - - complex_T(0,2)*cspeed*k*wy*wy*x*x*z*tanPhi2*tanPhi2 - + float_T(2.0)*om0*wy*wy*y*y*rho0*tanPhi2*tanPhi2 - - float_T(4.0)*cspeed*om0*t*wy*wy*z*rho0*tanPhi2*tanPhi2 - - complex_T(0,4)*cspeed*y*y*z*rho0*tanPhi2*tanPhi2 - + float_T(4.0)*om0*wy*wy*z*z*rho0*tanPhi2*tanPhi2 - - complex_T(0,2)*cspeed*k*wy*wy*x*x*y*math::tan(float_T(PI / 2)-phiT)*tanPhi2*tanPhi2 - - float_T(4.0)*cspeed*om0*t*wy*wy*y*rho0*math::tan(float_T(PI / 2)-phiT) - *tanPhi2*tanPhi2 - - complex_T(0,4)*cspeed*y*y*y*rho0*math::tan(float_T(PI / 2)-phiT)*tanPhi2*tanPhi2 - + float_T(4.0)*om0*wy*wy*y*z*rho0*math::tan(float_T(PI / 2)-phiT)*tanPhi2*tanPhi2 - + float_T(2.0)*z*sinPhi*( - + om0*( - + cspeed*cspeed*( - complex_T(0,1)*t*t*wy*wy - + om0*t*tauG*tauG*wy*wy - + complex_T(0,1)*tauG*tauG*y*y - ) - - cspeed*(complex_T(0,2)*t + om0*tauG*tauG)*wy*wy*z - + complex_T(0,1)*wy*wy*z*z - ) - + complex_T(0,2)*om0*wy*wy*y*(cspeed*t - z)*tanPhi2 - + complex_T(0,1)*tanPhi2*tanPhi2*( - complex_T(0,-2)*cspeed*y*y*z - + om0*wy*wy*( y*y - float_T(2.0)*(cspeed*t - z)*z ) - ) - ) - + float_T(2.0)*y*cosPhi*( - + om0*( - + cspeed*cspeed*( - complex_T(0,1)*t*t*wy*wy - + om0*t*tauG*tauG*wy*wy - + complex_T(0,1)*tauG*tauG*y*y - ) - - cspeed*(complex_T(0,2)*t 
+ om0*tauG*tauG)*wy*wy*z - + complex_T(0,1)*wy*wy*z*z - ) - + complex_T(0,2)*om0*wy*wy*y*(cspeed*t - z)*tanPhi2 - + complex_T(0,1)*( - complex_T(0,-4)*cspeed*y*y*z - + om0*wy*wy*(y*y - float_T(4.0)*(cspeed*t - z)*z) - - float_T(2.0)*y*( - + cspeed*om0*t*wy*wy - + complex_T(0,1)*cspeed*y*y - - om0*wy*wy*z - )*math::tan(float_T(PI / 2)-phiT) - )*tanPhi2*tanPhi2 - ) - /* The "round-trip" conversion in the line below fixes a gross accuracy bug - * in floating-point arithmetics, when float_T is set to float_X. - */ - ) * complex_T( float_64(1.0) / complex_64(float_T(2.0)*cspeed*wy*wy*helpVar1*helpVar2) ); - - const complex_T helpVar5 = complex_T(0,-1)*cspeed*om0*tauG*tauG - + (-z - y*math::tan(float_T(PI / 2)-phiT)) - *tanPhi2*tanPhi2*float_T(2.0); - const complex_T helpVar6 = (cspeed*(cspeed*om0*tauG*tauG + complex_T(0,2) - *(-z - y*math::tan(float_T(PI / 2)-phiT))*tanPhi2*tanPhi2)) - / (om0*rho0); - const complex_T result = (math::exp(helpVar4)*tauG / cosPhi2 / cosPhi2 - *(rho0 + complex_T(0,1)*y*cosPhi + complex_T(0,1)*z*sinPhi) - *( - complex_T(0,2)*cspeed*t + cspeed*om0*tauG*tauG - complex_T(0,4)*z - + cspeed*(complex_T(0,2)*t + om0*tauG*tauG)*cosPhi - + complex_T(0,2)*y*tanPhi2 - )*math::pow(helpVar3,float_T(-1.5)) - ) / (float_T(2.0)*helpVar5*math::sqrt(helpVar6)); - - return result.get_real() / UNIT_SPEED; - } - - /** Calculate the Bz(r,t) field - * - * \param pos Spatial position of the target field. 
- * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE BField::float_T - BField::calcTWTSBz_Ex( const float3_64& pos, const float_64 time ) const - { - using complex_T = pmacc::math::Complex< float_T >; - /** Unit of Speed */ - const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; - /** Unit of time */ - const float_64 UNIT_TIME = SI::DELTA_T_SI; - /** Unit of length */ - const float_64 UNIT_LENGTH = UNIT_TIME*UNIT_SPEED; - - /* propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ - const float_T beta0 = float_T(beta_0); - /* If phi < 0 the formulas below are not directly applicable. - * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the - * z-axis of the coordinate system in this function. - */ - const float_T phiReal = float_T( math::abs(phi) ); - const float_T alphaTilt = math::atan2(float_T(1.0)-beta0*math::cos(phiReal), - beta0*math::sin(phiReal)); - - /* Definition of the laser pulse front tilt angle for the laser field below. - * - * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the - * local "phi_T" not equal in value to the object member "phiReal" or "phi"? - * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system - * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence - * the dispersion will (although physically correct) be slightly off the ideal TWTS - * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for - * scenarios close to beta0 = 1. - */ - const float_T phiT = float_T(2.0)*alphaTilt; - - /* Angle between the laser pulse front and the y-axis. - * Not used, but remains in code for documentation purposes. 
- * const float_T eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt); - */ - - const float_T cspeed = float_T( SI::SPEED_OF_LIGHT_SI / UNIT_SPEED ); - const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); - const float_T om0 = float_T(2.0*PI*cspeed / lambda0); - /* factor 2 in tauG arises from definition convention in laser formula */ - const float_T tauG = float_T(pulselength_SI*2.0 / UNIT_TIME); - /* w0 is wx here --> w0 could be replaced by wx */ - const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); - const float_T rho0 = float_T(PI*w0*w0 / lambda0); - /* wy is width of TWTS pulse */ - const float_T wy = float_T(w_y_SI / UNIT_LENGTH); - const float_T k = float_T(2.0*PI / lambda0); - /* If phi < 0 the entire pulse is rotated by 180 deg around the - * z-axis of the coordinate system without also changing - * the orientation of the resulting field vectors. - */ - const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); - const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); - const float_T z = float_T(pos.z() / UNIT_LENGTH); - const float_T t = float_T(time / UNIT_TIME); - - /* Shortcuts for speeding up the field calculation. */ - const float_T sinPhi = math::sin(phiT); - const float_T cosPhi = math::cos(phiT); - const float_T sinPhi2 = math::sin(phiT / float_T(2.0)); - const float_T cosPhi2 = math::cos(phiT / float_T(2.0)); - const float_T tanPhi2 = math::tan(phiT / float_T(2.0)); - - /* The "helpVar" variables decrease the nesting level of the evaluated expressions and - * thus help with formal code verification through manual code inspection. 
- */ - const complex_T helpVar1 = -(cspeed*z) - cspeed*y*math::tan(float_T(PI / 2)-phiT) - + complex_T(0,1)*cspeed*rho0 / sinPhi; - const complex_T helpVar2 = complex_T(0,1)*rho0 - y*cosPhi - z*sinPhi; - const complex_T helpVar3 = helpVar2*cspeed; - const complex_T helpVar4 = cspeed*om0*tauG*tauG - - complex_T(0,1)*y*cosPhi / cosPhi2 / cosPhi2*tanPhi2 - - complex_T(0,2)*z*tanPhi2*tanPhi2; - const complex_T helpVar5 = float_T(2.0)*cspeed*t - complex_T(0,1)*cspeed*om0*tauG*tauG - - float_T(2.0)*z + float_T(8.0)*y / sinPhi / sinPhi / sinPhi - *sinPhi2*sinPhi2*sinPhi2*sinPhi2 - - float_T(2.0)*z*tanPhi2*tanPhi2; - - const complex_T helpVar6 = ( - (om0*y*rho0 / cosPhi2 / cosPhi2 / cosPhi2 / cosPhi2) / helpVar1 - - (complex_T(0,2)*k*x*x) / helpVar2 - - (complex_T(0,1)*om0*om0*tauG*tauG*rho0) / helpVar2 - - (complex_T(0,4)*y*y*rho0) / (wy*wy*helpVar2) - + (om0*om0*tauG*tauG*y*cosPhi) / helpVar2 - + (float_T(4.0)*y*y*y*cosPhi) / (wy*wy*helpVar2) - + (om0*om0*tauG*tauG*z*sinPhi) / helpVar2 - + (float_T(4.0)*y*y*z*sinPhi) / (wy*wy*helpVar2) - + (complex_T(0,2)*om0*y*y*cosPhi / cosPhi2 / cosPhi2*tanPhi2) / helpVar3 - + (om0*y*rho0*cosPhi / cosPhi2 / cosPhi2*tanPhi2) / helpVar3 - + (complex_T(0,1)*om0*y*y*cosPhi*cosPhi/cosPhi2/cosPhi2*tanPhi2)/helpVar3 - + (complex_T(0,4)*om0*y*z*tanPhi2*tanPhi2) / helpVar3 - - (float_T(2.0)*om0*z*rho0*tanPhi2*tanPhi2) / helpVar3 - - (complex_T(0,2)*om0*z*z*sinPhi*tanPhi2*tanPhi2) / helpVar3 - - (om0*helpVar5*helpVar5) / (cspeed*helpVar4) - ) / float_T(4.0); - - const complex_T helpVar7 = cspeed*om0*tauG*tauG - - complex_T(0,1)*y*cosPhi / cosPhi2 / cosPhi2*tanPhi2 - - complex_T(0,2)*z*tanPhi2*tanPhi2; - const complex_T result = ( complex_T(0,2)*math::exp(helpVar6)*tauG*tanPhi2 - *(cspeed*t - z + y*tanPhi2) - *math::sqrt( (om0*rho0) / helpVar3 ) - ) / math::pow(helpVar7,float_T(1.5)); - - return result.get_real() / UNIT_SPEED; - } - - /** Calculate the Bx(r,t) field - * - * \param pos Spatial position of the target field. 
- * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE BField::float_T - BField::calcTWTSBx( const float3_64& pos, const float_64 time ) const - { - /* The Bx-field for the Ey-field is the same as - * for the By-field for the Ex-field except for the sign. - */ - return -calcTWTSBy( pos, time ); - } - - /** Calculate the Bz(r,t) field - * - * \param pos Spatial position of the target field. - * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field */ - HDINLINE BField::float_T - BField::calcTWTSBz_Ey( const float3_64& pos, const float_64 time ) const - { - using complex_T = pmacc::math::Complex< float_T >; - using complex_64 = pmacc::math::Complex< float_64 >; - /** Unit of speed */ - const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; - /** Unit of time */ - const float_64 UNIT_TIME = SI::DELTA_T_SI; - /** Unit of length */ - const float_64 UNIT_LENGTH = UNIT_TIME*UNIT_SPEED; - - /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ - const float_T beta0 = float_T(beta_0); - /* If phi < 0 the formulas below are not directly applicable. - * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the - * z-axis of the coordinate system in this function. - */ - const float_T phiReal = float_T( math::abs(phi) ); - const float_T alphaTilt = math::atan2(float_T(1.0)-beta0*math::cos(phiReal), - beta0*math::sin(phiReal)); - /* Definition of the laser pulse front tilt angle for the laser field below. - * - * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the - * local "phi_T" not equal in value to the object member "phiReal" or "phi"? - * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system - * of the TWTS model phi is responsible for pulse front tilt and dispersion only. 
Hence - * the dispersion will (although physically correct) be slightly off the ideal TWTS - * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for - * scenarios close to beta0 = 1. - */ - const float_T phiT = float_T(2.0)*alphaTilt; - - /* Angle between the laser pulse front and the y-axis. - * Not used, but remains in code for documentation purposes. - * const float_T eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt); - */ - - const float_T cspeed = float_T( SI::SPEED_OF_LIGHT_SI / UNIT_SPEED ); - const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); - const float_T om0 = float_T(2.0*PI*cspeed / lambda0); - /* factor 2 in tauG arises from definition convention in laser formula */ - const float_T tauG = float_T(pulselength_SI*2.0 / UNIT_TIME); - /* w0 is wx here --> w0 could be replaced by wx */ - const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); - const float_T rho0 = float_T(PI*w0*w0 / lambda0); - /* wy is width of TWTS pulse */ - const float_T wy = float_T(w_y_SI / UNIT_LENGTH); - const float_T k = float_T(2.0*PI / lambda0); - /* If phi < 0 the entire pulse is rotated by 180 deg around the - * z-axis of the coordinate system without also changing - * the orientation of the resulting field vectors. - */ - const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); - const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); - const float_T z = float_T(pos.z() / UNIT_LENGTH); - const float_T t = float_T(time / UNIT_TIME); - - /* Shortcuts for speeding up the field calculation. */ - const float_T sinPhi = math::sin(phiT); - const float_T cosPhi = math::cos(phiT); - const float_T sinPhi2 = math::sin(phiT / float_T(2.0)); - const float_T cosPhi2 = math::cos(phiT / float_T(2.0)); - const float_T tanPhi2 = math::tan(phiT / float_T(2.0)); - - /* The "helpVar" variables decrease the nesting level of the evaluated expressions and - * thus help with formal code verification through manual code inspection. 
- */ - const complex_T helpVar1 = - complex_T(0,-1)*cspeed*om0*tauG*tauG - - y*cosPhi / cosPhi2 / cosPhi2 * tanPhi2 - - float_T(2.0)*z*tanPhi2*tanPhi2; - const complex_T helpVar2 = complex_T(0,1)*rho0 - y*cosPhi - z*sinPhi; - - const complex_T helpVar3 = ( - - cspeed*cspeed*k*om0*tauG*tauG*wy*wy*x*x - - float_T(2.0)*cspeed*cspeed*om0*t*t*wy*wy*rho0 - + complex_T(0,2)*cspeed*cspeed*om0*om0*t*tauG*tauG*wy*wy*rho0 - - float_T(2.0)*cspeed*cspeed*om0*tauG*tauG*y*y*rho0 - + float_T(4.0)*cspeed*om0*t*wy*wy*z*rho0 - - complex_T(0,2)*cspeed*om0*om0*tauG*tauG*wy*wy*z*rho0 - - float_T(2.0)*om0*wy*wy*z*z*rho0 - - complex_T(0,8)*om0*wy*wy*y*(cspeed*t - z)*z*sinPhi2*sinPhi2 - + complex_T(0,8) / sinPhi *( - float_T(2.0)*z*z*(cspeed*om0*t*wy*wy + complex_T(0,1)*cspeed*y*y - om0*wy*wy*z) - + y*( - cspeed*k*wy*wy*x*x - - complex_T(0,2)*cspeed*om0*t*wy*wy*rho0 - + float_T(2.0)*cspeed*y*y*rho0 - + complex_T(0,2)*om0*wy*wy*z*rho0 - )*math::tan(float_T(PI) / float_T(2.0)-phiT) / sinPhi - )*sinPhi2*sinPhi2*sinPhi2*sinPhi2 - - complex_T(0,2)*cspeed*cspeed*om0*t*t*wy*wy*z*sinPhi - - float_T(2.0)*cspeed*cspeed*om0*om0*t*tauG*tauG*wy*wy*z*sinPhi - - complex_T(0,2)*cspeed*cspeed*om0*tauG*tauG*y*y*z*sinPhi - + complex_T(0,4)*cspeed*om0*t*wy*wy*z*z*sinPhi - + float_T(2.0)*cspeed*om0*om0*tauG*tauG*wy*wy*z*z*sinPhi - - complex_T(0,2)*om0*wy*wy*z*z*z*sinPhi - - float_T(4.0)*cspeed*om0*t*wy*wy*y*rho0*tanPhi2 - + float_T(4.0)*om0*wy*wy*y*z*rho0*tanPhi2 - + complex_T(0,2)*y*y*( - cspeed*om0*t*wy*wy - + complex_T(0,1)*cspeed*y*y - - om0*wy*wy*z - )*cosPhi*cosPhi / cosPhi2 / cosPhi2 * tanPhi2 - + complex_T(0,2)*cspeed*k*wy*wy*x*x*z*tanPhi2*tanPhi2 - - float_T(2.0)*om0*wy*wy*y*y*rho0*tanPhi2*tanPhi2 - + float_T(4.0)*cspeed*om0*t*wy*wy*z*rho0*tanPhi2*tanPhi2 - + complex_T(0,4)*cspeed*y*y*z*rho0*tanPhi2*tanPhi2 - - float_T(4.0)*om0*wy*wy*z*z*rho0*tanPhi2*tanPhi2 - - complex_T(0,2)*om0*wy*wy*y*y*z*sinPhi*tanPhi2*tanPhi2 - - float_T(2.0)*y*cosPhi*( - om0*( - cspeed*cspeed*(complex_T(0,1)*t*t*wy*wy - + 
om0*t*tauG*tauG*wy*wy - + complex_T(0,1)*tauG*tauG*y*y) - - cspeed*(complex_T(0,2)*t + om0*tauG*tauG)*wy*wy*z - + complex_T(0,1)*wy*wy*z*z - ) - + complex_T(0,2)*om0*wy*wy*y*(cspeed*t - z)*tanPhi2 - + complex_T(0,1)*( - complex_T(0,-4)*cspeed*y*y*z - + om0*wy*wy*(y*y - float_T(4.0)*(cspeed*t - z)*z) - )*tanPhi2*tanPhi2 - ) - /* The "round-trip" conversion in the line below fixes a gross accuracy bug - * in floating-point arithmetics, when float_T is set to float_X. - */ - ) * complex_T( float_64(1.0) / complex_64(float_T(2.0)*cspeed*wy*wy*helpVar2*helpVar1) ); - - const complex_T helpVar4 = ( - cspeed*om0*( - cspeed*om0*tauG*tauG - - complex_T(0,8)*y*math::tan( float_T(PI) / float_T(2.0) - phiT ) - / sinPhi / sinPhi * sinPhi2*sinPhi2*sinPhi2*sinPhi2 - - complex_T(0,2)*z*tanPhi2*tanPhi2 - ) - ) / rho0; - - const complex_T result = float_T(-1.0)*( - cspeed*math::exp(helpVar3)*k*tauG*x*math::pow( helpVar2, float_T(-1.5) ) - / math::sqrt(helpVar4) - ); - - return result.get_real() / UNIT_SPEED; - } - -} /* namespace twts */ -} /* namespace templates */ + HINLINE + BField::BField( + const float_64 focus_y_SI, + const float_64 wavelength_SI, + const float_64 pulselength_SI, + const float_64 w_x_SI, + const float_64 w_y_SI, + const float_X phi, + const float_X beta_0, + const float_64 tdelay_user_SI, + const bool auto_tdelay, + const PolarizationType pol) + : focus_y_SI(focus_y_SI) + , wavelength_SI(wavelength_SI) + , pulselength_SI(pulselength_SI) + , w_x_SI(w_x_SI) + , w_y_SI(w_y_SI) + , phi(phi) + , beta_0(beta_0) + , tdelay_user_SI(tdelay_user_SI) + , dt(SI::DELTA_T_SI) + , unit_length(UNIT_LENGTH) + , auto_tdelay(auto_tdelay) + , pol(pol) + , phiPositive(float_X(1.0)) + { + /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done + * on host (see fieldBackground.param), this is no problem. 
+ */ + const SubGrid& subGrid = Environment::get().SubGrid(); + halfSimSize = subGrid.getGlobalDomain().size / 2; + tdelay = detail::getInitialTimeDelay_SI( + auto_tdelay, + tdelay_user_SI, + halfSimSize, + pulselength_SI, + focus_y_SI, + phi, + beta_0); + if(phi < float_X(0.0)) + phiPositive = float_X(-1.0); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized( + const pmacc::math::Vector& bFieldPositions_SI, + const float_64 time) const + { + typedef pmacc::math::Vector PosVecVec; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < simDim; ++i) + pos[k][i] = bFieldPositions_SI[k][i]; + } + + /* An example of intra-cell position offsets is the staggered Yee-grid. + * + * Calculate By-component with the intra-cell offset of a By-field + */ + const float_64 By_By = calcTWTSBy(pos[1], time); + /* Calculate Bz-component the the intra-cell offset of a By-field */ + const float_64 Bz_By = calcTWTSBz_Ex(pos[1], time); + /* Calculate By-component the the intra-cell offset of a Bz-field */ + const float_64 By_Bz = calcTWTSBy(pos[2], time); + /* Calculate Bz-component the the intra-cell offset of a Bz-field */ + const float_64 Bz_Bz = calcTWTSBz_Ex(pos[2], time); + /* Since we rotated all position vectors before calling calcTWTSBy and calcTWTSBz_Ex, + * we need to back-rotate the resulting B-field vector. + * + * RotationMatrix[-(PI/2+phi)].(By,Bz) for rotating back the field vectors. + */ + const float_64 By_rot = -math::sin(+phi) * By_By + math::cos(+phi) * Bz_By; + const float_64 Bz_rot = -math::cos(+phi) * By_Bz - math::sin(+phi) * Bz_Bz; + + /* Finally, the B-field normalized to the peak amplitude. 
*/ + return float3_X(float_X(0.0), float_X(By_rot), float_X(Bz_rot)); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized_Ey( + const pmacc::math::Vector& bFieldPositions_SI, + const float_64 time) const + { + typedef pmacc::math::Vector PosVecVec; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < simDim; ++i) + pos[k][i] = bFieldPositions_SI[k][i]; + } + + /* Calculate Bz-component with the intra-cell offset of a By-field */ + const float_64 Bz_By = calcTWTSBz_Ey(pos[1], time); + /* Calculate Bz-component with the intra-cell offset of a Bz-field */ + const float_64 Bz_Bz = calcTWTSBz_Ey(pos[2], time); + /* Since we rotated all position vectors before calling calcTWTSBz_Ey, + * we need to back-rotate the resulting B-field vector. + * + * RotationMatrix[-(PI/2+phi)].(By,Bz) for rotating back the field-vectors. + */ + const float_64 By_rot = +math::cos(+phi) * Bz_By; + const float_64 Bz_rot = -math::sin(+phi) * Bz_Bz; + + /* Finally, the B-field normalized to the peak amplitude. */ + return float3_X(float_X(calcTWTSBx(pos[0], time)), float_X(By_rot), float_X(Bz_rot)); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized( + const pmacc::math::Vector& bFieldPositions_SI, + const float_64 time) const + { + typedef pmacc::math::Vector PosVecVec; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */ + for(uint32_t i = 0; i < DIM2; ++i) + pos[k][i + 1] = bFieldPositions_SI[k][i]; + } + + /* General background comment for the rest of this function: + * + * Corresponding position vector for the field components in 2D simulations. + * 3D 3D vectors in 2D space (x, y) + * x --> z (Meaning: In 2D-sim, insert cell-coordinate x + * into TWTS field function coordinate z.) 
+ * y --> y + * z --> -x (Since z=0 for 2D, we use the existing + * 3D TWTS-field-function and set x = -0) + * The transformed 3D coordinates are used to calculate the field components. + * Ex --> Ez (Meaning: Calculate Ex-component of existing 3D TWTS-field (calcTWTSEx) using + * transformed position vectors to obtain the corresponding Ez-component in 2D. + * Note: Swapping field component coordinates also alters the + * intra-cell position offset.) + * By --> By + * Bz --> -Bx (Yes, the sign is necessary.) + * + * An example of intra-cell position offsets is the staggered Yee-grid. + * + * This procedure is analogous to 3D case, but replace By --> By and Bz --> -Bx. Hence the + * grid cell offset for Bx has to be used instead of Bz. Mind the "-"-sign. + */ + + /* Calculate By-component with the intra-cell offset of a By-field */ + const float_64 By_By = calcTWTSBy(pos[1], time); + /* Calculate Bx-component with the intra-cell offset of a By-field */ + const float_64 Bx_By = -calcTWTSBz_Ex(pos[1], time); + /* Calculate By-component with the intra-cell offset of a Bx-field */ + const float_64 By_Bx = calcTWTSBy(pos[0], time); + /* Calculate Bx-component with the intra-cell offset of a Bx-field */ + const float_64 Bx_Bx = -calcTWTSBz_Ex(pos[0], time); + /* Since we rotated all position vectors before calling calcTWTSBy and calcTWTSBz_Ex, we + * need to back-rotate the resulting B-field vector. Now the rotation is done + * analogously in the (y,x)-plane. (Reverse of the position vector transformation.) + * + * RotationMatrix[-(PI / 2+phi)].(By,Bx) for rotating back the field vectors. + */ + const float_64 By_rot = -math::sin(phi) * By_By + math::cos(phi) * Bx_By; + const float_64 Bx_rot = -math::cos(phi) * By_Bx - math::sin(phi) * Bx_Bx; + + /* Finally, the B-field normalized to the peak amplitude. 
*/ + return float3_X(float_X(Bx_rot), float_X(By_rot), float_X(0.0)); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized_Ey( + const pmacc::math::Vector& bFieldPositions_SI, + const float_64 time) const + { + typedef pmacc::math::Vector PosVecVec; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + /* The 2D output of getFieldPositions_SI only returns + * the y- and z-component of a 3D vector. + */ + for(uint32_t i = 0; i < DIM2; ++i) + pos[k][i + 1] = bFieldPositions_SI[k][i]; + } + + /* General background comment for the rest of this function: + * + * Corresponding position vector for the field components in 2D simulations. + * 3D 3D vectors in 2D space (x, y) + * x --> z (Meaning: In 2D-sim, insert cell-coordinate x + * into TWTS field function coordinate z.) + * y --> y + * z --> -x (Since z=0 for 2D, we use the existing + * 3D TWTS-field-function and set x = -0) + * Ex --> Ez (Meaning: Calculate Ex-component of existing 3D TWTS-field to obtain + * corresponding Ez-component in 2D. + * Note: the intra-cell position offset due to the staggered grid for Ez.) + * By --> By + * Bz --> -Bx (Yes, the sign is necessary.) + * + * This procedure is analogous to 3D case, but replace By --> By and Bz --> -Bx. Hence the + * grid cell offset for Bx has to be used instead of Bz. Mind the -sign. + */ + + /* Calculate Bx-component with the intra-cell offset of a By-field */ + const float_64 Bx_By = -calcTWTSBz_Ex(pos[1], time); + /* Calculate Bx-component with the intra-cell offset of a Bx-field */ + const float_64 Bx_Bx = -calcTWTSBz_Ex(pos[0], time); + + /* Since we rotated all position vectors before calling calcTWTSBz_Ex, we + * need to back-rotate the resulting B-field vector. Now the rotation is done + * analogously in the (y,x)-plane. (Reverse of the position vector transformation.) + * + * RotationMatrix[-(PI / 2+phi)].(By,Bx) + * for rotating back the field-vectors. 
+ */ + const float_64 By_rot = +math::cos(phi) * Bx_By; + const float_64 Bx_rot = -math::sin(phi) * Bx_Bx; + + /* Finally, the B-field normalized to the peak amplitude. */ + return float3_X(float_X(Bx_rot), float_X(By_rot), float_X(calcTWTSBx(pos[2], time))); + } + + HDINLINE float3_X BField::operator()(const DataSpace& cellIdx, const uint32_t currentStep) const + { + const float_64 time_SI = float_64(currentStep) * dt - tdelay; + const traits::FieldPosition fieldPosB; + + const pmacc::math::Vector bFieldPositions_SI + = detail::getFieldPositions_SI(cellIdx, halfSimSize, fieldPosB(), unit_length, focus_y_SI, phi); + /* Single TWTS-Pulse */ + switch(pol) + { + case LINEAR_X: + return getTWTSBfield_Normalized(bFieldPositions_SI, time_SI); + + case LINEAR_YZ: + return getTWTSBfield_Normalized_Ey(bFieldPositions_SI, time_SI); + } + return getTWTSBfield_Normalized(bFieldPositions_SI, time_SI); // defensive default + } + + /** Calculate the By(r,t) field here + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE BField::float_T BField::calcTWTSBy(const float3_64& pos, const float_64 time) const + { + using complex_T = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + /* Unit of speed */ + const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; + /* Unit of time */ + const float_64 UNIT_TIME = SI::DELTA_T_SI; + /* Unit of length */ + const float_64 UNIT_LENGTH = UNIT_TIME * UNIT_SPEED; + + /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + const float_T beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. 
+ */ + const float_T phiReal = float_T(math::abs(phi)); + const float_T alphaTilt + = math::atan2(float_T(1.0) - beta0 * math::cos(phiReal), beta0 * math::sin(phiReal)); + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the + * local "phi_T" not equal in value to the object member "phiReal" or "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + const float_T phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for + * documentation purposes. + * const float_T eta = float_T(PI/2) - (phiReal - alphaTilt); + */ + + const float_T cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + const float_T om0 = float_T(2.0 * PI * cspeed / lambda0); + /* factor 2 in tauG arises from definition convention in laser formula */ + const float_T tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); + const float_T rho0 = float_T(PI * w0 * w0 / lambda0); + /* wy is width of TWTS pulse */ + const float_T wy = float_T(w_y_SI / UNIT_LENGTH); + const float_T k = float_T(2.0 * PI / lambda0); + /* If phi < 0 the entire pulse is rotated by 180 deg around the + * z-axis of the coordinate system without also changing + * the orientation of the resulting field vectors. 
+ */ + const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); + const float_T z = float_T(pos.z() / UNIT_LENGTH); + const float_T t = float_T(time / UNIT_TIME); + + /* Shortcuts for speeding up the field calculation. */ + const float_T sinPhi = math::sin(phiT); + const float_T cosPhi = math::cos(phiT); + const float_T cosPhi2 = math::cos(phiT / 2.0); + const float_T tanPhi2 = math::tan(phiT / 2.0); + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection. + */ + const complex_T helpVar1 = rho0 + complex_T(0, 1) * y * cosPhi + complex_T(0, 1) * z * sinPhi; + const complex_T helpVar2 = cspeed * om0 * tauG * tauG + + complex_T(0, 2) * (-z - y * math::tan(float_T(PI / 2) - phiT)) * tanPhi2 * tanPhi2; + const complex_T helpVar3 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + + const complex_T helpVar4 = float_T(-1.0) + * (cspeed * cspeed * k * om0 * tauG * tauG * wy * wy * x * x + + float_T(2.0) * cspeed * cspeed * om0 * t * t * wy * wy * rho0 + - complex_T(0, 2) * cspeed * cspeed * om0 * om0 * t * tauG * tauG * wy * wy * rho0 + + float_T(2.0) * cspeed * cspeed * om0 * tauG * tauG * y * y * rho0 + - float_T(4.0) * cspeed * om0 * t * wy * wy * z * rho0 + + complex_T(0, 2) * cspeed * om0 * om0 * tauG * tauG * wy * wy * z * rho0 + + float_T(2.0) * om0 * wy * wy * z * z * rho0 + + float_T(4.0) * cspeed * om0 * t * wy * wy * y * rho0 * tanPhi2 + - float_T(4.0) * om0 * wy * wy * y * z * rho0 * tanPhi2 + - complex_T(0, 2) * cspeed * k * wy * wy * x * x * z * tanPhi2 * tanPhi2 + + float_T(2.0) * om0 * wy * wy * y * y * rho0 * tanPhi2 * tanPhi2 + - float_T(4.0) * cspeed * om0 * t * wy * wy * z * rho0 * tanPhi2 * tanPhi2 + - complex_T(0, 4) * cspeed * y * y * z * rho0 * tanPhi2 * tanPhi2 + + float_T(4.0) * om0 * wy * wy * z * z * rho0 * tanPhi2 * tanPhi2 + - complex_T(0, 2) * cspeed * k * wy * 
wy * x * x * y * math::tan(float_T(PI / 2) - phiT) + * tanPhi2 * tanPhi2 + - float_T(4.0) * cspeed * om0 * t * wy * wy * y * rho0 * math::tan(float_T(PI / 2) - phiT) + * tanPhi2 * tanPhi2 + - complex_T(0, 4) * cspeed * y * y * y * rho0 * math::tan(float_T(PI / 2) - phiT) * tanPhi2 + * tanPhi2 + + float_T(4.0) * om0 * wy * wy * y * z * rho0 * math::tan(float_T(PI / 2) - phiT) * tanPhi2 + * tanPhi2 + + float_T(2.0) * z * sinPhi + * (+om0 + * (+cspeed * cspeed + * (complex_T(0, 1) * t * t * wy * wy + om0 * t * tauG * tauG * wy * wy + + complex_T(0, 1) * tauG * tauG * y * y) + - cspeed * (complex_T(0, 2) * t + om0 * tauG * tauG) * wy * wy * z + + complex_T(0, 1) * wy * wy * z * z) + + complex_T(0, 2) * om0 * wy * wy * y * (cspeed * t - z) * tanPhi2 + + complex_T(0, 1) * tanPhi2 * tanPhi2 + * (complex_T(0, -2) * cspeed * y * y * z + + om0 * wy * wy * (y * y - float_T(2.0) * (cspeed * t - z) * z))) + + float_T(2.0) * y * cosPhi + * (+om0 + * (+cspeed * cspeed + * (complex_T(0, 1) * t * t * wy * wy + om0 * t * tauG * tauG * wy * wy + + complex_T(0, 1) * tauG * tauG * y * y) + - cspeed * (complex_T(0, 2) * t + om0 * tauG * tauG) * wy * wy * z + + complex_T(0, 1) * wy * wy * z * z) + + complex_T(0, 2) * om0 * wy * wy * y * (cspeed * t - z) * tanPhi2 + + complex_T(0, 1) + * (complex_T(0, -4) * cspeed * y * y * z + + om0 * wy * wy * (y * y - float_T(4.0) * (cspeed * t - z) * z) + - float_T(2.0) * y + * (+cspeed * om0 * t * wy * wy + complex_T(0, 1) * cspeed * y * y + - om0 * wy * wy * z) + * math::tan(float_T(PI / 2) - phiT)) + * tanPhi2 * tanPhi2) + /* The "round-trip" conversion in the line below fixes a gross accuracy bug + * in floating-point arithmetics, when float_T is set to float_X. 
+ */ + ) + * complex_T(float_64(1.0) / complex_64(float_T(2.0) * cspeed * wy * wy * helpVar1 * helpVar2)); + + const complex_T helpVar5 = complex_T(0, -1) * cspeed * om0 * tauG * tauG + + (-z - y * math::tan(float_T(PI / 2) - phiT)) * tanPhi2 * tanPhi2 * float_T(2.0); + const complex_T helpVar6 + = (cspeed + * (cspeed * om0 * tauG * tauG + + complex_T(0, 2) * (-z - y * math::tan(float_T(PI / 2) - phiT)) * tanPhi2 * tanPhi2)) + / (om0 * rho0); + const complex_T result + = (math::exp(helpVar4) * tauG / cosPhi2 / cosPhi2 + * (rho0 + complex_T(0, 1) * y * cosPhi + complex_T(0, 1) * z * sinPhi) + * (complex_T(0, 2) * cspeed * t + cspeed * om0 * tauG * tauG - complex_T(0, 4) * z + + cspeed * (complex_T(0, 2) * t + om0 * tauG * tauG) * cosPhi + + complex_T(0, 2) * y * tanPhi2) + * math::pow(helpVar3, float_T(-1.5))) + / (float_T(2.0) * helpVar5 * math::sqrt(helpVar6)); + + return result.get_real() / UNIT_SPEED; + } + + /** Calculate the Bz(r,t) field + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE BField::float_T BField::calcTWTSBz_Ex(const float3_64& pos, const float_64 time) const + { + using complex_T = pmacc::math::Complex; + /** Unit of Speed */ + const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; + /** Unit of time */ + const float_64 UNIT_TIME = SI::DELTA_T_SI; + /** Unit of length */ + const float_64 UNIT_LENGTH = UNIT_TIME * UNIT_SPEED; + + /* propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + const float_T beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. 
+ */ + const float_T phiReal = float_T(math::abs(phi)); + const float_T alphaTilt + = math::atan2(float_T(1.0) - beta0 * math::cos(phiReal), beta0 * math::sin(phiReal)); + + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the + * local "phi_T" not equal in value to the object member "phiReal" or "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + const float_T phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. + * Not used, but remains in code for documentation purposes. + * const float_T eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt); + */ + + const float_T cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + const float_T om0 = float_T(2.0 * PI * cspeed / lambda0); + /* factor 2 in tauG arises from definition convention in laser formula */ + const float_T tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); + const float_T rho0 = float_T(PI * w0 * w0 / lambda0); + /* wy is width of TWTS pulse */ + const float_T wy = float_T(w_y_SI / UNIT_LENGTH); + const float_T k = float_T(2.0 * PI / lambda0); + /* If phi < 0 the entire pulse is rotated by 180 deg around the + * z-axis of the coordinate system without also changing + * the orientation of the resulting field vectors. 
+ */ + const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); + const float_T z = float_T(pos.z() / UNIT_LENGTH); + const float_T t = float_T(time / UNIT_TIME); + + /* Shortcuts for speeding up the field calculation. */ + const float_T sinPhi = math::sin(phiT); + const float_T cosPhi = math::cos(phiT); + const float_T sinPhi2 = math::sin(phiT / float_T(2.0)); + const float_T cosPhi2 = math::cos(phiT / float_T(2.0)); + const float_T tanPhi2 = math::tan(phiT / float_T(2.0)); + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection. + */ + const complex_T helpVar1 = -(cspeed * z) - cspeed * y * math::tan(float_T(PI / 2) - phiT) + + complex_T(0, 1) * cspeed * rho0 / sinPhi; + const complex_T helpVar2 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + const complex_T helpVar3 = helpVar2 * cspeed; + const complex_T helpVar4 = cspeed * om0 * tauG * tauG + - complex_T(0, 1) * y * cosPhi / cosPhi2 / cosPhi2 * tanPhi2 + - complex_T(0, 2) * z * tanPhi2 * tanPhi2; + const complex_T helpVar5 = float_T(2.0) * cspeed * t - complex_T(0, 1) * cspeed * om0 * tauG * tauG + - float_T(2.0) * z + + float_T(8.0) * y / sinPhi / sinPhi / sinPhi * sinPhi2 * sinPhi2 * sinPhi2 * sinPhi2 + - float_T(2.0) * z * tanPhi2 * tanPhi2; + + const complex_T helpVar6 + = ((om0 * y * rho0 / cosPhi2 / cosPhi2 / cosPhi2 / cosPhi2) / helpVar1 + - (complex_T(0, 2) * k * x * x) / helpVar2 + - (complex_T(0, 1) * om0 * om0 * tauG * tauG * rho0) / helpVar2 + - (complex_T(0, 4) * y * y * rho0) / (wy * wy * helpVar2) + + (om0 * om0 * tauG * tauG * y * cosPhi) / helpVar2 + + (float_T(4.0) * y * y * y * cosPhi) / (wy * wy * helpVar2) + + (om0 * om0 * tauG * tauG * z * sinPhi) / helpVar2 + + (float_T(4.0) * y * y * z * sinPhi) / (wy * wy * helpVar2) + + (complex_T(0, 2) * om0 * y * y * cosPhi / cosPhi2 / cosPhi2 * tanPhi2) / helpVar3 
+ + (om0 * y * rho0 * cosPhi / cosPhi2 / cosPhi2 * tanPhi2) / helpVar3 + + (complex_T(0, 1) * om0 * y * y * cosPhi * cosPhi / cosPhi2 / cosPhi2 * tanPhi2) / helpVar3 + + (complex_T(0, 4) * om0 * y * z * tanPhi2 * tanPhi2) / helpVar3 + - (float_T(2.0) * om0 * z * rho0 * tanPhi2 * tanPhi2) / helpVar3 + - (complex_T(0, 2) * om0 * z * z * sinPhi * tanPhi2 * tanPhi2) / helpVar3 + - (om0 * helpVar5 * helpVar5) / (cspeed * helpVar4)) + / float_T(4.0); + + const complex_T helpVar7 = cspeed * om0 * tauG * tauG + - complex_T(0, 1) * y * cosPhi / cosPhi2 / cosPhi2 * tanPhi2 + - complex_T(0, 2) * z * tanPhi2 * tanPhi2; + const complex_T result = (complex_T(0, 2) * math::exp(helpVar6) * tauG * tanPhi2 + * (cspeed * t - z + y * tanPhi2) * math::sqrt((om0 * rho0) / helpVar3)) + / math::pow(helpVar7, float_T(1.5)); + + return result.get_real() / UNIT_SPEED; + } + + /** Calculate the Bx(r,t) field + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE BField::float_T BField::calcTWTSBx(const float3_64& pos, const float_64 time) const + { + /* The Bx-field for the Ey-field is the same as + * for the By-field for the Ex-field except for the sign. + */ + return -calcTWTSBy(pos, time); + } + + /** Calculate the Bz(r,t) field + * + * \param pos Spatial position of the target field. 
+ * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE BField::float_T BField::calcTWTSBz_Ey(const float3_64& pos, const float_64 time) const + { + using complex_T = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + /** Unit of speed */ + const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; + /** Unit of time */ + const float_64 UNIT_TIME = SI::DELTA_T_SI; + /** Unit of length */ + const float_64 UNIT_LENGTH = UNIT_TIME * UNIT_SPEED; + + /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + const float_T beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. + */ + const float_T phiReal = float_T(math::abs(phi)); + const float_T alphaTilt + = math::atan2(float_T(1.0) - beta0 * math::cos(phiReal), beta0 * math::sin(phiReal)); + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the + * local "phi_T" not equal in value to the object member "phiReal" or "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + const float_T phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. + * Not used, but remains in code for documentation purposes. 
+ * const float_T eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt); + */ + + const float_T cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + const float_T om0 = float_T(2.0 * PI * cspeed / lambda0); + /* factor 2 in tauG arises from definition convention in laser formula */ + const float_T tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); + const float_T rho0 = float_T(PI * w0 * w0 / lambda0); + /* wy is width of TWTS pulse */ + const float_T wy = float_T(w_y_SI / UNIT_LENGTH); + const float_T k = float_T(2.0 * PI / lambda0); + /* If phi < 0 the entire pulse is rotated by 180 deg around the + * z-axis of the coordinate system without also changing + * the orientation of the resulting field vectors. + */ + const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); + const float_T z = float_T(pos.z() / UNIT_LENGTH); + const float_T t = float_T(time / UNIT_TIME); + + /* Shortcuts for speeding up the field calculation. */ + const float_T sinPhi = math::sin(phiT); + const float_T cosPhi = math::cos(phiT); + const float_T sinPhi2 = math::sin(phiT / float_T(2.0)); + const float_T cosPhi2 = math::cos(phiT / float_T(2.0)); + const float_T tanPhi2 = math::tan(phiT / float_T(2.0)); + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection. 
+ */ + const complex_T helpVar1 = complex_T(0, -1) * cspeed * om0 * tauG * tauG + - y * cosPhi / cosPhi2 / cosPhi2 * tanPhi2 - float_T(2.0) * z * tanPhi2 * tanPhi2; + const complex_T helpVar2 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + + const complex_T helpVar3 + = (-cspeed * cspeed * k * om0 * tauG * tauG * wy * wy * x * x + - float_T(2.0) * cspeed * cspeed * om0 * t * t * wy * wy * rho0 + + complex_T(0, 2) * cspeed * cspeed * om0 * om0 * t * tauG * tauG * wy * wy * rho0 + - float_T(2.0) * cspeed * cspeed * om0 * tauG * tauG * y * y * rho0 + + float_T(4.0) * cspeed * om0 * t * wy * wy * z * rho0 + - complex_T(0, 2) * cspeed * om0 * om0 * tauG * tauG * wy * wy * z * rho0 + - float_T(2.0) * om0 * wy * wy * z * z * rho0 + - complex_T(0, 8) * om0 * wy * wy * y * (cspeed * t - z) * z * sinPhi2 * sinPhi2 + + complex_T(0, 8) / sinPhi + * (float_T(2.0) * z * z + * (cspeed * om0 * t * wy * wy + complex_T(0, 1) * cspeed * y * y - om0 * wy * wy * z) + + y + * (cspeed * k * wy * wy * x * x - complex_T(0, 2) * cspeed * om0 * t * wy * wy * rho0 + + float_T(2.0) * cspeed * y * y * rho0 + + complex_T(0, 2) * om0 * wy * wy * z * rho0) + * math::tan(float_T(PI) / float_T(2.0) - phiT) / sinPhi) + * sinPhi2 * sinPhi2 * sinPhi2 * sinPhi2 + - complex_T(0, 2) * cspeed * cspeed * om0 * t * t * wy * wy * z * sinPhi + - float_T(2.0) * cspeed * cspeed * om0 * om0 * t * tauG * tauG * wy * wy * z * sinPhi + - complex_T(0, 2) * cspeed * cspeed * om0 * tauG * tauG * y * y * z * sinPhi + + complex_T(0, 4) * cspeed * om0 * t * wy * wy * z * z * sinPhi + + float_T(2.0) * cspeed * om0 * om0 * tauG * tauG * wy * wy * z * z * sinPhi + - complex_T(0, 2) * om0 * wy * wy * z * z * z * sinPhi + - float_T(4.0) * cspeed * om0 * t * wy * wy * y * rho0 * tanPhi2 + + float_T(4.0) * om0 * wy * wy * y * z * rho0 * tanPhi2 + + complex_T(0, 2) * y * y + * (cspeed * om0 * t * wy * wy + complex_T(0, 1) * cspeed * y * y - om0 * wy * wy * z) + * cosPhi * cosPhi / cosPhi2 / cosPhi2 * tanPhi2 + + 
complex_T(0, 2) * cspeed * k * wy * wy * x * x * z * tanPhi2 * tanPhi2 + - float_T(2.0) * om0 * wy * wy * y * y * rho0 * tanPhi2 * tanPhi2 + + float_T(4.0) * cspeed * om0 * t * wy * wy * z * rho0 * tanPhi2 * tanPhi2 + + complex_T(0, 4) * cspeed * y * y * z * rho0 * tanPhi2 * tanPhi2 + - float_T(4.0) * om0 * wy * wy * z * z * rho0 * tanPhi2 * tanPhi2 + - complex_T(0, 2) * om0 * wy * wy * y * y * z * sinPhi * tanPhi2 * tanPhi2 + - float_T(2.0) * y * cosPhi + * (om0 + * (cspeed * cspeed + * (complex_T(0, 1) * t * t * wy * wy + om0 * t * tauG * tauG * wy * wy + + complex_T(0, 1) * tauG * tauG * y * y) + - cspeed * (complex_T(0, 2) * t + om0 * tauG * tauG) * wy * wy * z + + complex_T(0, 1) * wy * wy * z * z) + + complex_T(0, 2) * om0 * wy * wy * y * (cspeed * t - z) * tanPhi2 + + complex_T(0, 1) + * (complex_T(0, -4) * cspeed * y * y * z + + om0 * wy * wy * (y * y - float_T(4.0) * (cspeed * t - z) * z)) + * tanPhi2 * tanPhi2) + /* The "round-trip" conversion in the line below fixes a gross accuracy bug + * in floating-point arithmetics, when float_T is set to float_X. 
+ */ + ) + * complex_T(float_64(1.0) / complex_64(float_T(2.0) * cspeed * wy * wy * helpVar2 * helpVar1)); + + const complex_T helpVar4 = (cspeed * om0 + * (cspeed * om0 * tauG * tauG + - complex_T(0, 8) * y * math::tan(float_T(PI) / float_T(2.0) - phiT) + / sinPhi / sinPhi * sinPhi2 * sinPhi2 * sinPhi2 * sinPhi2 + - complex_T(0, 2) * z * tanPhi2 * tanPhi2)) + / rho0; + + const complex_T result = float_T(-1.0) + * (cspeed * math::exp(helpVar3) * k * tauG * x * math::pow(helpVar2, float_T(-1.5)) + / math::sqrt(helpVar4)); + + return result.get_real() / UNIT_SPEED; + } + + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/EField.hpp b/include/picongpu/fields/background/templates/TWTS/EField.hpp index 19b66bcaa2..af73b3338f 100644 --- a/include/picongpu/fields/background/templates/TWTS/EField.hpp +++ b/include/picongpu/fields/background/templates/TWTS/EField.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus, Axel Huebl +/* Copyright 2014-2021 Alexander Debus, Axel Huebl * * This file is part of PIConGPU. * @@ -28,150 +28,143 @@ namespace picongpu { -/* Load pre-defined background field */ -namespace templates -{ -/* Traveling-wave Thomson scattering laser pulse */ -namespace twts -{ - -class EField -{ -public: - using float_T = float_X; - - enum PolarizationType + /* Load pre-defined background field */ + namespace templates { - /* The linear polarization of the TWTS laser is defined - * relative to the plane of the pulse front tilt. - * - * Polarisation is normal to the reference plane. - * Use Ex-fields (and corresponding B-fields) in TWTS laser internal coordinate system. - */ - LINEAR_X = 1u, - /* Polarization lies within the reference plane. - * Use Ey-fields (and corresponding B-fields) in TWTS laser internal coordinate system. 
- */ - LINEAR_YZ = 2u, - }; - - /* Center of simulation volume in number of cells */ - PMACC_ALIGN(halfSimSize,DataSpace); - /* y-position of TWTS coordinate origin inside the simulation coordinates [meter] - The other origin coordinates (x and z) default to globally centered values - with respect to the simulation volume. */ - PMACC_ALIGN(focus_y_SI, const float_64); - /* Laser wavelength [meter] */ - PMACC_ALIGN(wavelength_SI, const float_64); - /* TWTS laser pulse duration [second] */ - PMACC_ALIGN(pulselength_SI, const float_64); - /* line focus height of TWTS pulse [meter] */ - PMACC_ALIGN(w_x_SI, const float_64); - /* line focus width of TWTS pulse [meter] */ - PMACC_ALIGN(w_y_SI, const float_64); - /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ - PMACC_ALIGN(phi, const float_X); - /* Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */ - PMACC_ALIGN(phiPositive,float_X); - /* propagation speed of TWTS laser overlap - normalized to the speed of light. [Default: beta0=1.0] */ - PMACC_ALIGN(beta_0, const float_X); - /* If auto_tdelay=FALSE, then a user defined delay is used. [second] */ - PMACC_ALIGN(tdelay_user_SI, const float_64); - /* Make time step constant accessible to device. */ - PMACC_ALIGN(dt, const float_64); - /* Make length normalization constant accessible to device. */ - PMACC_ALIGN(unit_length, const float_64); - /* TWTS laser time delay */ - PMACC_ALIGN(tdelay,float_64); - /* Should the TWTS laser delay be chosen automatically, such that - * the laser gradually enters the simulation volume? [Default: TRUE] - */ - PMACC_ALIGN(auto_tdelay, const bool); - /* Polarization of TWTS laser */ - PMACC_ALIGN(pol, const PolarizationType); - - /** Electric field of the TWTS laser - * - * \param focus_y_SI the distance to the laser focus in y-direction [m] - * \param wavelength_SI central wavelength [m] - * \param pulselength_SI sigma of std. 
gauss for intensity (E^2), - * pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)] - * \param w_x beam waist: distance from the axis where the pulse electric field - * decreases to its 1/e^2-th part at the focus position of the laser [m] - * \param w_y \see w_x - * \param phi interaction angle between TWTS laser propagation vector and - * the y-axis [rad, default = 90.*(PI/180.)] - * \param beta_0 propagation speed of overlap normalized to - * the speed of light [c, default = 1.0] - * \param tdelay_user manual time delay if auto_tdelay is false - * \param auto_tdelay calculate the time delay such that the TWTS pulse is not - * inside the simulation volume at simulation start timestep = 0 [default = true] - * \param pol dtermines the TWTS laser polarization, which is either normal or parallel - * to the laser pulse front tilt plane [ default= LINEAR_X , LINEAR_YZ ] - */ - HINLINE - EField( const float_64 focus_y_SI, - const float_64 wavelength_SI, - const float_64 pulselength_SI, - const float_64 w_x_SI, - const float_64 w_y_SI, - const float_X phi = 90.*(PI / 180.), - const float_X beta_0 = 1.0, - const float_64 tdelay_user_SI = 0.0, - const bool auto_tdelay = true, - const PolarizationType pol = LINEAR_X ); - - /** Specify your background field E(r,t) here - * - * \param cellIdx The total cell id counted from the start at timestep 0. - * \param currentStep The current time step - * \return float3_X with field normalized to amplitude in range [-1.:1.] 
- */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const; - - /** Calculate the Ex(r,t) field here (electric field vector normal to pulse-front-tilt plane) - * - * \param pos Spatial position of the target field - * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field - * \return Ex-field component of the non-rotated TWTS field in SI units */ - HDINLINE float_T - calcTWTSEx( const float3_64& pos, const float_64 time ) const; - - /** Calculate the Ey(r,t) field here (electric field vector in pulse-front-tilt plane) - * - * \param pos Spatial position of the target field - * \param time Absolute time (SI, including all offsets and transformations) - * for calculating the field - * \return Ex-field component of the non-rotated TWTS field in SI units */ - HDINLINE float_T - calcTWTSEy( const float3_64& pos, const float_64 time ) const; - - /** Calculate the E-field vector of the TWTS laser in SI units. - * \tparam T_dim Specializes for the simulation dimension - * \param cellIdx The total cell id counted from the start at timestep 0 - * \return Efield vector of the rotated TWTS field in SI units */ - template - HDINLINE float3_X - getTWTSEfield_Normalized( - const pmacc::math::Vector& eFieldPositions_SI, - const float_64 time) const; - - /** Calculate the E-field vector of the "in-plane polarized" TWTS laser in SI units. 
- * \tparam T_dim Specializes for the simulation dimension - * \param cellIdx The total cell id counted from the start at timestep 0 - * \return Efield vector of the rotated TWTS field in SI units */ - template - HDINLINE float3_X - getTWTSEfield_Normalized_Ey( - const pmacc::math::Vector& eFieldPositions_SI, - const float_64 time) const; - -}; - -} /* namespace twts */ -} /* namespace templates */ + /* Traveling-wave Thomson scattering laser pulse */ + namespace twts + { + class EField + { + public: + using float_T = float_X; + + enum PolarizationType + { + /* The linear polarization of the TWTS laser is defined + * relative to the plane of the pulse front tilt. + * + * Polarisation is normal to the reference plane. + * Use Ex-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_X = 1u, + /* Polarization lies within the reference plane. + * Use Ey-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_YZ = 2u, + }; + + /* Center of simulation volume in number of cells */ + PMACC_ALIGN(halfSimSize, DataSpace); + /* y-position of TWTS coordinate origin inside the simulation coordinates [meter] + The other origin coordinates (x and z) default to globally centered values + with respect to the simulation volume. */ + PMACC_ALIGN(focus_y_SI, const float_64); + /* Laser wavelength [meter] */ + PMACC_ALIGN(wavelength_SI, const float_64); + /* TWTS laser pulse duration [second] */ + PMACC_ALIGN(pulselength_SI, const float_64); + /* line focus height of TWTS pulse [meter] */ + PMACC_ALIGN(w_x_SI, const float_64); + /* line focus width of TWTS pulse [meter] */ + PMACC_ALIGN(w_y_SI, const float_64); + /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ + PMACC_ALIGN(phi, const float_X); + /* Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */ + PMACC_ALIGN(phiPositive, float_X); + /* propagation speed of TWTS laser overlap + normalized to the speed of light. 
[Default: beta0=1.0] */ + PMACC_ALIGN(beta_0, const float_X); + /* If auto_tdelay=FALSE, then a user defined delay is used. [second] */ + PMACC_ALIGN(tdelay_user_SI, const float_64); + /* Make time step constant accessible to device. */ + PMACC_ALIGN(dt, const float_64); + /* Make length normalization constant accessible to device. */ + PMACC_ALIGN(unit_length, const float_64); + /* TWTS laser time delay */ + PMACC_ALIGN(tdelay, float_64); + /* Should the TWTS laser delay be chosen automatically, such that + * the laser gradually enters the simulation volume? [Default: TRUE] + */ + PMACC_ALIGN(auto_tdelay, const bool); + /* Polarization of TWTS laser */ + PMACC_ALIGN(pol, const PolarizationType); + + /** Electric field of the TWTS laser + * + * \param focus_y_SI the distance to the laser focus in y-direction [m] + * \param wavelength_SI central wavelength [m] + * \param pulselength_SI sigma of std. gauss for intensity (E^2), + * pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)] + * \param w_x beam waist: distance from the axis where the pulse electric field + * decreases to its 1/e^2-th part at the focus position of the laser [m] + * \param w_y \see w_x + * \param phi interaction angle between TWTS laser propagation vector and + * the y-axis [rad, default = 90.*(PI/180.)] + * \param beta_0 propagation speed of overlap normalized to + * the speed of light [c, default = 1.0] + * \param tdelay_user manual time delay if auto_tdelay is false + * \param auto_tdelay calculate the time delay such that the TWTS pulse is not + * inside the simulation volume at simulation start timestep = 0 [default = true] + * \param pol dtermines the TWTS laser polarization, which is either normal or parallel + * to the laser pulse front tilt plane [ default= LINEAR_X , LINEAR_YZ ] + */ + HINLINE + EField( + const float_64 focus_y_SI, + const float_64 wavelength_SI, + const float_64 pulselength_SI, + const float_64 w_x_SI, + const float_64 w_y_SI, + const float_X phi = 90. 
* (PI / 180.), + const float_X beta_0 = 1.0, + const float_64 tdelay_user_SI = 0.0, + const bool auto_tdelay = true, + const PolarizationType pol = LINEAR_X); + + /** Specify your background field E(r,t) here + * + * \param cellIdx The total cell id counted from the start at timestep 0. + * \param currentStep The current time step + * \return float3_X with field normalized to amplitude in range [-1.:1.] + */ + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const; + + /** Calculate the Ex(r,t) field here (electric field vector normal to pulse-front-tilt plane) + * + * \param pos Spatial position of the target field + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field + * \return Ex-field component of the non-rotated TWTS field in SI units */ + HDINLINE float_T calcTWTSEx(const float3_64& pos, const float_64 time) const; + + /** Calculate the Ey(r,t) field here (electric field vector in pulse-front-tilt plane) + * + * \param pos Spatial position of the target field + * \param time Absolute time (SI, including all offsets and transformations) + * for calculating the field + * \return Ex-field component of the non-rotated TWTS field in SI units */ + HDINLINE float_T calcTWTSEy(const float3_64& pos, const float_64 time) const; + + /** Calculate the E-field vector of the TWTS laser in SI units. + * \tparam T_dim Specializes for the simulation dimension + * \param cellIdx The total cell id counted from the start at timestep 0 + * \return Efield vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSEfield_Normalized( + const pmacc::math::Vector& eFieldPositions_SI, + const float_64 time) const; + + /** Calculate the E-field vector of the "in-plane polarized" TWTS laser in SI units. 
+ * \tparam T_dim Specializes for the simulation dimension + * \param cellIdx The total cell id counted from the start at timestep 0 + * \return Efield vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSEfield_Normalized_Ey( + const pmacc::math::Vector& eFieldPositions_SI, + const float_64 time) const; + }; + + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/EField.tpp b/include/picongpu/fields/background/templates/TWTS/EField.tpp index ab108252ed..ef60bf0ecc 100644 --- a/include/picongpu/fields/background/templates/TWTS/EField.tpp +++ b/include/picongpu/fields/background/templates/TWTS/EField.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus, Axel Huebl +/* Copyright 2014-2021 Alexander Debus, Axel Huebl * * This file is part of PIConGPU. * @@ -36,319 +36,315 @@ namespace picongpu { -/* Load pre-defined background field */ -namespace templates -{ -/* Traveling-wave Thomson scattering laser pulse */ -namespace twts -{ - - HINLINE - EField::EField( const float_64 focus_y_SI, - const float_64 wavelength_SI, - const float_64 pulselength_SI, - const float_64 w_x_SI, - const float_64 w_y_SI, - const float_X phi, - const float_X beta_0, - const float_64 tdelay_user_SI, - const bool auto_tdelay, - const PolarizationType pol ) : - focus_y_SI(focus_y_SI), wavelength_SI(wavelength_SI), - pulselength_SI(pulselength_SI), w_x_SI(w_x_SI), - w_y_SI(w_y_SI), phi(phi), beta_0(beta_0), - tdelay_user_SI(tdelay_user_SI), dt(SI::DELTA_T_SI), - unit_length(UNIT_LENGTH), auto_tdelay(auto_tdelay), pol(pol), phiPositive( float_X(1.0) ) + /* Load pre-defined background field */ + namespace templates { - /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done - on host (see fieldBackground.param), this is no problem. 
- */ - const SubGrid& subGrid = Environment::get().SubGrid(); - halfSimSize = subGrid.getGlobalDomain().size / 2; - tdelay = detail::getInitialTimeDelay_SI(auto_tdelay, tdelay_user_SI, - halfSimSize, pulselength_SI, - focus_y_SI, phi, beta_0); - if ( phi < float_X(0.0) ) phiPositive = float_X(-1.0); - } - - template<> - HDINLINE float3_X - EField::getTWTSEfield_Normalized( - const pmacc::math::Vector& eFieldPositions_SI, + /* Traveling-wave Thomson scattering laser pulse */ + namespace twts + { + HINLINE + EField::EField( + const float_64 focus_y_SI, + const float_64 wavelength_SI, + const float_64 pulselength_SI, + const float_64 w_x_SI, + const float_64 w_y_SI, + const float_X phi, + const float_X beta_0, + const float_64 tdelay_user_SI, + const bool auto_tdelay, + const PolarizationType pol) + : focus_y_SI(focus_y_SI) + , wavelength_SI(wavelength_SI) + , pulselength_SI(pulselength_SI) + , w_x_SI(w_x_SI) + , w_y_SI(w_y_SI) + , phi(phi) + , beta_0(beta_0) + , tdelay_user_SI(tdelay_user_SI) + , dt(SI::DELTA_T_SI) + , unit_length(UNIT_LENGTH) + , auto_tdelay(auto_tdelay) + , pol(pol) + , phiPositive(float_X(1.0)) + { + /* Note: Enviroment-objects cannot be instantiated on CUDA GPU device. Since this is done + on host (see fieldBackground.param), this is no problem. 
+ */ + const SubGrid& subGrid = Environment::get().SubGrid(); + halfSimSize = subGrid.getGlobalDomain().size / 2; + tdelay = detail::getInitialTimeDelay_SI( + auto_tdelay, + tdelay_user_SI, + halfSimSize, + pulselength_SI, + focus_y_SI, + phi, + beta_0); + if(phi < float_X(0.0)) + phiPositive = float_X(-1.0); + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized( + const pmacc::math::Vector& eFieldPositions_SI, const float_64 time) const - { - float3_64 pos(float3_64::create(0.0)); - for (uint32_t i = 0; i - HDINLINE float3_X - EField::getTWTSEfield_Normalized_Ey( - const pmacc::math::Vector& eFieldPositions_SI, + { + float3_64 pos(float3_64::create(0.0)); + for(uint32_t i = 0; i < simDim; ++i) + pos[i] = eFieldPositions_SI[0][i]; + return float3_X(float_X(calcTWTSEx(pos, time)), float_X(0.), float_X(0.)); + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized_Ey( + const pmacc::math::Vector& eFieldPositions_SI, const float_64 time) const - { - typedef pmacc::math::Vector PosVecVec; - PosVecVec pos(PosVecVec::create( - float3_64::create(0.0) - )); - - for (uint32_t k = 0; k - HDINLINE float3_X - EField::getTWTSEfield_Normalized( - const pmacc::math::Vector& eFieldPositions_SI, - const float_64 time) const - { - /* Ex->Ez, so also the grid cell offset for Ez has to be used. */ - float3_64 pos(float3_64::create(0.0)); - /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */ - for (uint32_t i = 0; i - HDINLINE float3_X - EField::getTWTSEfield_Normalized_Ey( - const pmacc::math::Vector& eFieldPositions_SI, - const float_64 time) const - { - typedef pmacc::math::Vector PosVecVec; - PosVecVec pos(PosVecVec::create( - float3_64::create(0.0) - )); - - /* The 2D output of getFieldPositions_SI only returns - * the y- and z-component of a 3D vector. - */ - for (uint32_t k = 0; kEy, but grid cell offsets for Ex and Ey have to be used. 
- * - * Calculate Ey-component with the intra-cell offset of a Ey-field - */ - const float_64 Ey_Ey = calcTWTSEy(pos[1], time); - /* Calculate Ey-component with the intra-cell offset of a Ex-field */ - const float_64 Ey_Ex = calcTWTSEy(pos[0], time); - - /* Since we rotated all position vectors before calling calcTWTSEy, - * we need to back-rotate the resulting E-field vector. - * - * RotationMatrix[-(PI / 2+phi)].(Ey,Ex) for rotating back the field-vectors. - */ - const float_64 Ey_rot = -math::sin(+phi)*Ey_Ey; - const float_64 Ex_rot = -math::cos(+phi)*Ey_Ex; - - /* Finally, the E-field normalized to the peak amplitude. */ - return float3_X( float_X(Ex_rot), - float_X(Ey_rot), - float_X(0.0) ); - } - - HDINLINE float3_X - EField::operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const - { - const float_64 time_SI = float_64(currentStep) * dt - tdelay; - const traits::FieldPosition fieldPosE; - - const pmacc::math::Vector eFieldPositions_SI = - detail::getFieldPositions_SI(cellIdx, halfSimSize, - fieldPosE(), unit_length, focus_y_SI, phi); - - /* Single TWTS-Pulse */ - switch (pol) - { - case LINEAR_X : - return getTWTSEfield_Normalized(eFieldPositions_SI, time_SI); - - case LINEAR_YZ : - return getTWTSEfield_Normalized_Ey(eFieldPositions_SI, time_SI); - } - return getTWTSEfield_Normalized(eFieldPositions_SI, time_SI); // defensive default - } - - /** Calculate the Ex(r,t) field here - * - * \param pos Spatial position of the target field. 
- * \param time Absolute time (SI, including all offsets and transformations) for calculating - * the field */ - HDINLINE EField::float_T - EField::calcTWTSEx( const float3_64& pos, const float_64 time) const - { - using complex_T = pmacc::math::Complex< float_T >; - using complex_64 = pmacc::math::Complex< float_64 >; - /* Unit of speed */ - const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; - /* Unit of time */ - const float_64 UNIT_TIME = SI::DELTA_T_SI; - /* Unit of length */ - const float_64 UNIT_LENGTH = UNIT_TIME*UNIT_SPEED; - - /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ - const float_T beta0 = float_T(beta_0); - /* If phi < 0 the formulas below are not directly applicable. - * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the - * z-axis of the coordinate system in this function. - */ - const float_T phiReal = float_T( math::abs(phi) ); - const float_T alphaTilt = math::atan2(float_T(1.0)-beta0*math::cos(phiReal), - beta0*math::sin(phiReal)); - /* Definition of the laser pulse front tilt angle for the laser field below. - * - * For beta0 = 1.0, this is equivalent to our standard definition. Question: Why is the - * local "phi_T" not equal in value to the object member "phiReal" or "phi"? - * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system - * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence - * the dispersion will (although physically correct) be slightly off the ideal TWTS - * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for - * scenarios close to beta0 = 1. - */ - const float_T phiT = float_T(2.0)*alphaTilt; - - /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for - * documentation purposes. 
- * const float_T eta = (PI / 2) - (phiReal - alphaTilt); - */ - - const float_T cspeed = float_T( SI::SPEED_OF_LIGHT_SI / UNIT_SPEED ); - const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); - const float_T om0 = float_T(2.0*PI*cspeed / lambda0); - /* factor 2 in tauG arises from definition convention in laser formula */ - const float_T tauG = float_T(pulselength_SI*2.0 / UNIT_TIME); - /* w0 is wx here --> w0 could be replaced by wx */ - const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); - const float_T rho0 = float_T(PI*w0*w0/lambda0); - /* wy is width of TWTS pulse */ - const float_T wy = float_T(w_y_SI / UNIT_LENGTH); - const float_T k = float_T(2.0*PI / lambda0); - const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); - const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); - const float_T z = float_T(pos.z() / UNIT_LENGTH); - const float_T t = float_T(time / UNIT_TIME); - - /* Calculating shortcuts for speeding up field calculation */ - const float_T sinPhi = math::sin(phiT); - const float_T cosPhi = math::cos(phiT); - const float_T sinPhi2 = math::sin(phiT / float_T(2.0)); - const float_T cosPhi2 = math::cos(phiT / float_T(2.0)); - const float_T tanPhi2 = math::tan(phiT / float_T(2.0)); - - /* The "helpVar" variables decrease the nesting level of the evaluated expressions and - * thus help with formal code verification through manual code inspection. 
- */ - const complex_T helpVar1 = complex_T(0,1)*rho0 - y*cosPhi - z*sinPhi; - const complex_T helpVar2 = complex_T(0,-1)*cspeed*om0*tauG*tauG - - y*cosPhi / cosPhi2 / cosPhi2*tanPhi2 - - float_T(2.0)*z*tanPhi2*tanPhi2; - const complex_T helpVar3 = complex_T(0,1)*rho0 - y*cosPhi - z*sinPhi; - - const complex_T helpVar4 = ( - -(cspeed*cspeed*k*om0*tauG*tauG*wy*wy*x*x) - - float_T(2.0)*cspeed*cspeed*om0*t*t*wy*wy*rho0 - + complex_T(0,2)*cspeed*cspeed*om0*om0*t*tauG*tauG*wy*wy*rho0 - - float_T(2.0)*cspeed*cspeed*om0*tauG*tauG*y*y*rho0 - + float_T(4.0)*cspeed*om0*t*wy*wy*z*rho0 - - complex_T(0,2)*cspeed*om0*om0*tauG*tauG*wy*wy*z*rho0 - - float_T(2.0)*om0*wy*wy*z*z*rho0 - - complex_T(0,8)*om0*wy*wy*y*(cspeed*t - z)*z*sinPhi2*sinPhi2 - + complex_T(0,8) / sinPhi*( - +float_T(2.0)*z*z*(cspeed*om0*t*wy*wy+complex_T(0,1)*cspeed*y*y-om0*wy*wy*z) - + y*( - + cspeed*k*wy*wy*x*x - - complex_T(0,2)*cspeed*om0*t*wy*wy*rho0 - + float_T(2.0)*cspeed*y*y*rho0 - + complex_T(0,2)*om0*wy*wy*z*rho0 - )*math::tan(float_T(PI / 2.0)-phiT)/sinPhi - )*sinPhi2*sinPhi2*sinPhi2*sinPhi2 - - complex_T(0,2)*cspeed*cspeed*om0*t*t*wy*wy*z*sinPhi - - float_T(2.0)*cspeed*cspeed*om0*om0*t*tauG*tauG*wy*wy*z*sinPhi - - complex_T(0,2)*cspeed*cspeed*om0*tauG*tauG*y*y*z*sinPhi - + complex_T(0,4)*cspeed*om0*t*wy*wy*z*z*sinPhi - + float_T(2.0)*cspeed*om0*om0*tauG*tauG*wy*wy*z*z*sinPhi - - complex_T(0,2)*om0*wy*wy*z*z*z*sinPhi - - float_T(4.0)*cspeed*om0*t*wy*wy*y*rho0*tanPhi2 - + float_T(4.0)*om0*wy*wy*y*z*rho0*tanPhi2 - + complex_T(0,2)*y*y*( - + cspeed*om0*t*wy*wy + complex_T(0,1)*cspeed*y*y - om0*wy*wy*z - )*cosPhi*cosPhi / cosPhi2 / cosPhi2*tanPhi2 - + complex_T(0,2)*cspeed*k*wy*wy*x*x*z*tanPhi2*tanPhi2 - - float_T(2.0)*om0*wy*wy*y*y*rho0*tanPhi2*tanPhi2 - + float_T(4.0)*cspeed*om0*t*wy*wy*z*rho0*tanPhi2*tanPhi2 - + complex_T(0,4)*cspeed*y*y*z*rho0*tanPhi2*tanPhi2 - - float_T(4.0)*om0*wy*wy*z*z*rho0*tanPhi2*tanPhi2 - - complex_T(0,2)*om0*wy*wy*y*y*z*sinPhi*tanPhi2*tanPhi2 - - float_T(2.0)*y*cosPhi*( - + 
om0*( - + cspeed*cspeed*( - complex_T(0,1)*t*t*wy*wy - + om0*t*tauG*tauG*wy*wy - + complex_T(0,1)*tauG*tauG*y*y - ) - - cspeed*(complex_T(0,2)*t - + om0*tauG*tauG)*wy*wy*z - + complex_T(0,1)*wy*wy*z*z - ) - + complex_T(0,2)*om0*wy*wy*y*(cspeed*t - z)*tanPhi2 - + complex_T(0,1)*tanPhi2*tanPhi2*( - complex_T(0,-4)*cspeed*y*y*z - + om0*wy*wy*(y*y - float_T(4.0)*(cspeed*t - z)*z) - ) - ) - /* The "round-trip" conversion in the line below fixes a gross accuracy bug - * in floating-point arithmetics, when float_T is set to float_X. - */ - ) * complex_T( float_64(1.0) / complex_64(float_T(2.0)*cspeed*wy*wy*helpVar1*helpVar2) ); - - const complex_T helpVar5 = cspeed*om0*tauG*tauG - - complex_T(0,8)*y*math::tan( float_T(PI / 2)-phiT ) - / sinPhi / sinPhi*sinPhi2*sinPhi2*sinPhi2*sinPhi2 - - complex_T(0,2)*z*tanPhi2*tanPhi2; - const complex_T result = (math::exp(helpVar4)*tauG - *math::sqrt((cspeed*om0*rho0) / helpVar3)) / math::sqrt(helpVar5); - return result.get_real(); - } - - /** Calculate the Ey(r,t) field here - * - * \param pos Spatial position of the target field. 
- * \param time Absolute time (SI, including all offsets and transformations) for calculating - * the field */ - HDINLINE EField::float_T - EField::calcTWTSEy( const float3_64& pos, const float_64 time) const - { - /* The field function of Ey (polarization in pulse-front-tilt plane) - * is by definition identical to Ex (polarization normal to pulse-front-tilt plane) - */ - return calcTWTSEx( pos, time ); - } - -} /* namespace twts */ -} /* namespace templates */ + { + typedef pmacc::math::Vector PosVecVec; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < simDim; ++i) + pos[k][i] = eFieldPositions_SI[k][i]; + } + + /* Calculate Ey-component with the intra-cell offset of a Ey-field */ + const float_64 Ey_Ey = calcTWTSEy(pos[1], time); + /* Calculate Ey-component with the intra-cell offset of a Ez-field */ + const float_64 Ey_Ez = calcTWTSEy(pos[2], time); + + /* Since we rotated all position vectors before calling calcTWTSEy, + * we need to back-rotate the resulting E-field vector. + * + * RotationMatrix[-(PI/2+phi)].(Ey,Ez) for rotating back the field-vectors. + */ + const float_64 Ey_rot = -math::sin(+phi) * Ey_Ey; + const float_64 Ez_rot = -math::cos(+phi) * Ey_Ez; + + /* Finally, the E-field normalized to the peak amplitude. */ + return float3_X(float_X(0.0), float_X(Ey_rot), float_X(Ez_rot)); + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized( + const pmacc::math::Vector& eFieldPositions_SI, + const float_64 time) const + { + /* Ex->Ez, so also the grid cell offset for Ez has to be used. */ + float3_64 pos(float3_64::create(0.0)); + /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. 
*/ + for(uint32_t i = 0; i < DIM2; ++i) + pos[i + 1] = eFieldPositions_SI[2][i]; + return float3_X(float_X(0.), float_X(0.), float_X(calcTWTSEx(pos, time))); + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized_Ey( + const pmacc::math::Vector& eFieldPositions_SI, + const float_64 time) const + { + typedef pmacc::math::Vector PosVecVec; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + /* The 2D output of getFieldPositions_SI only returns + * the y- and z-component of a 3D vector. + */ + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < DIM2; ++i) + pos[k][i + 1] = eFieldPositions_SI[k][i]; + } + + /* Ey->Ey, but grid cell offsets for Ex and Ey have to be used. + * + * Calculate Ey-component with the intra-cell offset of a Ey-field + */ + const float_64 Ey_Ey = calcTWTSEy(pos[1], time); + /* Calculate Ey-component with the intra-cell offset of a Ex-field */ + const float_64 Ey_Ex = calcTWTSEy(pos[0], time); + + /* Since we rotated all position vectors before calling calcTWTSEy, + * we need to back-rotate the resulting E-field vector. + * + * RotationMatrix[-(PI / 2+phi)].(Ey,Ex) for rotating back the field-vectors. + */ + const float_64 Ey_rot = -math::sin(+phi) * Ey_Ey; + const float_64 Ex_rot = -math::cos(+phi) * Ey_Ex; + + /* Finally, the E-field normalized to the peak amplitude. 
*/ + return float3_X(float_X(Ex_rot), float_X(Ey_rot), float_X(0.0)); + } + + HDINLINE float3_X EField::operator()(const DataSpace& cellIdx, const uint32_t currentStep) const + { + const float_64 time_SI = float_64(currentStep) * dt - tdelay; + const traits::FieldPosition fieldPosE; + + const pmacc::math::Vector eFieldPositions_SI + = detail::getFieldPositions_SI(cellIdx, halfSimSize, fieldPosE(), unit_length, focus_y_SI, phi); + + /* Single TWTS-Pulse */ + switch(pol) + { + case LINEAR_X: + return getTWTSEfield_Normalized(eFieldPositions_SI, time_SI); + + case LINEAR_YZ: + return getTWTSEfield_Normalized_Ey(eFieldPositions_SI, time_SI); + } + return getTWTSEfield_Normalized(eFieldPositions_SI, time_SI); // defensive default + } + + /** Calculate the Ex(r,t) field here + * + * \param pos Spatial position of the target field. + * \param time Absolute time (SI, including all offsets and transformations) for calculating + * the field */ + HDINLINE EField::float_T EField::calcTWTSEx(const float3_64& pos, const float_64 time) const + { + using complex_T = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + /* Unit of speed */ + const float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; + /* Unit of time */ + const float_64 UNIT_TIME = SI::DELTA_T_SI; + /* Unit of length */ + const float_64 UNIT_LENGTH = UNIT_TIME * UNIT_SPEED; + + /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + const float_T beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. + */ + const float_T phiReal = float_T(math::abs(phi)); + const float_T alphaTilt + = math::atan2(float_T(1.0) - beta0 * math::cos(phiReal), beta0 * math::sin(phiReal)); + /* Definition of the laser pulse front tilt angle for the laser field below. 
+ * + * For beta0 = 1.0, this is equivalent to our standard definition. Question: Why is the + * local "phi_T" not equal in value to the object member "phiReal" or "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + const float_T phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for + * documentation purposes. + * const float_T eta = (PI / 2) - (phiReal - alphaTilt); + */ + + const float_T cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + const float_T lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + const float_T om0 = float_T(2.0 * PI * cspeed / lambda0); + /* factor 2 in tauG arises from definition convention in laser formula */ + const float_T tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + const float_T w0 = float_T(w_x_SI / UNIT_LENGTH); + const float_T rho0 = float_T(PI * w0 * w0 / lambda0); + /* wy is width of TWTS pulse */ + const float_T wy = float_T(w_y_SI / UNIT_LENGTH); + const float_T k = float_T(2.0 * PI / lambda0); + const float_T x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + const float_T y = float_T(phiPositive * pos.y() / UNIT_LENGTH); + const float_T z = float_T(pos.z() / UNIT_LENGTH); + const float_T t = float_T(time / UNIT_TIME); + + /* Calculating shortcuts for speeding up field calculation */ + const float_T sinPhi = math::sin(phiT); + const float_T cosPhi = math::cos(phiT); + const float_T sinPhi2 = math::sin(phiT / float_T(2.0)); + const float_T cosPhi2 = math::cos(phiT / float_T(2.0)); + const float_T tanPhi2 = math::tan(phiT / float_T(2.0)); + + /* The 
"helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection. + */ + const complex_T helpVar1 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + const complex_T helpVar2 = complex_T(0, -1) * cspeed * om0 * tauG * tauG + - y * cosPhi / cosPhi2 / cosPhi2 * tanPhi2 - float_T(2.0) * z * tanPhi2 * tanPhi2; + const complex_T helpVar3 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + + const complex_T helpVar4 + = (-(cspeed * cspeed * k * om0 * tauG * tauG * wy * wy * x * x) + - float_T(2.0) * cspeed * cspeed * om0 * t * t * wy * wy * rho0 + + complex_T(0, 2) * cspeed * cspeed * om0 * om0 * t * tauG * tauG * wy * wy * rho0 + - float_T(2.0) * cspeed * cspeed * om0 * tauG * tauG * y * y * rho0 + + float_T(4.0) * cspeed * om0 * t * wy * wy * z * rho0 + - complex_T(0, 2) * cspeed * om0 * om0 * tauG * tauG * wy * wy * z * rho0 + - float_T(2.0) * om0 * wy * wy * z * z * rho0 + - complex_T(0, 8) * om0 * wy * wy * y * (cspeed * t - z) * z * sinPhi2 * sinPhi2 + + complex_T(0, 8) / sinPhi + * (+float_T(2.0) * z * z + * (cspeed * om0 * t * wy * wy + complex_T(0, 1) * cspeed * y * y - om0 * wy * wy * z) + + y + * (+cspeed * k * wy * wy * x * x + - complex_T(0, 2) * cspeed * om0 * t * wy * wy * rho0 + + float_T(2.0) * cspeed * y * y * rho0 + + complex_T(0, 2) * om0 * wy * wy * z * rho0) + * math::tan(float_T(PI / 2.0) - phiT) / sinPhi) + * sinPhi2 * sinPhi2 * sinPhi2 * sinPhi2 + - complex_T(0, 2) * cspeed * cspeed * om0 * t * t * wy * wy * z * sinPhi + - float_T(2.0) * cspeed * cspeed * om0 * om0 * t * tauG * tauG * wy * wy * z * sinPhi + - complex_T(0, 2) * cspeed * cspeed * om0 * tauG * tauG * y * y * z * sinPhi + + complex_T(0, 4) * cspeed * om0 * t * wy * wy * z * z * sinPhi + + float_T(2.0) * cspeed * om0 * om0 * tauG * tauG * wy * wy * z * z * sinPhi + - complex_T(0, 2) * om0 * wy * wy * z * z * z * sinPhi + - float_T(4.0) * cspeed * om0 * t * wy * wy * y * rho0 * tanPhi2 + + 
float_T(4.0) * om0 * wy * wy * y * z * rho0 * tanPhi2 + + complex_T(0, 2) * y * y + * (+cspeed * om0 * t * wy * wy + complex_T(0, 1) * cspeed * y * y - om0 * wy * wy * z) + * cosPhi * cosPhi / cosPhi2 / cosPhi2 * tanPhi2 + + complex_T(0, 2) * cspeed * k * wy * wy * x * x * z * tanPhi2 * tanPhi2 + - float_T(2.0) * om0 * wy * wy * y * y * rho0 * tanPhi2 * tanPhi2 + + float_T(4.0) * cspeed * om0 * t * wy * wy * z * rho0 * tanPhi2 * tanPhi2 + + complex_T(0, 4) * cspeed * y * y * z * rho0 * tanPhi2 * tanPhi2 + - float_T(4.0) * om0 * wy * wy * z * z * rho0 * tanPhi2 * tanPhi2 + - complex_T(0, 2) * om0 * wy * wy * y * y * z * sinPhi * tanPhi2 * tanPhi2 + - float_T(2.0) * y * cosPhi + * (+om0 + * (+cspeed * cspeed + * (complex_T(0, 1) * t * t * wy * wy + om0 * t * tauG * tauG * wy * wy + + complex_T(0, 1) * tauG * tauG * y * y) + - cspeed * (complex_T(0, 2) * t + om0 * tauG * tauG) * wy * wy * z + + complex_T(0, 1) * wy * wy * z * z) + + complex_T(0, 2) * om0 * wy * wy * y * (cspeed * t - z) * tanPhi2 + + complex_T(0, 1) * tanPhi2 * tanPhi2 + * (complex_T(0, -4) * cspeed * y * y * z + + om0 * wy * wy * (y * y - float_T(4.0) * (cspeed * t - z) * z))) + /* The "round-trip" conversion in the line below fixes a gross accuracy bug + * in floating-point arithmetics, when float_T is set to float_X. + */ + ) + * complex_T(float_64(1.0) / complex_64(float_T(2.0) * cspeed * wy * wy * helpVar1 * helpVar2)); + + const complex_T helpVar5 = cspeed * om0 * tauG * tauG + - complex_T(0, 8) * y * math::tan(float_T(PI / 2) - phiT) / sinPhi / sinPhi * sinPhi2 * sinPhi2 + * sinPhi2 * sinPhi2 + - complex_T(0, 2) * z * tanPhi2 * tanPhi2; + const complex_T result = (math::exp(helpVar4) * tauG * math::sqrt((cspeed * om0 * rho0) / helpVar3)) + / math::sqrt(helpVar5); + return result.get_real(); + } + + /** Calculate the Ey(r,t) field here + * + * \param pos Spatial position of the target field. 
+ * \param time Absolute time (SI, including all offsets and transformations) for calculating + * the field */ + HDINLINE EField::float_T EField::calcTWTSEy(const float3_64& pos, const float_64 time) const + { + /* The field function of Ey (polarization in pulse-front-tilt plane) + * is by definition identical to Ex (polarization normal to pulse-front-tilt plane) + */ + return calcTWTSEx(pos, time); + } + + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/GetInitialTimeDelay_SI.tpp b/include/picongpu/fields/background/templates/TWTS/GetInitialTimeDelay_SI.tpp index acf73c8f45..0975f83a34 100644 --- a/include/picongpu/fields/background/templates/TWTS/GetInitialTimeDelay_SI.tpp +++ b/include/picongpu/fields/background/templates/TWTS/GetInitialTimeDelay_SI.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus +/* Copyright 2014-2021 Alexander Debus * * This file is part of PIConGPU. * @@ -26,134 +26,137 @@ namespace picongpu { -namespace templates -{ -namespace twts -{ -/* Auxiliary functions for calculating the TWTS field */ -namespace detail -{ - - template - class GetInitialTimeDelay + namespace templates { - public: - /** Obtain the SI time delay that later enters the Ex(r, t), By(r, t) and Bz(r, t) - * calculations as t. - * \tparam T_dim Specializes for the simulation dimension - * \param auto_tdelay calculate the time delay such that the TWTS pulse is not - * inside the simulation volume at simulation start - * timestep = 0 [default = true] - * \param tdelay_user_SI manual time delay if auto_tdelay is false - * \param halfSimSize center of simulation volume in number of cells - * \param pulselength_SI sigma of std. 
gauss for intensity (E^2) - * \param focus_y_SI the distance to the laser focus in y-direction [m] - * \param phi interaction angle between TWTS laser propagation vector and - * the y-axis [rad, default = 90.*(PI / 180.)] - * \param beta_0 propagation speed of overlap normalized - * to the speed of light [c, default = 1.0] - * \return time delay in SI units */ - HDINLINE float_64 operator()( const bool auto_tdelay, - const float_64 tdelay_user_SI, - const DataSpace& halfSimSize, - const float_64 pulselength_SI, - const float_64 focus_y_SI, - const float_X phi, - const float_X beta_0 ) const; - }; + namespace twts + { + /* Auxiliary functions for calculating the TWTS field */ + namespace detail + { + template + class GetInitialTimeDelay + { + public: + /** Obtain the SI time delay that later enters the Ex(r, t), By(r, t) and Bz(r, t) + * calculations as t. + * \tparam T_dim Specializes for the simulation dimension + * \param auto_tdelay calculate the time delay such that the TWTS pulse is not + * inside the simulation volume at simulation start + * timestep = 0 [default = true] + * \param tdelay_user_SI manual time delay if auto_tdelay is false + * \param halfSimSize center of simulation volume in number of cells + * \param pulselength_SI sigma of std. 
gauss for intensity (E^2) + * \param focus_y_SI the distance to the laser focus in y-direction [m] + * \param phi interaction angle between TWTS laser propagation vector and + * the y-axis [rad, default = 90.*(PI / 180.)] + * \param beta_0 propagation speed of overlap normalized + * to the speed of light [c, default = 1.0] + * \return time delay in SI units */ + HDINLINE float_64 operator()( + const bool auto_tdelay, + const float_64 tdelay_user_SI, + const DataSpace& halfSimSize, + const float_64 pulselength_SI, + const float_64 focus_y_SI, + const float_X phi, + const float_X beta_0) const; + }; - template<> - HDINLINE float_64 - GetInitialTimeDelay::operator()( const bool auto_tdelay, - const float_64 tdelay_user_SI, - const DataSpace& halfSimSize, - const float_64 pulselength_SI, - const float_64 focus_y_SI, - const float_X phi, - const float_X beta_0 ) const - { - if ( auto_tdelay ) { + template<> + HDINLINE float_64 GetInitialTimeDelay::operator()( + const bool auto_tdelay, + const float_64 tdelay_user_SI, + const DataSpace& halfSimSize, + const float_64 pulselength_SI, + const float_64 focus_y_SI, + const float_X phi, + const float_X beta_0) const + { + if(auto_tdelay) + { + /* angle between the laser pulse front and the y-axis. Good approximation for + * beta0\simeq 1. For exact relation look in TWTS core routines for Ex, By or Bz. */ + const float_64 eta = (PI / 2) - (phi / 2); + /* halfSimSize[2] --> Half-depth of simulation volume (in z); By geometric + * projection we calculate the y-distance walkoff of the TWTS-pulse. + * The abs()-function is for correct offset for -phi<-90Deg and +phi>+90Deg. */ + const float_64 y1 + = float_64(halfSimSize[2] * picongpu::SI::CELL_DEPTH_SI) * math::abs(math::cos(eta)); + /* Fudge parameter to make sure, that TWTS pulse starts to impact simulation volume + * at low intensity values. */ + const float_64 m = 3.; + /* Approximate cross section of laser pulse through y-axis, + * scaled with "fudge factor" m. 
*/ + const float_64 y2 = m * (pulselength_SI * picongpu::SI::SPEED_OF_LIGHT_SI) / math::cos(eta); + /* y-position of laser coordinate system origin within simulation. */ + const float_64 y3 = focus_y_SI; + /* Programmatically obtained time-delay */ + const float_64 tdelay = (y1 + y2 + y3) / (picongpu::SI::SPEED_OF_LIGHT_SI * beta_0); - /* angle between the laser pulse front and the y-axis. Good approximation for - * beta0\simeq 1. For exact relation look in TWTS core routines for Ex, By or Bz. */ - const float_64 eta = (PI / 2) - (phi / 2); - /* halfSimSize[2] --> Half-depth of simulation volume (in z); By geometric - * projection we calculate the y-distance walkoff of the TWTS-pulse. - * The abs()-function is for correct offset for -phi<-90Deg and +phi>+90Deg. */ - const float_64 y1 = float_64(halfSimSize[2] - *picongpu::SI::CELL_DEPTH_SI)*math::abs(math::cos(eta)); - /* Fudge parameter to make sure, that TWTS pulse starts to impact simulation volume - * at low intensity values. */ - const float_64 m = 3.; - /* Approximate cross section of laser pulse through y-axis, - * scaled with "fudge factor" m. */ - const float_64 y2 = m*(pulselength_SI*picongpu::SI::SPEED_OF_LIGHT_SI) - / math::cos(eta); - /* y-position of laser coordinate system origin within simulation. */ - const float_64 y3 = focus_y_SI; - /* Programmatically obtained time-delay */ - const float_64 tdelay = (y1+y2+y3) / (picongpu::SI::SPEED_OF_LIGHT_SI*beta_0); + return tdelay; + } + else + return tdelay_user_SI; + } - return tdelay; - } - else - return tdelay_user_SI; - } + template<> + HDINLINE float_64 GetInitialTimeDelay::operator()( + const bool auto_tdelay, + const float_64 tdelay_user_SI, + const DataSpace& halfSimSize, + const float_64 pulselength_SI, + const float_64 focus_y_SI, + const float_X phi, + const float_X beta_0) const + { + if(auto_tdelay) + { + /* angle between the laser pulse front and the y-axis. Good approximation for + * beta0\simeq 1. 
For exact relation look in TWTS core routines for Ex, By or Bz. */ + const float_64 eta = (PI / 2) - (phi / 2); + /* halfSimSize[0] --> Half-depth of simulation volume (in x); By geometric + * projection we calculate the y-distance walkoff of the TWTS-pulse. + * The abs()-function is for correct offset for -phi<-90Deg and +phi>+90Deg. */ + const float_64 y1 + = float_64(halfSimSize[0] * picongpu::SI::CELL_WIDTH_SI) * math::abs(math::cos(eta)); + /* Fudge parameter to make sure, that TWTS pulse starts to impact simulation volume + * at low intensity values. */ + const float_64 m = 3.; + /* Approximate cross section of laser pulse through y-axis, + * scaled with "fudge factor" m. */ + const float_64 y2 = m * (pulselength_SI * picongpu::SI::SPEED_OF_LIGHT_SI) / math::cos(eta); + /* y-position of laser coordinate system origin within simulation. */ + const float_64 y3 = focus_y_SI; + /* Programmatically obtained time-delay */ + const float_64 tdelay = (y1 + y2 + y3) / (picongpu::SI::SPEED_OF_LIGHT_SI * beta_0); - template <> - HDINLINE float_64 - GetInitialTimeDelay::operator()( const bool auto_tdelay, - const float_64 tdelay_user_SI, - const DataSpace& halfSimSize, - const float_64 pulselength_SI, - const float_64 focus_y_SI, - const float_X phi, - const float_X beta_0 ) const - { - if ( auto_tdelay ) { - - /* angle between the laser pulse front and the y-axis. Good approximation for - * beta0\simeq 1. For exact relation look in TWTS core routines for Ex, By or Bz. */ - const float_64 eta = (PI / 2) - (phi / 2); - /* halfSimSize[0] --> Half-depth of simulation volume (in x); By geometric - * projection we calculate the y-distance walkoff of the TWTS-pulse. - * The abs()-function is for correct offset for -phi<-90Deg and +phi>+90Deg. */ - const float_64 y1 = float_64(halfSimSize[0] - *picongpu::SI::CELL_WIDTH_SI)*math::abs(math::cos(eta)); - /* Fudge parameter to make sure, that TWTS pulse starts to impact simulation volume - * at low intensity values. 
*/ - const float_64 m = 3.; - /* Approximate cross section of laser pulse through y-axis, - * scaled with "fudge factor" m. */ - const float_64 y2 = m*(pulselength_SI*picongpu::SI::SPEED_OF_LIGHT_SI) - / math::cos(eta); - /* y-position of laser coordinate system origin within simulation. */ - const float_64 y3 = focus_y_SI; - /* Programmatically obtained time-delay */ - const float_64 tdelay = (y1+y2+y3) / (picongpu::SI::SPEED_OF_LIGHT_SI*beta_0); + return tdelay; + } + else + return tdelay_user_SI; + } - return tdelay; - } - else - return tdelay_user_SI; - } - - template - HDINLINE float_64 - getInitialTimeDelay_SI( const bool auto_tdelay, - const float_64 tdelay_user_SI, - const DataSpace& halfSimSize, - const float_64 pulselength_SI, - const float_64 focus_y_SI, - const float_X phi, - const float_X beta_0 ) - { - return GetInitialTimeDelay()(auto_tdelay, tdelay_user_SI, - halfSimSize, pulselength_SI, - focus_y_SI, phi, beta_0); - } + template + HDINLINE float_64 getInitialTimeDelay_SI( + const bool auto_tdelay, + const float_64 tdelay_user_SI, + const DataSpace& halfSimSize, + const float_64 pulselength_SI, + const float_64 focus_y_SI, + const float_X phi, + const float_X beta_0) + { + return GetInitialTimeDelay()( + auto_tdelay, + tdelay_user_SI, + halfSimSize, + pulselength_SI, + focus_y_SI, + phi, + beta_0); + } -} /* namespace detail */ -} /* namespace twts */ -} /* namespace templates */ + } /* namespace detail */ + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/RotateField.tpp b/include/picongpu/fields/background/templates/TWTS/RotateField.tpp index 401bc135a9..a422161346 100644 --- a/include/picongpu/fields/background/templates/TWTS/RotateField.tpp +++ b/include/picongpu/fields/background/templates/TWTS/RotateField.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus, Rene Widera +/* Copyright 2014-2021 Alexander Debus, Rene Widera * * This file is part 
of PIConGPU. * @@ -26,96 +26,90 @@ namespace picongpu { -namespace templates -{ -namespace twts -{ -/** Auxiliary functions for calculating the TWTS field */ -namespace detail -{ - - template - struct RotateField; - - template - struct RotateField, T_AngleType > + namespace templates { - typedef pmacc::math::Vector result; - typedef T_AngleType AngleType; - HDINLINE result - operator()( const result& fieldPosVector, - const AngleType phi ) const + namespace twts { - /* Since, the laser propagation direction encloses an angle of phi with the - * simulation y-axis (i.e. direction of sliding window), the positions vectors are - * rotated around the simulation x-axis before calling the TWTS field functions. - * Note: The TWTS field functions are in non-rotated frame and only use the angle - * phi to determine the required amount of pulse front tilt. - * RotationMatrix[PI/2+phi].(y,z) (180Deg-flip at phi=90Deg since coordinate - * system in paper is oriented the other way round.) */ - return result( - fieldPosVector.x(), - -math::sin(AngleType(phi))*fieldPosVector.y() - -math::cos(AngleType(phi))*fieldPosVector.z() , - +math::cos(AngleType(phi))*fieldPosVector.y() - -math::sin(AngleType(phi))*fieldPosVector.z() ); - } + /** Auxiliary functions for calculating the TWTS field */ + namespace detail + { + template + struct RotateField; - }; + template + struct RotateField, T_AngleType> + { + typedef pmacc::math::Vector result; + typedef T_AngleType AngleType; + HDINLINE result operator()(const result& fieldPosVector, const AngleType phi) const + { + /* Since, the laser propagation direction encloses an angle of phi with the + * simulation y-axis (i.e. direction of sliding window), the positions vectors are + * rotated around the simulation x-axis before calling the TWTS field functions. + * Note: The TWTS field functions are in non-rotated frame and only use the angle + * phi to determine the required amount of pulse front tilt. 
+ * RotationMatrix[PI/2+phi].(y,z) (180Deg-flip at phi=90Deg since coordinate + * system in paper is oriented the other way round.) */ + return result( + fieldPosVector.x(), + -math::sin(AngleType(phi)) * fieldPosVector.y() + - math::cos(AngleType(phi)) * fieldPosVector.z(), + +math::cos(AngleType(phi)) * fieldPosVector.y() + - math::sin(AngleType(phi)) * fieldPosVector.z()); + } + }; - template - struct RotateField, T_AngleType > - { - typedef pmacc::math::Vector result; - typedef T_AngleType AngleType; - HDINLINE result - operator()( const result& fieldPosVector, - const AngleType phi ) const - { - /* Since, the laser propagation direction encloses an angle of phi with the - * simulation y-axis (i.e. direction of sliding window), the positions vectors are - * rotated around the simulation x-axis before calling the TWTS field functions. - * Note: The TWTS field functions are in non-rotated frame and only use the angle - * phi to determine the required amount of pulse front tilt. - * RotationMatrix[PI/2+phi].(y,z) (180Deg-flip at phi=90Deg since coordinate - * system in paper is oriented the other way round.) */ + template + struct RotateField, T_AngleType> + { + typedef pmacc::math::Vector result; + typedef T_AngleType AngleType; + HDINLINE result operator()(const result& fieldPosVector, const AngleType phi) const + { + /* Since, the laser propagation direction encloses an angle of phi with the + * simulation y-axis (i.e. direction of sliding window), the positions vectors are + * rotated around the simulation x-axis before calling the TWTS field functions. + * Note: The TWTS field functions are in non-rotated frame and only use the angle + * phi to determine the required amount of pulse front tilt. + * RotationMatrix[PI/2+phi].(y,z) (180Deg-flip at phi=90Deg since coordinate + * system in paper is oriented the other way round.) */ - /* Rotate 90 degree around y-axis, so that TWTS laser propagates within - * the 2D (x,y)-plane. 
Corresponding position vector for the Ez-components - * in 2D simulations. - * 3D 3D vectors in 2D space (x,y) - * x --> z - * y --> y - * z --> -x (Since z=0 for 2D, we use the existing - * TWTS-field-function and set -x=0) - * - * Explicit implementation in 3D coordinates: - * fieldPosVector = float3_64( -fieldPosVector.z(), //(Here: ==0) - * fieldPosVector.y(), - * fieldPosVector.x() ); - * fieldPosVector = float3_64( fieldPosVector.x(), - * -sin(phi)*fieldPosVector.y()-cos(phi)*fieldPosVector.z(), - * +cos(phi)*fieldPosVector.y()-sin(phi)*fieldPosVector.z() ); - * The 2D implementation here only calculates the last two components. - * Note: The x-axis of rotation is fine in 2D, because that component now contains - * the (non-existing) simulation z-coordinate. */ - return result( - -math::sin(AngleType(phi))*fieldPosVector.y() - -math::cos(AngleType(phi))*fieldPosVector.x() , - +math::cos(AngleType(phi))*fieldPosVector.y() - -math::sin(AngleType(phi))*fieldPosVector.x() ); - } - }; + /* Rotate 90 degree around y-axis, so that TWTS laser propagates within + * the 2D (x,y)-plane. Corresponding position vector for the Ez-components + * in 2D simulations. + * 3D 3D vectors in 2D space (x,y) + * x --> z + * y --> y + * z --> -x (Since z=0 for 2D, we use the existing + * TWTS-field-function and set -x=0) + * + * Explicit implementation in 3D coordinates: + * fieldPosVector = float3_64( -fieldPosVector.z(), //(Here: ==0) + * fieldPosVector.y(), + * fieldPosVector.x() ); + * fieldPosVector = float3_64( fieldPosVector.x(), + * -sin(phi)*fieldPosVector.y()-cos(phi)*fieldPosVector.z(), + * +cos(phi)*fieldPosVector.y()-sin(phi)*fieldPosVector.z() ); + * The 2D implementation here only calculates the last two components. + * Note: The x-axis of rotation is fine in 2D, because that component now contains + * the (non-existing) simulation z-coordinate. 
*/ + return result( + -math::sin(AngleType(phi)) * fieldPosVector.y() + - math::cos(AngleType(phi)) * fieldPosVector.x(), + +math::cos(AngleType(phi)) * fieldPosVector.y() + - math::sin(AngleType(phi)) * fieldPosVector.x()); + } + }; - template - HDINLINE typename RotateField::result - rotateField( const T_Type& fieldPosVector, - const T_AngleType phi ) - { - return RotateField()(fieldPosVector,phi); - } + template + HDINLINE typename RotateField::result rotateField( + const T_Type& fieldPosVector, + const T_AngleType phi) + { + return RotateField()(fieldPosVector, phi); + } -} /* namespace detail */ -} /* namespace twts */ -} /* namespace templates */ + } /* namespace detail */ + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/TWTS.hpp b/include/picongpu/fields/background/templates/TWTS/TWTS.hpp index 0757567c52..a93796b405 100644 --- a/include/picongpu/fields/background/templates/TWTS/TWTS.hpp +++ b/include/picongpu/fields/background/templates/TWTS/TWTS.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus +/* Copyright 2014-2021 Alexander Debus * * This file is part of PIConGPU. * diff --git a/include/picongpu/fields/background/templates/TWTS/TWTS.tpp b/include/picongpu/fields/background/templates/TWTS/TWTS.tpp index 9dd20c62b4..248c501da9 100644 --- a/include/picongpu/fields/background/templates/TWTS/TWTS.tpp +++ b/include/picongpu/fields/background/templates/TWTS/TWTS.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus +/* Copyright 2014-2021 Alexander Debus * * This file is part of PIConGPU. 
* diff --git a/include/picongpu/fields/background/templates/TWTS/getFieldPositions_SI.tpp b/include/picongpu/fields/background/templates/TWTS/getFieldPositions_SI.tpp index e1f8f086d1..a88de8b99c 100644 --- a/include/picongpu/fields/background/templates/TWTS/getFieldPositions_SI.tpp +++ b/include/picongpu/fields/background/templates/TWTS/getFieldPositions_SI.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus +/* Copyright 2014-2021 Alexander Debus * * This file is part of PIConGPU. * @@ -27,55 +27,55 @@ namespace picongpu { -namespace templates -{ -namespace twts -{ -/** Auxiliary functions for calculating the TWTS field */ -namespace detail -{ - /** Calculate the SI position vectors that later enter the Ex(r, t), By(r, t) - * and Bz(r ,t) calculations as r. - * \param cellIdx The total cell id counted from the start at timestep 0. */ - HDINLINE pmacc::math::Vector - getFieldPositions_SI(const DataSpace& cellIdx, - const DataSpace& halfSimSize, - const pmacc::math::Vector& fieldOnGridPositions, - const float_64 unit_length, - const float_64 focus_y_SI, - const float_X phi ) + namespace templates { - /* Note: Neither direct precisionCast on picongpu::cellSize - or casting on floatD_ does work. */ - const floatD_64 cellDim(picongpu::cellSize.shrink()); - const floatD_64 cellDimensions = cellDim * unit_length; + namespace twts + { + /** Auxiliary functions for calculating the TWTS field */ + namespace detail + { + /** Calculate the SI position vectors that later enter the Ex(r, t), By(r, t) + * and Bz(r ,t) calculations as r. + * \param cellIdx The total cell id counted from the start at timestep 0. */ + HDINLINE pmacc::math::Vector getFieldPositions_SI( + const DataSpace& cellIdx, + const DataSpace& halfSimSize, + const pmacc::math::Vector& fieldOnGridPositions, + const float_64 unit_length, + const float_64 focus_y_SI, + const float_X phi) + { + /* Note: Neither direct precisionCast on picongpu::cellSize + or casting on floatD_ does work. 
*/ + const floatD_64 cellDim(picongpu::cellSize.shrink()); + const floatD_64 cellDimensions = cellDim * unit_length; - /* TWTS laser coordinate origin is centered transversally and defined longitudinally by - the laser center in y (usually maximum of intensity). */ - floatD_X laserOrigin = precisionCast(halfSimSize); - laserOrigin.y() = float_X( focus_y_SI/cellDimensions.y() ); + /* TWTS laser coordinate origin is centered transversally and defined longitudinally by + the laser center in y (usually maximum of intensity). */ + floatD_X laserOrigin = precisionCast(halfSimSize); + laserOrigin.y() = float_X(focus_y_SI / cellDimensions.y()); - /* For staggered fields (e.g. Yee-grid), obtain the fractional cell index components and add - * that to the total cell indices. The physical field coordinate origin is transversally - * centered with respect to the global simulation volume. - * pmacc::math::Vector fieldPositions = - * traits::FieldPosition(); */ - pmacc::math::Vector fieldPositions = fieldOnGridPositions; + /* For staggered fields (e.g. Yee-grid), obtain the fractional cell index components and add + * that to the total cell indices. The physical field coordinate origin is transversally + * centered with respect to the global simulation volume. 
+ * pmacc::math::Vector fieldPositions = + * traits::FieldPosition(); */ + pmacc::math::Vector fieldPositions = fieldOnGridPositions; - pmacc::math::Vector fieldPositions_SI; + pmacc::math::Vector fieldPositions_SI; - for( uint32_t i = 0; i < numComponents; ++i ) /* cellIdx Ex, Ey and Ez */ - { - fieldPositions[i] += ( precisionCast(cellIdx) - laserOrigin ); - fieldPositions_SI[i] = precisionCast(fieldPositions[i]) * cellDimensions; + for(uint32_t i = 0; i < numComponents; ++i) /* cellIdx Ex, Ey and Ez */ + { + fieldPositions[i] += (precisionCast(cellIdx) - laserOrigin); + fieldPositions_SI[i] = precisionCast(fieldPositions[i]) * cellDimensions; - fieldPositions_SI[i] = rotateField(fieldPositions_SI[i],phi); - } + fieldPositions_SI[i] = rotateField(fieldPositions_SI[i], phi); + } - return fieldPositions_SI; - } + return fieldPositions_SI; + } -} /* namespace detail */ -} /* namespace twts */ -} /* namespace templates */ + } /* namespace detail */ + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/TWTS/numComponents.hpp b/include/picongpu/fields/background/templates/TWTS/numComponents.hpp index 7ea538b4a1..7d40e6d593 100644 --- a/include/picongpu/fields/background/templates/TWTS/numComponents.hpp +++ b/include/picongpu/fields/background/templates/TWTS/numComponents.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Alexander Debus, Axel Huebl +/* Copyright 2014-2021 Alexander Debus, Axel Huebl * * This file is part of PIConGPU. * @@ -22,15 +22,15 @@ namespace picongpu { -namespace templates -{ -namespace twts -{ -namespace detail -{ - /* Number of field components used in the simulation. [Default: 3 for both 2D and 3D] */ - const uint32_t numComponents = 3; -} /* namespace detail */ -} /* namespace twts */ -} /* namespace templates */ + namespace templates + { + namespace twts + { + namespace detail + { + /* Number of field components used in the simulation. 
[Default: 3 for both 2D and 3D] */ + const uint32_t numComponents = 3; + } /* namespace detail */ + } /* namespace twts */ + } /* namespace templates */ } /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/BField.hpp b/include/picongpu/fields/background/templates/twtsfast/BField.hpp new file mode 100644 index 0000000000..b06600a962 --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/BField.hpp @@ -0,0 +1,182 @@ +/* Copyright 2014-2021 Alexander Debus, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + + +#pragma once + +#include + +#include +#include +#include "picongpu/fields/background/templates/twtsfast/numComponents.hpp" + +namespace picongpu +{ + /* Load pre-defined background field */ + namespace templates + { + /* Traveling-wave Thomson scattering laser pulse */ + namespace twtsfast + { + class BField + { + public: + using float_T = float_X; + + enum PolarizationType + { + /** The linear polarization of the TWTS laser is defined + * relative to the plane of the pulse front tilt (reference plane). + * + * Polarisation is normal to the reference plane. + * Use Ex-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_X = 1u, + /** Polarization lies within the reference plane. 
+ * Use Ey-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_YZ = 2u, + }; + + /** Center of simulation volume in number of cells */ + PMACC_ALIGN(halfSimSize, DataSpace); + /** y-position of TWTS coordinate origin inside the simulation coordinates [meter] + * The other origin coordinates (x and z) default to globally centered values + * with respect to the simulation volume. + */ + PMACC_ALIGN(focus_y_SI, float_64 const); + /** Laser wavelength [meter] */ + PMACC_ALIGN(wavelength_SI, float_64 const); + /** TWTS laser pulse duration [second] */ + PMACC_ALIGN(pulselength_SI, float_64 const); + /** line focus height of TWTS pulse [meter] */ + PMACC_ALIGN(w_x_SI, float_64 const); + /** interaction angle between TWTS laser propagation vector and the y-axis [rad] */ + PMACC_ALIGN(phi, float_X const); + /** Takes value 1.0 for phi > 0 and -1.0 for phi < 0. */ + PMACC_ALIGN(phiPositive, float_X); + /** propagation speed of TWTS laser overlap + normalized to the speed of light. [Default: beta0 = 1.0] */ + PMACC_ALIGN(beta_0, float_X const); + /** If auto_tdelay=FALSE, then a user defined delay is used. [second] */ + PMACC_ALIGN(tdelay_user_SI, float_64 const); + /** Make time step constant accessible to device. */ + PMACC_ALIGN(dt, float_64 const); + /** Make length normalization constant accessible to device. */ + PMACC_ALIGN(unit_length, float_64 const); + /** TWTS laser time delay */ + PMACC_ALIGN(tdelay, float_64); + /** Should the TWTS laser time delay be chosen automatically, such that + * the laser gradually enters the simulation volume? [Default: TRUE] + */ + PMACC_ALIGN(auto_tdelay, bool const); + /** Polarization of TWTS laser */ + PMACC_ALIGN(pol, PolarizationType const); + + /** Magnetic field of the TWTS laser + * + * @param focus_y_SI the distance to the laser focus in y-direction [m] + * @param wavelength_SI central wavelength [m] + * @param pulselength_SI sigma of std. 
gauss for intensity (E^2), + * pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)] + * @param w_x_SI beam waist: distance from the axis where the pulse electric field + * decreases to its 1/e^2-th part at the focus position of the laser [m] + * @param phi interaction angle between TWTS laser propagation vector and + * the y-axis [rad, default = 90.*(PI/180.)] + * @param beta_0 propagation speed of overlap normalized to + * the speed of light [c, default = 1.0] + * @param tdelay_user_SI manual time delay if auto_tdelay is false + * @param auto_tdelay calculate the time delay such that the TWTS pulse is not + * inside the simulation volume at simulation start timestep = 0 [default = true] + * @param pol determines the TWTS laser polarization, which is either normal or parallel + * to the laser pulse front tilt plane [LINEAR_X (default) or LINEAR_YZ] + */ + HINLINE + BField( + float_64 const focus_y_SI, + float_64 const wavelength_SI, + float_64 const pulselength_SI, + float_64 const w_x_SI, + float_X const phi = 90. * (PI / 180.), + float_X const beta_0 = 1.0, + float_64 const tdelay_user_SI = 0.0, + bool const auto_tdelay = true, + PolarizationType const pol = LINEAR_X); + + + /** Specify your background field B(r,t) here + * + * @param cellIdx The total cell id counted from the start at t=0 + * @param currentStep The current time step */ + HDINLINE float3_X operator()(DataSpace const& cellIdx, uint32_t const currentStep) const; + + /** Calculate the By(r,t) field, when electric field vector (Ex,0,0) + * is normal to the pulse-front-tilt plane (y,z) + * + * @param pos Spatial position of the target field.
+ * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBy(float3_64 const& pos, float_64 const time) const; + + /** Calculate the Bz(r,t) field, when electric field vector (Ex,0,0) + * is normal to the pulse-front-tilt plane (y,z) + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBz_Ex(float3_64 const& pos, float_64 const time) const; + + /** Calculate the By(r,t) field, when electric field vector (0,Ey,0) + * lies within the pulse-front-tilt plane (y,z) + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBx(float3_64 const& pos, float_64 const time) const; + + /** Calculate the Bz(r,t) field here (electric field vector (0,Ey,0) + * lies within the pulse-front-tilt plane (y,z) + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE float_T calcTWTSBz_Ey(float3_64 const& pos, float_64 const time) const; + + /** Calculate the B-field vector of the TWTS laser in SI units. + * @tparam T_dim Specializes for the simulation dimension + * @param cellIdx The total cell id counted from the start at timestep 0 + * @return B-field vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSBfield_Normalized( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const; + + /** Calculate the B-field vector of the "in-plane" polarized TWTS laser in SI units. 
+ * @tparam T_dim Specializes for the simulation dimension + * @param cellIdx The total cell id counted from the start at timestep 0 + * @return B-field vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSBfield_Normalized_Ey( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const; + }; + + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/BField.tpp b/include/picongpu/fields/background/templates/twtsfast/BField.tpp new file mode 100644 index 0000000000..19b2575cb9 --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/BField.tpp @@ -0,0 +1,761 @@ +/* Copyright 2014-2021 Alexander Debus, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + + +#pragma once + +#include +#include "picongpu/simulation_defines.hpp" + +#include +#include +#include +#include + +#include "picongpu/fields/background/templates/twtsfast/RotateField.tpp" +#include "picongpu/fields/background/templates/twtsfast/GetInitialTimeDelay_SI.tpp" +#include "picongpu/fields/background/templates/twtsfast/getFieldPositions_SI.tpp" +#include "picongpu/fields/background/templates/twtsfast/BField.hpp" +#include "picongpu/fields/CellType.hpp" + + +namespace picongpu +{ + /** Load pre-defined background field */ + namespace templates + { + /** Traveling-wave Thomson scattering laser pulse */ + namespace twtsfast + { + HINLINE + BField::BField( + float_64 const focus_y_SI, + float_64 const wavelength_SI, + float_64 const pulselength_SI, + float_64 const w_x_SI, + float_X const phi, + float_X const beta_0, + float_64 const tdelay_user_SI, + bool const auto_tdelay, + PolarizationType const pol) + : focus_y_SI(focus_y_SI) + , wavelength_SI(wavelength_SI) + , pulselength_SI(pulselength_SI) + , w_x_SI(w_x_SI) + , phi(phi) + , beta_0(beta_0) + , tdelay_user_SI(tdelay_user_SI) + , dt(SI::DELTA_T_SI) + , unit_length(UNIT_LENGTH) + , auto_tdelay(auto_tdelay) + , pol(pol) + , phiPositive(float_X(1.0)) + { + /* Note: Environment objects cannot be instantiated on CUDA GPU device. Since this is done + * on host (see fieldBackground.param), this is no problem.
+ */ + SubGrid const& subGrid = Environment::get().SubGrid(); + halfSimSize = subGrid.getGlobalDomain().size / 2; + tdelay = detail::getInitialTimeDelay_SI( + auto_tdelay, + tdelay_user_SI, + halfSimSize, + pulselength_SI, + focus_y_SI, + phi, + beta_0); + if(phi < float_X(0.0)) + phiPositive = float_X(-1.0); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized( + pmacc::math::Vector const& bFieldPositions_SI, + float_64 const time) const + { + using PosVecVec = pmacc::math::Vector; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < simDim; ++i) + { + pos[k][i] = bFieldPositions_SI[k][i]; + } + } + + /* An example of intra-cell position offsets is the staggered Yee-grid. + * + * Calculate By-component with the intra-cell offset of a By-field + */ + float_64 const By_By = calcTWTSBy(pos[1], time); + /* Calculate Bz-component with the intra-cell offset of a By-field */ + float_64 const Bz_By = calcTWTSBz_Ex(pos[1], time); + /* Calculate By-component with the intra-cell offset of a Bz-field */ + float_64 const By_Bz = calcTWTSBy(pos[2], time); + /* Calculate Bz-component with the intra-cell offset of a Bz-field */ + float_64 const Bz_Bz = calcTWTSBz_Ex(pos[2], time); + /* Since we rotated all position vectors before calling calcTWTSBy and calcTWTSBz_Ex, + * we need to back-rotate the resulting B-field vector. + * + * RotationMatrix[-(PI/2+phi)].(By,Bz) for rotating back the field vectors. + */ + float_X sinPhi; + float_X cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + float_X const By_rot = -sinPhi * float_X(By_By) + cosPhi * float_X(Bz_By); + float_X const Bz_rot = -cosPhi * float_X(By_Bz) - sinPhi * float_X(Bz_Bz); + + /* Finally, the B-field normalized to the peak amplitude.
*/ + return float3_X(0.0_X, By_rot, Bz_rot); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized_Ey( + pmacc::math::Vector const& bFieldPositions_SI, + float_64 const time) const + { + using PosVecVec = pmacc::math::Vector; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < simDim; ++i) + { + pos[k][i] = bFieldPositions_SI[k][i]; + } + } + + /* Calculate Bz-component with the intra-cell offset of a By-field */ + float_64 const Bz_By = calcTWTSBz_Ey(pos[1], time); + /* Calculate Bz-component with the intra-cell offset of a Bz-field */ + float_64 const Bz_Bz = calcTWTSBz_Ey(pos[2], time); + /* Since we rotated all position vectors before calling calcTWTSBz_Ey, + * we need to back-rotate the resulting B-field vector. + * + * RotationMatrix[-(PI/2+phi)].(By,Bz) for rotating back the field-vectors. + */ + float_X sinPhi; + float_X cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + float_X const By_rot = +cosPhi * float_X(Bz_By); + float_X const Bz_rot = -sinPhi * float_X(Bz_Bz); + + /* Finally, the B-field normalized to the peak amplitude. */ + return float3_X(float_X(calcTWTSBx(pos[0], time)), By_rot, Bz_rot); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized( + pmacc::math::Vector const& bFieldPositions_SI, + float_64 const time) const + { + using PosVecVec = pmacc::math::Vector; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */ + for(uint32_t i = 0; i < DIM2; ++i) + { + pos[k][i + 1] = bFieldPositions_SI[k][i]; + } + } + + /* General background comment for the rest of this function: + * + * Corresponding position vector for the field components in 2D simulations. + * 3D 3D vectors in 2D space (x, y) + * x --> z (Meaning: In 2D-sim, insert cell-coordinate x + * into TWTS field function coordinate z.) 
+ * y --> y + * z --> -x (Since z=0 for 2D, we use the existing + * 3D TWTS-field-function and set x = -0) + * The transformed 3D coordinates are used to calculate the field components. + * Ex --> Ez (Meaning: Calculate Ex-component of existing 3D TWTS-field (calcTWTSEx) using + * transformed position vectors to obtain the corresponding Ez-component in 2D. + * Note: Swapping field component coordinates also alters the + * intra-cell position offset.) + * By --> By + * Bz --> -Bx (Yes, the sign is necessary.) + * + * An example of intra-cell position offsets is the staggered Yee-grid. + * + * This procedure is analogous to 3D case, but replace By --> By and Bz --> -Bx. Hence the + * grid cell offset for Bx has to be used instead of Bz. Mind the "-"-sign. + */ + + /* Calculate By-component with the intra-cell offset of a By-field */ + float_64 const By_By = calcTWTSBy(pos[1], time); + /* Calculate Bx-component with the intra-cell offset of a By-field */ + float_64 const Bx_By = -calcTWTSBz_Ex(pos[1], time); + /* Calculate By-component with the intra-cell offset of a Bx-field */ + float_64 const By_Bx = calcTWTSBy(pos[0], time); + /* Calculate Bx-component with the intra-cell offset of a Bx-field */ + float_64 const Bx_Bx = -calcTWTSBz_Ex(pos[0], time); + /* Since we rotated all position vectors before calling calcTWTSBy and calcTWTSBz_Ex, we + * need to back-rotate the resulting B-field vector. Now the rotation is done + * analogously in the (y,x)-plane. (Reverse of the position vector transformation.) + * + * RotationMatrix[-(PI / 2+phi)].(By,Bx) for rotating back the field vectors. + */ + float_X sinPhi; + float_X cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + float_X const By_rot = -sinPhi * float_X(By_By) + cosPhi * float_X(Bx_By); + float_X const Bx_rot = -cosPhi * float_X(By_Bx) - sinPhi * float_X(Bx_Bx); + + /* Finally, the B-field normalized to the peak amplitude. 
+ */ + return float3_X(Bx_rot, By_rot, 0.0_X); + } + + template<> + HDINLINE float3_X BField::getTWTSBfield_Normalized_Ey( + pmacc::math::Vector const& bFieldPositions_SI, + float_64 const time) const + { + using PosVecVec = pmacc::math::Vector; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + /* The 2D output of getFieldPositions_SI only returns + * the y- and z-component of a 3D vector, hence it is stored at index i + 1 below. + */ + for(uint32_t i = 0; i < DIM2; ++i) + { + pos[k][i + 1] = bFieldPositions_SI[k][i]; + } + } + + /* General background comment for the rest of this function: + * + * Corresponding position vector for the field components in 2D simulations. + * 3D 3D vectors in 2D space (x, y) + * x --> z (Meaning: In 2D-sim, insert cell-coordinate x + * into TWTS field function coordinate z.) + * y --> y + * z --> -x (Since z=0 for 2D, we use the existing + * 3D TWTS-field-function and set x = -0) + * Ex --> Ez (Meaning: Calculate Ex-component of existing 3D TWTS-field to obtain + * corresponding Ez-component in 2D. + * Note: Mind the intra-cell position offset due to the staggered grid for Ez.) + * By --> By + * Bz --> -Bx (Yes, the sign is necessary.) + * + * This procedure is analogous to 3D case, but replace By --> By and Bz --> -Bx. Hence the + * grid cell offset for Bx has to be used instead of Bz. Mind the "-"-sign. + */ + + /* Calculate Bx-component with the intra-cell offset of a By-field. NOTE(review): the 3D counterpart of this function calls calcTWTSBz_Ey at this step; confirm that calcTWTSBz_Ex is intended here. */ + float_64 const Bx_By = -calcTWTSBz_Ex(pos[1], time); + /* Calculate Bx-component with the intra-cell offset of a Bx-field (same NOTE(review) as above applies). */ + float_64 const Bx_Bx = -calcTWTSBz_Ex(pos[0], time); + + /* Since we rotated all position vectors before calling calcTWTSBz_Ex, we + * need to back-rotate the resulting B-field vector. Now the rotation is done + * analogously in the (y,x)-plane. (Reverse of the position vector transformation.) + * + * RotationMatrix[-(PI / 2+phi)].(By,Bx) + * for rotating back the field-vectors.
+ */ + float_X sinPhi; + float_X cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + float_X const By_rot = +cosPhi * float_X(Bx_By); + float_X const Bx_rot = -sinPhi * float_X(Bx_Bx); + + /* Finally, the B-field normalized to the peak amplitude. */ + return float3_X(Bx_rot, By_rot, float_X(calcTWTSBx(pos[2], time))); + } + + HDINLINE + float3_X BField::operator()(DataSpace const& cellIdx, uint32_t const currentStep) const + { + float_64 const time_SI = float_64(currentStep) * dt - tdelay; + traits::FieldPosition const fieldPosB; + + pmacc::math::Vector const bFieldPositions_SI + = detail::getFieldPositions_SI(cellIdx, halfSimSize, fieldPosB(), unit_length, focus_y_SI, phi); + /* Single TWTS-Pulse */ + switch(pol) + { + case LINEAR_X: + return getTWTSBfield_Normalized(bFieldPositions_SI, time_SI); + + case LINEAR_YZ: + return getTWTSBfield_Normalized_Ey(bFieldPositions_SI, time_SI); + } + return getTWTSBfield_Normalized(bFieldPositions_SI, + time_SI); // defensive default + } + + /** Calculate the By(r,t) field: real part of the complex field expression below, divided by UNIT_SPEED + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE + BField::float_T BField::calcTWTSBy(float3_64 const& pos, float_64 const time) const + { + using complex_T = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + + /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + float_T const beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function.
+ */ + float_T const phiReal = float_T(math::abs(phi)); + float_T sinPhiReal; + float_T cosPhiReal; + pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal); + float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal); + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the + * local "phiT" not equal in value to the local "phiReal" or the object member "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + float_T const phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for + * documentation purposes. + * float_T const eta = float_T(PI/2) - (phiReal - alphaTilt); + */ + + float_T const cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + float_T const lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + float_T const om0 = float_T(2.0 * PI) * cspeed / lambda0; + /* factor 2 in tauG arises from definition convention in laser formula */ + float_T const tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + float_T const w0 = float_T(w_x_SI / UNIT_LENGTH); + float_T const rho0 = float_T(PI * w0 * w0 / lambda0); + float_T const k = float_T(2.0 * PI / lambda0); + + /* In order to calculate in single-precision and in order to account for errors in + * the approximations far from the coordinate origin, we use the wavelength-periodicity and + * the known propagation direction for realizing the laser pulse using relative coordinates + * (i.e.
from a finite coordinate range) only. All these quantities have to be calculated + * in double precision. + */ + float_64 sinPhiVal; + float_64 cosPhiVal; + pmacc::math::sincos(precisionCast(phi), sinPhiVal, cosPhiVal); + float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal); + float_64 const tanFocalLine = math::tan(PI / 2.0 - phi); + float_64 const deltaT = wavelength_SI / SI::SPEED_OF_LIGHT_SI * (1.0 + tanAlpha / tanFocalLine); + float_64 const deltaY = wavelength_SI / tanFocalLine; + float_64 const deltaZ = -wavelength_SI; + float_64 const numberOfPeriods = math::floor(time / deltaT); + float_T const timeMod = float_T(time - numberOfPeriods * deltaT); + float_T const yMod = float_T(pos.y() + numberOfPeriods * deltaY); + float_T const zMod = float_T(pos.z() + numberOfPeriods * deltaZ); + + float_T const x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + float_T const y = float_T(phiPositive * yMod / UNIT_LENGTH); + float_T const z = float_T(zMod / UNIT_LENGTH); + float_T const t = float_T(timeMod / UNIT_TIME); + + /* Calculating shortcuts for speeding up field calculation */ + float_T sinPhi; + float_T cosPhi; + pmacc::math::sincos(phiT, sinPhi, cosPhi); + float_T const cscPhi = float_T(1.0) / sinPhi; + float_T const secPhi2 = float_T(1.0) / math::cos(phiT / float_T(2.0)); + float_T const sinPhi2 = math::sin(phiT / float_T(2.0)); + float_T const sin2Phi = math::sin(phiT * float_T(2.0)); + float_T const tanPhi2 = math::tan(phiT / float_T(2.0)); + + float_T const sinPhi_2 = sinPhi * sinPhi; + float_T const sinPhi_3 = sinPhi * sinPhi_2; + float_T const sinPhi_4 = sinPhi_2 * sinPhi_2; + + float_T const sinPhi2_2 = sinPhi2 * sinPhi2; + float_T const sinPhi2_4 = sinPhi2_2 * sinPhi2_2; + float_T const tanPhi2_2 = tanPhi2 * tanPhi2; + + float_T const tauG2 = tauG * tauG; + float_T const x2 = x * x; + float_T const y2 = y * y; + float_T const z2 = z * z; + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + *
thus help with formal code verification through manual code inspection. All real literals are wrapped in float_T, as in calcTWTSBz_Ey. NOTE(review): calcTWTSBz_Ey routes the divisor of helpVar3 through complex_64 for accuracy when float_T is float_X; complex_64 is declared but unused here - confirm whether the same round-trip is needed. + */ + const complex_T helpVar1 = cspeed * om0 * tauG2 * sinPhi_4 + - complex_T(0, 8) * sinPhi2_4 * sinPhi * (y * cosPhi + z * sinPhi); + + const complex_T helpVar2 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + + const complex_T helpVar3 + = (complex_T(0, float_T(-0.5)) * cscPhi + * (complex_T(0, -8) * om0 * y * (cspeed * t - z) * sinPhi2_2 * sinPhi_4 + * (complex_T(0, 1) * rho0 - z * sinPhi) + - om0 * sinPhi_4 * sinPhi + * (-float_T(2.0) * z2 * rho0 + - cspeed * cspeed + * (k * tauG2 * x2 + float_T(2.0) * t * (t - complex_T(0, 1) * om0 * tauG2) * rho0) + + cspeed * (float_T(4.0) * t * z * rho0 - complex_T(0, 2) * om0 * tauG2 * z * rho0) + - complex_T(0, 2) * (cspeed * t - z) + * (cspeed * (t - complex_T(0, 1) * om0 * tauG2) - z) * z * sinPhi) + + y * sinPhi + * (complex_T(0, 4) * om0 * y * (cspeed * t - z) * sinPhi2_2 * sinPhi_2 + + om0 * (cspeed * t - z) + * (complex_T(0, 1) * cspeed * t + cspeed * om0 * tauG2 - complex_T(0, 1) * z) + * sinPhi_3 + - complex_T(0, 4) * sinPhi2_4 + * (cspeed * k * x2 - om0 * (y2 - float_T(4.0) * (cspeed * t - z) * z) * sinPhi)) + * sin2Phi + - complex_T(0, 4) * sinPhi2_4 + * (complex_T(0, -4) * om0 * y * (cspeed * t - z) * rho0 * cosPhi * sinPhi_2 + + complex_T(0, 2) + * (om0 * (y2 + float_T(2.0) * z2) * rho0 + - cspeed * z * (complex_T(0, 1) * k * x2 + float_T(2.0) * om0 * t * rho0)) + * sinPhi_3 + - float_T(2.0) * om0 * z * (y2 - float_T(2.0) * (cspeed * t - z) * z) * sinPhi_4 + + om0 * y2 * (cspeed * t - z) * sin2Phi * sin2Phi))) + / (cspeed * helpVar2 * helpVar1); + + complex_T const helpVar4 = cspeed * om0 * tauG * tauG + - complex_T(0, 8) * y * math::tan(float_T(PI / 2.0) - phiT) * cscPhi * cscPhi * sinPhi2_4 + - complex_T(0, 2) * z * tanPhi2_2; + + complex_T const result + = (math::exp(helpVar3) * tauG * secPhi2 * secPhi2 + * (complex_T(0, 2) * cspeed * t + cspeed * om0 * tauG2 - complex_T(0, 4) * z + + cspeed * (complex_T(0, 2) * t + om0 * tauG2) * cosPhi +
complex_T(0, 2) * y * tanPhi2) + * math::sqrt(cspeed * om0 * rho0 / helpVar2)) + / (float_T(2.0) * cspeed * math::pow(helpVar4, float_T(1.5))); + + return result.get_real() / UNIT_SPEED; + } + + /** Calculate the Bz(r,t) field + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE + BField::float_T BField::calcTWTSBz_Ex(float3_64 const& pos, float_64 const time) const + { + using complex_T = pmacc::math::Complex; + + /* propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + float_T const beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. + */ + float_T const phiReal = float_T(math::abs(phi)); + float_T sinPhiReal; + float_T cosPhiReal; + pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal); + float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal); + + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the + * local "phi_T" not equal in value to the object member "phiReal" or "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + float_T const phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. + * Not used, but remains in code for documentation purposes.
+ * float_T const eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt); + */ + + float_T const cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + float_T const lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + float_T const om0 = float_T(2.0 * PI) * cspeed / lambda0; + /* factor 2 in tauG arises from definition convention in laser formula */ + float_T const tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + float_T const w0 = float_T(w_x_SI / UNIT_LENGTH); + float_T const rho0 = float_T(PI * w0 * w0 / lambda0); + float_T const k = float_T(2.0 * PI / lambda0); + + /* In order to calculate in single-precision and in order to account for errors in + * the approximations far from the coordinate origin, we use the wavelength-periodicity and + * the known propagation direction for realizing the laser pulse using relative coordinates + * (i.e. from a finite coordinate range) only. All these quantities have to be calculated + * in double precision. 
+ */ + float_64 sinPhiVal; + float_64 cosPhiVal; + pmacc::math::sincos(precisionCast(phi), sinPhiVal, cosPhiVal); + float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal); + float_64 const tanFocalLine = math::tan(PI / 2.0 - phi); + float_64 const deltaT = wavelength_SI / SI::SPEED_OF_LIGHT_SI * (1.0 + tanAlpha / tanFocalLine); + float_64 const deltaY = wavelength_SI / tanFocalLine; + float_64 const deltaZ = -wavelength_SI; + float_64 const numberOfPeriods = math::floor(time / deltaT); + float_T const timeMod = float_T(time - numberOfPeriods * deltaT); + float_T const yMod = float_T(pos.y() + numberOfPeriods * deltaY); + float_T const zMod = float_T(pos.z() + numberOfPeriods * deltaZ); + + float_T const x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + float_T const y = float_T(phiPositive * yMod / UNIT_LENGTH); + float_T const z = float_T(zMod / UNIT_LENGTH); + float_T const t = float_T(timeMod / UNIT_TIME); + + /* Calculating shortcuts for speeding up field calculation */ + float_T sinPhi; + float_T cosPhi; + pmacc::math::sincos(phiT, sinPhi, cosPhi); + float_T const cscPhi = float_T(1.0) / sinPhi; + float_T const secPhi2 = float_T(1.0) / math::cos(phiT / float_T(2.0)); + float_T const sinPhi2 = math::sin(phiT / float_T(2.0)); + float_T const tanPhi2 = math::tan(phiT / float_T(2.0)); + + float_T const cscPhi_3 = cscPhi * cscPhi * cscPhi; + + float_T const sinPhi2_2 = sinPhi2 * sinPhi2; + float_T const sinPhi2_4 = sinPhi2_2 * sinPhi2_2; + float_T const tanPhi2_2 = tanPhi2 * tanPhi2; + float_T const secPhi2_2 = secPhi2 * secPhi2; + + float_T const tanPI2_phi = math::tan(float_T(PI / 2.0) - phiT); + + float_T const tauG2 = tauG * tauG; + float_T const om02 = om0 * om0; + float_T const x2 = x * x; + float_T const y2 = y * y; + float_T const z2 = z * z; + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection. 
+ */ + const complex_T helpVar1 = cspeed * om0 * tauG2 - complex_T(0, 1) * y * cosPhi * secPhi2_2 * tanPhi2 + - complex_T(0, 2) * z * tanPhi2_2; + const complex_T helpVar2 = complex_T(0, 1) * cspeed * rho0 - cspeed * y * cosPhi - cspeed * z * sinPhi; + const complex_T helpVar3 = rho0 + complex_T(0, 1) * y * cosPhi + complex_T(0, 1) * z * sinPhi; + const complex_T helpVar4 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + const complex_T helpVar5 = -z - y * tanPI2_phi + complex_T(0, 1) * rho0 * cscPhi; + const complex_T helpVar6 + = -cspeed * z - cspeed * y * tanPI2_phi + complex_T(0, 1) * cspeed * rho0 * cscPhi; + const complex_T helpVar7 = complex_T(0, 1) * cspeed * rho0 - cspeed * y * cosPhi - cspeed * z * sinPhi; + + const complex_T helpVar8 + = (om0 * y * rho0 * secPhi2_2 * secPhi2_2 / helpVar6 + + (om0 * y * tanPI2_phi + * (cspeed * om0 * tauG2 + + float_T(8.0) * (complex_T(0, 2) * y + rho0) * cscPhi_3 * sinPhi2_4)) + / (cspeed * helpVar5) + + om02 * tauG2 * z * sinPhi / helpVar4 - float_T(2.0) * k * x2 / helpVar3 + - om02 * tauG2 * rho0 / helpVar3 + + complex_T(0, 1) * om0 * y2 * cosPhi * cosPhi * secPhi2_2 * tanPhi2 / helpVar2 + + complex_T(0, 4) * om0 * y * z * tanPhi2_2 / helpVar2 + - float_T(2.0) * om0 * z * rho0 * tanPhi2_2 / helpVar2 + - complex_T(0, 2) * om0 * z2 * sinPhi * tanPhi2_2 / helpVar2 + - (om0 + * math::pow( + float_T(2.0) * cspeed * t - complex_T(0, 1) * cspeed * om0 * tauG2 - float_T(2.0) * z + + float_T(8.0) * y * cscPhi_3 * sinPhi2_4 - float_T(2.0) * z * tanPhi2_2, + float_T(2.0))) + / (cspeed * helpVar1)) + / float_T(4.0); + + const complex_T helpVar9 = cspeed * om0 * tauG2 - complex_T(0, 1) * y * cosPhi * secPhi2_2 * tanPhi2 + - complex_T(0, 2) * z * tanPhi2_2; + + const complex_T result = float_T(phiPositive) + * (complex_T(0, 2) * math::exp(helpVar8) * tauG * tanPhi2 * (cspeed * t - z + y * tanPhi2) + * math::sqrt(om0 * rho0 / helpVar7)) + / math::pow(helpVar9, float_T(1.5)); + + return result.get_real() / UNIT_SPEED; + } + + /** 
Calculate the Bx(r,t) field + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE + BField::float_T BField::calcTWTSBx(float3_64 const& pos, float_64 const time) const + { + /* The Bx-field for the Ey-field is the same as + * for the By-field for the Ex-field except for the sign. + */ + return -calcTWTSBy(pos, time); + } + + /** Calculate the Bz(r,t) field + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field */ + HDINLINE + BField::float_T BField::calcTWTSBz_Ey(float3_64 const& pos, float_64 const time) const + { + using complex_T = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + + /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + float_T const beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. + */ + float_T const phiReal = float_T(math::abs(phi)); + float_T cosPhiReal; + float_T sinPhiReal; + pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal); + float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal); + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0=1.0, this is equivalent to our standard definition. Question: Why is the + * local "phi_T" not equal in value to the object member "phiReal" or "phi"? + * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. 
This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + float_T const phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. + * Not used, but remains in code for documentation purposes. + * float_T const eta = float_T(float_T(PI / 2)) - (phiReal - alphaTilt); + */ + + float_T const cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); + float_T const lambda0 = float_T(wavelength_SI / UNIT_LENGTH); + float_T const om0 = float_T(2.0 * PI) * cspeed / lambda0; + /* factor 2 in tauG arises from definition convention in laser formula */ + float_T const tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + float_T const w0 = float_T(w_x_SI / UNIT_LENGTH); + float_T const rho0 = float_T(PI * w0 * w0 / lambda0); + float_T const k = float_T(2.0 * PI / lambda0); + + /* In order to calculate in single-precision and in order to account for errors in + * the approximations far from the coordinate origin, we use the wavelength-periodicity and + * the known propagation direction for realizing the laser pulse using relative coordinates + * (i.e. from a finite coordinate range) only. All these quantities have to be calculated + * in double precision. 
+ */ + float_64 sinPhiVal; + float_64 cosPhiVal; + pmacc::math::sincos(precisionCast(phi), sinPhiVal, cosPhiVal); + float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal); + float_64 const tanFocalLine = math::tan(PI / 2.0 - phi); + float_64 const deltaT = wavelength_SI / SI::SPEED_OF_LIGHT_SI * (1.0 + tanAlpha / tanFocalLine); + float_64 const deltaY = wavelength_SI / tanFocalLine; + float_64 const deltaZ = -wavelength_SI; + float_64 const numberOfPeriods = math::floor(time / deltaT); + float_T const timeMod = float_T(time - numberOfPeriods * deltaT); + float_T const yMod = float_T(pos.y() + numberOfPeriods * deltaY); + float_T const zMod = float_T(pos.z() + numberOfPeriods * deltaZ); + + float_T const x = float_T(phiPositive * pos.x() / UNIT_LENGTH); + float_T const y = float_T(phiPositive * yMod / UNIT_LENGTH); + float_T const z = float_T(zMod / UNIT_LENGTH); + float_T const t = float_T(timeMod / UNIT_TIME); + + /* Shortcuts for speeding up the field calculation. */ + float_T sinPhi; + float_T cosPhi; + pmacc::math::sincos(phiT, sinPhi, cosPhi); + float_T const sin2Phi = math::sin(phiT * float_T(2.0)); + float_T const sinPhi2 = math::sin(phiT / float_T(2.0)); + float_T const tanPhi2 = math::tan(phiT / float_T(2.0)); + + float_T const cscPhi = float_T(1.0) / sinPhi; + float_T const tanPI2_phi = math::tan(float_T(PI / 2.0) - phiT); + + float_T const sinPhi_2 = sinPhi * sinPhi; + float_T const sinPhi_4 = sinPhi_2 * sinPhi_2; + float_T const sinPhi2_2 = sinPhi2 * sinPhi2; + float_T const sinPhi2_4 = sinPhi2_2 * sinPhi2_2; + float_T const tanPhi2_2 = tanPhi2 * tanPhi2; + + float_T const tauG2 = tauG * tauG; + + float_T const x2 = x * x; + float_T const y2 = y * y; + float_T const z2 = z * z; + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection. 
+ */ + const complex_T helpVar1 = cspeed * om0 * tauG2 * sinPhi_4 + - complex_T(0, 8) * sinPhi2_4 * sinPhi * (y * cosPhi + z * sinPhi); + + const complex_T helpVar2 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + + const complex_T helpVar3 + = (complex_T(0, float_T(-0.5)) * cscPhi + * (complex_T(0, -8) * om0 * y * (cspeed * t - z) * sinPhi2_2 * sinPhi_4 + * (complex_T(0, 1) * rho0 - z * sinPhi) + - om0 * sinPhi * sinPhi_4 + * (float_T(-2.0) * z2 * rho0 + - cspeed * cspeed + * (k * tauG2 * x2 + float_T(2.0) * t * (t - complex_T(0, 1) * om0 * tauG2) * rho0) + + cspeed * (float_T(4.0) * t * z * rho0 - complex_T(0, 2) * om0 * tauG2 * z * rho0) + - complex_T(0, 2) * (cspeed * t - z) + * (cspeed * (t - complex_T(0, 1) * om0 * tauG2) - z) * z * sinPhi) + + float_T(2.0) * y * cosPhi * sinPhi_2 + * (complex_T(0, 4) * om0 * y * (cspeed * t - z) * sinPhi2_2 * sinPhi_2 + + om0 * (cspeed * t - z) + * (complex_T(0, 1) * cspeed * t + cspeed * om0 * tauG2 - complex_T(0, 1) * z) + * sinPhi_2 * sinPhi + - complex_T(0, 4) * sinPhi2_4 + * (cspeed * k * x2 - om0 * (y2 - float_T(4.0) * (cspeed * t - z) * z) * sinPhi)) + - complex_T(0, 4) * sinPhi2_4 + * (complex_T(0, -4) * om0 * y * (cspeed * t - z) * rho0 * cosPhi * sinPhi_2 + + complex_T(0, 2) + * (om0 * (y2 + float_T(2.0) * z2) * rho0 + - cspeed * z * (complex_T(0, 1) * k * x2 + float_T(2.0) * om0 * t * rho0)) + * sinPhi_2 * sinPhi + - float_T(2.0) * om0 * z * (y2 - float_T(2.0) * (cspeed * t - z) * z) * sinPhi_4 + + om0 * y2 * (cspeed * t - z) * sin2Phi * sin2Phi)) + /* The "round-trip" conversion in the line below fixes a gross accuracy bug + * in floating-point arithmetics, when float_T is set to float_X. 
+ */ + ) + * complex_T(1.0 / complex_64(cspeed * helpVar2 * helpVar1)); + + const complex_T helpVar4 = cspeed * om0 * rho0 + * (cspeed * om0 * tauG2 - complex_T(0, 8) * y * tanPI2_phi * cscPhi * cscPhi * sinPhi2_4 + - complex_T(0, 2) * z * tanPhi2_2); + + const complex_T result = float_T(-1.0) + * (cspeed * math::exp(helpVar3) * k * tauG * x * rho0 + * math::pow(float_T(1.0) / helpVar2, float_T(1.5))) + / math::sqrt(helpVar4); + + return result.get_real() / UNIT_SPEED; + } + + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/EField.hpp b/include/picongpu/fields/background/templates/twtsfast/EField.hpp new file mode 100644 index 0000000000..43c39802ae --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/EField.hpp @@ -0,0 +1,166 @@ +/* Copyright 2014-2021 Alexander Debus, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + + +#pragma once + +#include + +#include +#include +#include "picongpu/fields/background/templates/twtsfast/numComponents.hpp" + +namespace picongpu +{ + /* Load pre-defined background field */ + namespace templates + { + /* Traveling-wave Thomson scattering laser pulse */ + namespace twtsfast + { + class EField + { + public: + using float_T = float_X; + + enum PolarizationType + { + /** The linear polarization of the TWTS laser is defined + * relative to the plane of the pulse front tilt. + * + * Polarization is normal to the reference plane. + * Use Ex-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_X = 1u, + /** Polarization lies within the reference plane. + * Use Ey-fields (and corresponding B-fields) in TWTS laser internal coordinate system. + */ + LINEAR_YZ = 2u, + }; + + /** Center of simulation volume in number of cells */ + PMACC_ALIGN(halfSimSize, DataSpace); + /** y-position of TWTS coordinate origin inside the simulation coordinates [meter]. + The other origin coordinates (x and z) default to globally centered values + with respect to the simulation volume. */ + PMACC_ALIGN(focus_y_SI, float_64 const); + /** Laser wavelength [meter] */ + PMACC_ALIGN(wavelength_SI, float_64 const); + /** TWTS laser pulse duration [second] */ + PMACC_ALIGN(pulselength_SI, float_64 const); + /** line focus height of TWTS pulse [meter] */ + PMACC_ALIGN(w_x_SI, float_64 const); + /** interaction angle between TWTS laser propagation vector and the y-axis [rad] */ + PMACC_ALIGN(phi, float_X const); + /** Takes value 1.0 for phi >= 0 and -1.0 for phi < 0 (set in the constructor). */ + PMACC_ALIGN(phiPositive, float_X); + /** propagation speed of TWTS laser overlap + normalized to the speed of light. [Default: beta0=1.0] */ + PMACC_ALIGN(beta_0, float_X const); + /** If auto_tdelay=FALSE, then a user defined delay is used. [second] */ + PMACC_ALIGN(tdelay_user_SI, float_64 const); + /** Make time step constant accessible to device.
+ */ + PMACC_ALIGN(dt, float_64 const); + /** Make length normalization constant accessible to device. */ + PMACC_ALIGN(unit_length, float_64 const); + /** TWTS laser time delay [second], as obtained from detail::getInitialTimeDelay_SI in the constructor */ + PMACC_ALIGN(tdelay, float_64); + /** Should the TWTS laser delay be chosen automatically, such that + * the laser gradually enters the simulation volume? [Default: TRUE] + */ + PMACC_ALIGN(auto_tdelay, bool const); + /** Polarization of TWTS laser */ + PMACC_ALIGN(pol, PolarizationType const); + + /** Electric field of the TWTS laser + * + * @param focus_y_SI the distance to the laser focus in y-direction [m] + * @param wavelength_SI central wavelength [m] + * @param pulselength_SI sigma of std. gauss for intensity (E^2), + * pulselength_SI = FWHM_of_Intensity / 2.35482 [seconds (sigma)] + * @param w_x_SI beam waist: distance from the axis where the pulse electric field + * decreases to its 1/e^2-th part at the focus position of the laser [m] + * @param phi interaction angle between TWTS laser propagation vector and + * the y-axis [rad, default = 90.*(PI/180.)] + * @param beta_0 propagation speed of overlap normalized to + * the speed of light [c, default = 1.0] + * @param tdelay_user_SI manual time delay if auto_tdelay is false [s, default = 0.0] + * @param auto_tdelay calculate the time delay such that the TWTS pulse is not + * inside the simulation volume at simulation start timestep = 0 [default = true] + * @param pol determines the TWTS laser polarization, which is either normal or parallel + * to the laser pulse front tilt plane [default = LINEAR_X, alternative = LINEAR_YZ] + */ + HINLINE + EField( + float_64 const focus_y_SI, + float_64 const wavelength_SI, + float_64 const pulselength_SI, + float_64 const w_x_SI, + float_X const phi = 90.
* (PI / 180.), + float_X const beta_0 = 1.0, + float_64 const tdelay_user_SI = 0.0, + bool const auto_tdelay = true, + PolarizationType const pol = LINEAR_X); + + /** Specify your background field E(r,t) here + * + * @param cellIdx The total cell id counted from the start at timestep 0. + * @param currentStep The current time step + * @return float3_X with field normalized to amplitude in range [-1.:1.] + */ + HDINLINE float3_X operator()(DataSpace const& cellIdx, uint32_t const currentStep) const; + + /** Calculate the Ex(r,t) field here (electric field vector normal to pulse-front-tilt plane) + * + * @param pos Spatial position of the target field + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field + * @return Ex-field component of the non-rotated TWTS field in SI units */ + HDINLINE float_T calcTWTSEx(float3_64 const& pos, float_64 const time) const; + + /** Calculate the Ey(r,t) field here (electric field vector in pulse-front-tilt plane) + * + * @param pos Spatial position of the target field + * @param time Absolute time (SI, including all offsets and transformations) + * for calculating the field + * @return Ey-field component of the non-rotated TWTS field in SI units */ + HDINLINE float_T calcTWTSEy(float3_64 const& pos, float_64 const time) const; + + /** Calculate the E-field vector of the TWTS laser in SI units. + * @tparam T_dim Specializes for the simulation dimension + * @param eFieldPositions_SI Position vectors (one per E-field component) at which the field is + * evaluated, as computed by detail::getFieldPositions_SI in operator() + * @param time Time in SI units at which the field is evaluated + * @return Efield vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSEfield_Normalized( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const; + + /** Calculate the E-field vector of the "in-plane polarized" TWTS laser in SI units.
+ * @tparam T_dim Specializes for the simulation dimension + * @param eFieldPositions_SI Position vectors (one per E-field component) at which the field is + * evaluated, as computed by detail::getFieldPositions_SI in operator() + * @param time Time in SI units at which the field is evaluated + * @return Efield vector of the rotated TWTS field in SI units */ + template + HDINLINE float3_X getTWTSEfield_Normalized_Ey( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const; + }; + + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/EField.tpp b/include/picongpu/fields/background/templates/twtsfast/EField.tpp new file mode 100644 index 0000000000..16e4d283b6 --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/EField.tpp @@ -0,0 +1,358 @@ +/* Copyright 2014-2021 Alexander Debus, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see .
+ */ + + +#pragma once + +#include +#include "picongpu/simulation_defines.hpp" + +#include +#include +#include +#include + +#include "picongpu/fields/background/templates/twtsfast/RotateField.tpp" +#include "picongpu/fields/background/templates/twtsfast/GetInitialTimeDelay_SI.tpp" +#include "picongpu/fields/background/templates/twtsfast/getFieldPositions_SI.tpp" +#include "picongpu/fields/background/templates/twtsfast/EField.hpp" +#include "picongpu/fields/CellType.hpp" + +namespace picongpu +{ + /* Load pre-defined background field */ + namespace templates + { + /* Traveling-wave Thomson scattering laser pulse */ + namespace twtsfast + { + HINLINE + EField::EField( + float_64 const focus_y_SI, + float_64 const wavelength_SI, + float_64 const pulselength_SI, + float_64 const w_x_SI, + float_X const phi, + float_X const beta_0, + float_64 const tdelay_user_SI, + bool const auto_tdelay, + PolarizationType const pol) + : focus_y_SI(focus_y_SI) + , wavelength_SI(wavelength_SI) + , pulselength_SI(pulselength_SI) + , w_x_SI(w_x_SI) + , phi(phi) + , beta_0(beta_0) + , tdelay_user_SI(tdelay_user_SI) + , dt(SI::DELTA_T_SI) + , unit_length(UNIT_LENGTH) + , auto_tdelay(auto_tdelay) + , pol(pol) + , phiPositive(float_X(1.0)) /* default sign; overwritten with -1.0 below if phi < 0 */ + { + /* Note: Environment-objects cannot be instantiated on CUDA GPU device. Since this is done + on host (see fieldBackground.param), this is no problem.
+ */ + SubGrid const& subGrid = Environment::get().SubGrid(); + halfSimSize = subGrid.getGlobalDomain().size / 2; /* center of the global domain in cells */ + tdelay = detail::getInitialTimeDelay_SI( + auto_tdelay, + tdelay_user_SI, + halfSimSize, + pulselength_SI, + focus_y_SI, + phi, + beta_0); + if(phi < 0.0_X) + phiPositive = float_X(-1.0); + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const + { + float3_64 pos(float3_64::create(0.0)); + for(uint32_t i = 0; i < simDim; ++i) + pos[i] = eFieldPositions_SI[0][i]; /* index 0: position with the intra-cell offset of the Ex-field */ + return float3_X(float_X(calcTWTSEx(pos, time)), 0.0_X, 0.0_X); /* only the x-component is set */ + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized_Ey( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const + { + using PosVecVec = pmacc::math::Vector; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < simDim; ++i) + pos[k][i] = eFieldPositions_SI[k][i]; + } + + /* Calculate Ey-component with the intra-cell offset of a Ey-field */ + float_64 const Ey_Ey = calcTWTSEy(pos[1], time); + /* Calculate Ey-component with the intra-cell offset of a Ez-field */ + float_64 const Ey_Ez = calcTWTSEy(pos[2], time); + + /* Since we rotated all position vectors before calling calcTWTSEy, + * we need to back-rotate the resulting E-field vector. + * + * RotationMatrix[-(PI/2+phi)].(Ey,Ez) for rotating back the field-vectors. + */ + float_X sinPhi; + float_X cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + float_X const Ey_rot = -sinPhi * float_X(Ey_Ey); + float_X const Ez_rot = -cosPhi * float_X(Ey_Ez); + + /* Finally, the E-field normalized to the peak amplitude.
*/ + return float3_X(0.0_X, Ey_rot, Ez_rot); + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const + { + /* Ex->Ez, so also the grid cell offset for Ez has to be used. */ + float3_64 pos(float3_64::create(0.0)); + /* 2D (y,z) vectors are mapped on 3D (x,y,z) vectors. */ + for(uint32_t i = 0; i < DIM2; ++i) + pos[i + 1] = eFieldPositions_SI[2][i]; /* index 2: position with the intra-cell offset of the Ez-field */ + return float3_X(0.0_X, 0.0_X, float_X(calcTWTSEx(pos, time))); /* only the z-component is set */ + } + + template<> + HDINLINE float3_X EField::getTWTSEfield_Normalized_Ey( + pmacc::math::Vector const& eFieldPositions_SI, + float_64 const time) const + { + using PosVecVec = pmacc::math::Vector; + PosVecVec pos(PosVecVec::create(float3_64::create(0.0))); + + /* The 2D output of getFieldPositions_SI only returns + * the y- and z-component of a 3D vector. + */ + for(uint32_t k = 0; k < detail::numComponents; ++k) + { + for(uint32_t i = 0; i < DIM2; ++i) + pos[k][i + 1] = eFieldPositions_SI[k][i]; + } + + /* Ey->Ey, but grid cell offsets for Ex and Ey have to be used. + * + * Calculate Ey-component with the intra-cell offset of a Ey-field + */ + float_64 const Ey_Ey = calcTWTSEy(pos[1], time); + /* Calculate Ey-component with the intra-cell offset of a Ex-field */ + float_64 const Ey_Ex = calcTWTSEy(pos[0], time); + + /* Since we rotated all position vectors before calling calcTWTSEy, + * we need to back-rotate the resulting E-field vector. + * + * RotationMatrix[-(PI/2+phi)].(Ey,Ex) for rotating back the field-vectors. + */ + float_X sinPhi; + float_X cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + float_X const Ey_rot = -sinPhi * float_X(Ey_Ey); + float_X const Ex_rot = -cosPhi * float_X(Ey_Ex); + + /* Finally, the E-field normalized to the peak amplitude.
*/ + return float3_X(Ex_rot, Ey_rot, 0.0_X); + } + + HDINLINE float3_X EField::operator()(DataSpace const& cellIdx, uint32_t const currentStep) const + { + float_64 const time_SI = float_64(currentStep) * dt - tdelay; /* step time in SI units minus the laser time delay */ + traits::FieldPosition const fieldPosE; + + pmacc::math::Vector const eFieldPositions_SI + = detail::getFieldPositions_SI(cellIdx, halfSimSize, fieldPosE(), unit_length, focus_y_SI, phi); + + /* Single TWTS-Pulse */ + switch(pol) + { + case LINEAR_X: + return getTWTSEfield_Normalized(eFieldPositions_SI, time_SI); + + case LINEAR_YZ: + return getTWTSEfield_Normalized_Ey(eFieldPositions_SI, time_SI); + } + return getTWTSEfield_Normalized(eFieldPositions_SI, time_SI); // defensive default + } + + /** Calculate the Ex(r,t) field here + * + * @param pos Spatial position of the target field. + * @param time Absolute time (SI, including all offsets and transformations) for calculating + * the field + * @return Real part of the complex-valued field expression evaluated below */ + HDINLINE EField::float_T EField::calcTWTSEx(float3_64 const& pos, float_64 const time) const + { + using complex_T = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + + /* Propagation speed of overlap normalized to the speed of light [Default: beta0=1.0] */ + float_T const beta0 = float_T(beta_0); + /* If phi < 0 the formulas below are not directly applicable. + * Instead phi is taken positive, but the entire pulse rotated by 180 deg around the + * z-axis of the coordinate system in this function. + */ + float_T const phiReal = float_T(math::abs(phi)); + float_T sinPhiReal; + float_T cosPhiReal; + pmacc::math::sincos(phiReal, sinPhiReal, cosPhiReal); + float_T const alphaTilt = math::atan2(float_T(1.0) - beta0 * cosPhiReal, beta0 * sinPhiReal); + /* Definition of the laser pulse front tilt angle for the laser field below. + * + * For beta0 = 1.0, this is equivalent to our standard definition. Question: Why is the + * local "phiT" not equal in value to the local "phiReal" or the object member "phi"?
+ * Because the standard TWTS pulse is defined for beta0 = 1.0 and in the coordinate-system + * of the TWTS model phi is responsible for pulse front tilt and dispersion only. Hence + * the dispersion will (although physically correct) be slightly off the ideal TWTS + * pulse for beta0 != 1.0. This only shows that this TWTS pulse is primarily designed for + * scenarios close to beta0 = 1. + */ + float_T const phiT = float_T(2.0) * alphaTilt; + + /* Angle between the laser pulse front and the y-axis. Not used, but remains in code for + * documentation purposes. + * float_T const eta = (PI / 2) - (phiReal - alphaTilt); + */ + + float_T const cspeed = float_T(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); /* speed of light divided by UNIT_SPEED */ + float_T const lambda0 = float_T(wavelength_SI / UNIT_LENGTH); /* wavelength divided by UNIT_LENGTH */ + float_T const om0 = float_T(2.0 * PI) * cspeed / lambda0; /* angular frequency 2*PI*cspeed/lambda0 */ + /* factor 2 in tauG arises from definition convention in laser formula */ + float_T const tauG = float_T(pulselength_SI * 2.0 / UNIT_TIME); + /* w0 is wx here --> w0 could be replaced by wx */ + float_T const w0 = float_T(w_x_SI / UNIT_LENGTH); + float_T const rho0 = float_T(PI * w0 * w0 / lambda0); + float_T const k = float_T(2.0 * PI / lambda0); /* wave number 2*PI/lambda0 */ + + /* In order to calculate in single-precision and in order to account for errors in + * the approximations far from the coordinate origin, we use the wavelength-periodicity and + * the known propagation direction for realizing the laser pulse using relative coordinates + * (i.e. from a finite coordinate range) only. All these quantities have to be calculated + * in double precision.
+ */ + float_64 sinPhiVal; + float_64 cosPhiVal; + pmacc::math::sincos(precisionCast(phi), sinPhiVal, cosPhiVal); + float_64 const tanAlpha = (1.0 - beta_0 * cosPhiVal) / (beta_0 * sinPhiVal); + float_64 const tanFocalLine = math::tan(PI / 2.0 - phi); + float_64 const deltaT = wavelength_SI / SI::SPEED_OF_LIGHT_SI * (1.0 + tanAlpha / tanFocalLine); + float_64 const deltaY = wavelength_SI / tanFocalLine; + float_64 const deltaZ = -wavelength_SI; + float_64 const numberOfPeriods = math::floor(time / deltaT); /* whole number of periods deltaT contained in time */ + float_T const timeMod = float_T(time - numberOfPeriods * deltaT); + float_T const yMod = float_T(pos.y() + numberOfPeriods * deltaY); + float_T const zMod = float_T(pos.z() + numberOfPeriods * deltaZ); + + float_T const x = float_T(phiPositive * pos.x() / UNIT_LENGTH); /* phiPositive flips the sign of x and y for phi < 0 */ + float_T const y = float_T(phiPositive * yMod / UNIT_LENGTH); + float_T const z = float_T(zMod / UNIT_LENGTH); + float_T const t = float_T(timeMod / UNIT_TIME); + + /* Calculating shortcuts for speeding up field calculation. + * Naming: sinPhi2 = sin(phiT / 2), sin2Phi = sin(2 * phiT), tanPhi2 = tan(phiT / 2); + * a suffix "_N" denotes the N-th power of the respective quantity. */ + float_T sinPhi; + float_T cosPhi; + pmacc::math::sincos(phiT, sinPhi, cosPhi); + float_T const cscPhi = float_T(1.0) / sinPhi; + float_T const sinPhi2 = math::sin(phiT / float_T(2.0)); + float_T const sin2Phi = math::sin(phiT * float_T(2.0)); + float_T const tanPhi2 = math::tan(phiT / float_T(2.0)); + + float_T const sinPhi_2 = sinPhi * sinPhi; + float_T const sinPhi_3 = sinPhi * sinPhi_2; + float_T const sinPhi_4 = sinPhi_2 * sinPhi_2; + + float_T const sinPhi2_2 = sinPhi2 * sinPhi2; + float_T const sinPhi2_4 = sinPhi2_2 * sinPhi2_2; + float_T const tanPhi2_2 = tanPhi2 * tanPhi2; + + float_T const tauG2 = tauG * tauG; + float_T const x2 = x * x; + float_T const y2 = y * y; + float_T const z2 = z * z; + + /* The "helpVar" variables decrease the nesting level of the evaluated expressions and + * thus help with formal code verification through manual code inspection.
+ */ + complex_T const helpVar1 = cspeed * om0 * tauG2 * sinPhi_4 + - complex_T(0, 8) * sinPhi2_4 * sinPhi * (y * cosPhi + z * sinPhi); + + complex_T const helpVar2 = complex_T(0, 1) * rho0 - y * cosPhi - z * sinPhi; + + complex_T const helpVar3 = complex_T(0, float_T(-0.5)) * cscPhi + * (complex_T(0, -8) * om0 * y * (cspeed * t - z) * sinPhi2_2 * sinPhi_4 + * (complex_T(0, 1) * rho0 - z * sinPhi) + - om0 * sinPhi_4 * sinPhi + * (-float_T(2.0) * z2 * rho0 + - cspeed * cspeed + * (k * tauG2 * x2 + float_T(2.0) * t * (t - complex_T(0, 1) * om0 * tauG2) * rho0) + + cspeed * (float_T(4.0) * t * z * rho0 - complex_T(0, 2) * om0 * tauG2 * z * rho0) + - complex_T(0, 2) * (cspeed * t - z) * (cspeed * (t - complex_T(0, 1) * om0 * tauG2) - z) + * z * sinPhi) + + float_T(2.0) * y * cosPhi * sinPhi_2 + * (complex_T(0, 4) * om0 * y * (cspeed * t - z) * sinPhi2_2 * sinPhi_2 + + om0 * (cspeed * t - z) + * (complex_T(0, 1) * cspeed * t + cspeed * om0 * tauG2 - complex_T(0, 1) * z) + * sinPhi_3 + - complex_T(0, 4) * sinPhi2_4 + * (cspeed * k * x2 - om0 * (y2 - float_T(4.0) * (cspeed * t - z) * z) * sinPhi)) + - complex_T(0, 4) * sinPhi2_4 + * (complex_T(0, -4) * om0 * y * (cspeed * t - z) * rho0 * cosPhi * sinPhi_2 + + complex_T(0, 2) + * (om0 * (y2 + float_T(2.0) * z2) * rho0 + - cspeed * z * (complex_T(0, 1) * k * x2 + float_T(2.0) * om0 * t * rho0)) + * sinPhi_3 + - float_T(2.0) * om0 * z * (y2 - float_T(2.0) * (cspeed * t - z) * z) * sinPhi_4 + + om0 * y2 * (cspeed * t - z) * sin2Phi * sin2Phi)) + / (cspeed * helpVar2 * helpVar1); + + complex_T const helpVar4 = cspeed * om0 * tauG2 + - complex_T(0, 8) * y * math::tan(float_T(PI / 2.0) - phiT) * cscPhi * cscPhi * sinPhi2_4 + - complex_T(0, 2) * z * tanPhi2_2; + + complex_T const result + = (math::exp(helpVar3) * tauG * math::sqrt(cspeed * om0 * rho0 / helpVar2)) / math::sqrt(helpVar4); + + return result.get_real(); /* only the real part of the complex result is returned */ + } + + /** Calculate the Ey(r,t) field here + * + * @param pos Spatial position of the target field.
+ * @param time Absolute time (SI, including all offsets and transformations) for calculating + * the field */ + HDINLINE EField::float_T EField::calcTWTSEy(float3_64 const& pos, float_64 const time) const + { + /* The field function of Ey (polarization in pulse-front-tilt plane) + * is by definition identical to Ex (polarization normal to pulse-front-tilt plane), + * so the call is simply forwarded to calcTWTSEx. + */ + return calcTWTSEx(pos, time); + } + + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/GetInitialTimeDelay_SI.tpp b/include/picongpu/fields/background/templates/twtsfast/GetInitialTimeDelay_SI.tpp new file mode 100644 index 0000000000..8f99ef7fa7 --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/GetInitialTimeDelay_SI.tpp @@ -0,0 +1,162 @@ +/* Copyright 2014-2021 Alexander Debus + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + + +#pragma once + +#include +#include +#include + +namespace picongpu +{ + namespace templates + { + namespace twtsfast + { + /* Auxiliary functions for calculating the TWTS field */ + namespace detail + { + template + class GetInitialTimeDelay + { + public: + /** Obtain the SI time delay that later enters the Ex(r, t), By(r, t) and Bz(r, t) + * calculations as t.
+ * @tparam T_dim Specializes for the simulation dimension + * @param auto_tdelay calculate the time delay such that the TWTS pulse is not + * inside the simulation volume at simulation start + * timestep = 0 [default = true] + * @param tdelay_user_SI manual time delay, returned unchanged if auto_tdelay is false [s] + * @param halfSimSize center of simulation volume in number of cells + * @param pulselength_SI sigma of std. gauss for intensity (E^2) [s] + * @param focus_y_SI the distance to the laser focus in y-direction [m] + * @param phi interaction angle between TWTS laser propagation vector and + * the y-axis [rad, default = 90.*(PI / 180.)] + * @param beta_0 propagation speed of overlap normalized + * to the speed of light [c, default = 1.0] + * @return time delay in SI units [s] */ + HDINLINE float_64 operator()( + bool const auto_tdelay, + float_64 const tdelay_user_SI, + DataSpace const& halfSimSize, + float_64 const pulselength_SI, + float_64 const focus_y_SI, + float_X const phi, + float_X const beta_0) const; + }; + + template<> + HDINLINE float_64 GetInitialTimeDelay::operator()( + bool const auto_tdelay, + float_64 const tdelay_user_SI, + DataSpace const& halfSimSize, + float_64 const pulselength_SI, + float_64 const focus_y_SI, + float_X const phi, + float_X const beta_0) const + { + if(auto_tdelay) + { + /* angle between the laser pulse front and the y-axis. Good approximation for + * beta0\simeq 1. For exact relation look in TWTS core routines for Ex, By or Bz. */ + float_64 const eta = (PI / 2) - (phi / 2); + /* halfSimSize[2] --> Half-depth of simulation volume (in z); By geometric + * projection we calculate the y-distance walkoff of the TWTS-pulse. + * The abs( )-function is for correct offset for -phi<-90Deg and +phi>+90Deg. */ + float_64 const y1 + = float_64(halfSimSize[2] * picongpu::SI::CELL_DEPTH_SI) * math::abs(math::cos(eta)); + /* Fudge parameter to make sure, that TWTS pulse starts to impact simulation volume + * at low intensity values.
*/ + float_64 const m = 3.; + /* Approximate cross section of laser pulse through y-axis, + * scaled with "fudge factor" m. + * NOTE(review): unlike y1, cos(eta) enters here without abs(), so y2 changes sign + * together with cos(eta) - confirm that this is intended for phi < 0. */ + float_64 const y2 = m * (pulselength_SI * picongpu::SI::SPEED_OF_LIGHT_SI) / math::cos(eta); + /* y-position of laser coordinate system origin within simulation. */ + float_64 const y3 = focus_y_SI; + /* Programmatically obtained time-delay: summed y-distances divided by the + * overlap propagation speed (speed of light times beta_0). */ + float_64 const tdelay = (y1 + y2 + y3) / (picongpu::SI::SPEED_OF_LIGHT_SI * beta_0); + + return tdelay; + } + else + return tdelay_user_SI; + } + + template<> + HDINLINE float_64 GetInitialTimeDelay::operator()( + bool const auto_tdelay, + float_64 const tdelay_user_SI, + DataSpace const& halfSimSize, + float_64 const pulselength_SI, + float_64 const focus_y_SI, + float_X const phi, + float_X const beta_0) const + { + if(auto_tdelay) + { + /* angle between the laser pulse front and the y-axis. Good approximation for + * beta0\simeq 1. For exact relation look in TWTS core routines for Ex, By or Bz. */ + float_64 const eta = (PI / 2) - (phi / 2); + /* halfSimSize[0] --> Half-depth of simulation volume (in x); By geometric + * projection we calculate the y-distance walkoff of the TWTS-pulse. + * The abs( )-function is for correct offset for -phi<-90Deg and +phi>+90Deg. */ + float_64 const y1 + = float_64(halfSimSize[0] * picongpu::SI::CELL_WIDTH_SI) * math::abs(math::cos(eta)); + /* Fudge parameter to make sure, that TWTS pulse starts to impact simulation volume + * at low intensity values. */ + float_64 const m = 3.; + /* Approximate cross section of laser pulse through y-axis, + * scaled with "fudge factor" m. + * NOTE(review): unlike y1, cos(eta) enters here without abs(), so y2 changes sign + * together with cos(eta) - confirm that this is intended for phi < 0. */ + float_64 const y2 = m * (pulselength_SI * picongpu::SI::SPEED_OF_LIGHT_SI) / math::cos(eta); + /* y-position of laser coordinate system origin within simulation.
*/ + float_64 const y3 = focus_y_SI; + /* Programmatically obtained time-delay: summed y-distances divided by the + * overlap propagation speed (speed of light times beta_0). */ + float_64 const tdelay = (y1 + y2 + y3) / (picongpu::SI::SPEED_OF_LIGHT_SI * beta_0); + + return tdelay; + } + else + return tdelay_user_SI; + } + + template + HDINLINE float_64 getInitialTimeDelay_SI( + bool const auto_tdelay, + float_64 const tdelay_user_SI, + DataSpace const& halfSimSize, + float_64 const pulselength_SI, + float_64 const focus_y_SI, + float_X const phi, + float_X const beta_0) + { + return GetInitialTimeDelay()( + auto_tdelay, + tdelay_user_SI, + halfSimSize, + pulselength_SI, + focus_y_SI, + phi, + beta_0); /* forwards all arguments unchanged to the GetInitialTimeDelay functor */ + } + + } /* namespace detail */ + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/RotateField.tpp b/include/picongpu/fields/background/templates/twtsfast/RotateField.tpp new file mode 100644 index 0000000000..dadb89969b --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/RotateField.tpp @@ -0,0 +1,117 @@ +/* Copyright 2014-2021 Alexander Debus, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see .
+ */ + + +#pragma once + +#include +#include +#include + +namespace picongpu +{ + namespace templates + { + namespace twtsfast + { + /** Auxiliary functions for calculating the TWTS field */ + namespace detail + { + template + struct RotateField; + + template + struct RotateField, T_AngleType> + { + using result = pmacc::math::Vector; + using AngleType = T_AngleType; + HDINLINE result operator()(result const& fieldPosVector, AngleType const phi) const + { + /* Since the laser propagation direction encloses an angle of phi with the + * simulation y-axis (i.e. direction of sliding window), the position vectors are + * rotated around the simulation x-axis before calling the TWTS field functions. + * Note: The TWTS field functions are in a non-rotated frame and only use the angle + * phi to determine the required amount of pulse front tilt. + * RotationMatrix[PI/2+phi].(y,z) (180Deg-flip at phi=90Deg since coordinate + * system in paper is oriented the other way round.) */ + AngleType sinPhi; + AngleType cosPhi; + pmacc::math::sincos(AngleType(phi), sinPhi, cosPhi); + return result( + fieldPosVector.x(), + -sinPhi * fieldPosVector.y() - cosPhi * fieldPosVector.z(), + +cosPhi * fieldPosVector.y() - sinPhi * fieldPosVector.z()); /* x is kept, (y,z) are rotated */ + } + }; + + template + struct RotateField, T_AngleType> + { + using result = pmacc::math::Vector; + using AngleType = T_AngleType; + HDINLINE result operator()(result const& fieldPosVector, AngleType const phi) const + { + /* Since the laser propagation direction encloses an angle of phi with the + * simulation y-axis (i.e. direction of sliding window), the position vectors are + * rotated around the simulation x-axis before calling the TWTS field functions. + * Note: The TWTS field functions are in a non-rotated frame and only use the angle + * phi to determine the required amount of pulse front tilt. + * RotationMatrix[PI/2+phi].(y,z) (180Deg-flip at phi=90Deg since coordinate + * system in paper is oriented the other way round.)
*/ + + /* Rotate 90 degrees around y-axis, so that TWTS laser propagates within + * the 2D (x,y)-plane. Corresponding position vector for the Ez-components + * in 2D simulations. + * 3D 3D vectors in 2D space (x,y) + * x --> z + * y --> y + * z --> -x (Since z=0 for 2D, we use the existing + * TWTS-field-function and set -x=0) + * + * Explicit implementation in 3D coordinates: + * fieldPosVector = float3_64( -fieldPosVector.z( ), //(Here: ==0) + * fieldPosVector.y( ), + * fieldPosVector.x( ) ); + * fieldPosVector = float3_64( fieldPosVector.x( ), + * -sin(phi)*fieldPosVector.y( )-cos(phi)*fieldPosVector.z(), + * +cos(phi)*fieldPosVector.y( )-sin(phi)*fieldPosVector.z() ); + * The 2D implementation here only calculates the last two components. + * Note: The x-axis of rotation is fine in 2D, because that component now contains + * the (non-existing) simulation z-coordinate. */ + AngleType sinPhi; + AngleType cosPhi; + pmacc::math::sincos(AngleType(phi), sinPhi, cosPhi); + return result( + -sinPhi * fieldPosVector.y() - cosPhi * fieldPosVector.x(), + +cosPhi * fieldPosVector.y() - sinPhi * fieldPosVector.x()); + } + }; + + template + HDINLINE typename RotateField::result rotateField( + T_Type const& fieldPosVector, + T_AngleType const phi) + { + return RotateField()(fieldPosVector, phi); /* forwards to the RotateField functor */ + } + + } /* namespace detail */ + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/getFieldPositions_SI.tpp b/include/picongpu/fields/background/templates/twtsfast/getFieldPositions_SI.tpp new file mode 100644 index 0000000000..a7e799e865 --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/getFieldPositions_SI.tpp @@ -0,0 +1,81 @@ +/* Copyright 2014-2021 Alexander Debus + * + * This file is part of PIConGPU.
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + + +#pragma once + +#include +#include +#include +#include "picongpu/fields/background/templates/twtsfast/numComponents.hpp" + +namespace picongpu +{ + namespace templates + { + namespace twtsfast + { + /** Auxiliary functions for calculating the TWTS field */ + namespace detail + { + /** Calculate the SI position vectors that later enter the Ex(r, t), By(r, t) + * and Bz(r ,t) calculations as r. + * @param cellIdx The total cell id counted from the start at timestep 0. */ + HDINLINE pmacc::math::Vector getFieldPositions_SI( + DataSpace const& cellIdx, + DataSpace const& halfSimSize, + pmacc::math::Vector const& fieldOnGridPositions, + float_64 const unit_length, + float_64 const focus_y_SI, + float_X const phi) + { + /* Note: Neither direct precisionCast on picongpu::cellSize + or casting on floatD_ does work. */ + floatD_64 const cellDim(picongpu::cellSize.shrink()); + floatD_64 const cellDimensions = cellDim * unit_length; + + /* TWTS laser coordinate origin is centered transversally and defined longitudinally by + the laser center in y (usually maximum of intensity). */ + floatD_X laserOrigin = precisionCast(halfSimSize); + laserOrigin.y() = float_X(focus_y_SI / cellDimensions.y()); + + /* For staggered fields (e.g. Yee-grid), obtain the fractional cell index components and add + * that to the total cell indices. 
The physical field coordinate origin is transversally + * centered with respect to the global simulation volume. + * pmacc::math::Vector fieldPositions = + * traits::FieldPosition( ); */ + pmacc::math::Vector fieldPositions = fieldOnGridPositions; + + pmacc::math::Vector fieldPositions_SI; + + for(uint32_t i = 0; i < numComponents; ++i) /* cellIdx Ex, Ey and Ez */ + { + fieldPositions[i] += (precisionCast(cellIdx) - laserOrigin); + fieldPositions_SI[i] = precisionCast(fieldPositions[i]) * cellDimensions; + + fieldPositions_SI[i] = rotateField(fieldPositions_SI[i], phi); + } + + return fieldPositions_SI; + } + + } /* namespace detail */ + } /* namespace twtsfast */ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/numComponents.hpp b/include/picongpu/fields/background/templates/twtsfast/numComponents.hpp new file mode 100644 index 0000000000..fbd4eb334c --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/numComponents.hpp @@ -0,0 +1,36 @@ +/* Copyright 2014-2021 Alexander Debus, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + + +#pragma once + +namespace picongpu +{ + namespace templates + { + namespace twtsfast + { + namespace detail + { + /** Number of field components used in the simulation. 
[Default: 3 for both 2D and 3D] */ + uint32_t const numComponents = 3; + } /* namespace detail */ + } /* namespace twtsfast*/ + } /* namespace templates */ +} /* namespace picongpu */ diff --git a/include/picongpu/fields/background/templates/twtsfast/twtsfast.hpp b/include/picongpu/fields/background/templates/twtsfast/twtsfast.hpp new file mode 100644 index 0000000000..1c42b9481e --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/twtsfast.hpp @@ -0,0 +1,59 @@ +/* Copyright 2014-2021 Alexander Debus + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * This background field implements an obliquely incident, cylindrically-focused, pulse-front tilted laser for some + * incidence angle phi as used for [1]. + * + * The TWTS implementation generally follows the definition of eq. (7) in [1]. In deriving the magnetic field + * components, a slowly-varying wave approximation was assumed, by neglecting the spatial derivatives of the + * 2nd omega-order TWTS-phase-terms for the B-field-component transverse to direction of propagation, and additionally + * neglecting the 1st-order TWTS-phase-terms for the B-field-component longitudinal to the direction of propagation. 
+ * + * Specifically, this TWTSfast approximation assumes a special case, where the transverse extent (but not its height wx + * or its pulse duration) of the TWTS-laser wy is assumed to be infinite. While this special case of the TWTS laser + * applies to a large range of use cases, the resulting form allows using different spatial and time coordinates + * (timeMod, yMod and zMod), which allow long-term numerical stability beyond 100000 timesteps at single precision, + * as well as mitigating errors of the approximations far from the coordinate origin. + * + * We exploit the wavelength-periodicity and the known propagation direction for realizing the laser pulse + * using relative coordinates (i.e. from a finite coordinate range) only. All these quantities have to be calculated + * in double precision. + * + * float_64 const tanAlpha = (float_64(1.0) - beta_0 * math::cos(phi)) / (beta_0 * math::sin(phi)); + * float_64 const tanFocalLine = math::tan(PI / float_64(2.0) - phi); + * float_64 const deltaT = wavelength_SI / SI::SPEED_OF_LIGHT_SI * (float_64(1.0) + tanAlpha / tanFocalLine); + * float_64 const deltaY = wavelength_SI / tanFocalLine; + * float_64 const deltaZ = -wavelength_SI; + * float_64 const numberOfPeriods = math::floor(time / deltaT); + * float_T const timeMod = float_T(time - numberOfPeriods * deltaT); + * float_T const yMod = float_T(pos.y() + numberOfPeriods * deltaY); + * float_T const zMod = float_T(pos.z() + numberOfPeriods * deltaZ); + * + * Literature: + * [1] Steiniger et al., "Optical free-electron lasers with Traveling-Wave Thomson-Scattering", + * Journal of Physics B: Atomic, Molecular and Optical Physics, Volume 47, Number 23 (2014), + * https://doi.org/10.1088/0953-4075/47/23/234011 + */ + +#pragma once + +#include "picongpu/fields/background/templates/twtsfast/EField.hpp" +#include "picongpu/fields/background/templates/twtsfast/BField.hpp" diff --git a/include/picongpu/fields/background/templates/twtsfast/twtsfast.tpp
b/include/picongpu/fields/background/templates/twtsfast/twtsfast.tpp new file mode 100644 index 0000000000..65b6a513e6 --- /dev/null +++ b/include/picongpu/fields/background/templates/twtsfast/twtsfast.tpp @@ -0,0 +1,24 @@ +/* Copyright 2014-2021 Alexander Debus + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + + +#pragma once + +#include "picongpu/fields/background/templates/twtsfast/EField.tpp" +#include "picongpu/fields/background/templates/twtsfast/BField.tpp" diff --git a/include/picongpu/fields/cellType/Centered.hpp b/include/picongpu/fields/cellType/Centered.hpp index 5d9941444a..3ba35d7bb9 100644 --- a/include/picongpu/fields/cellType/Centered.hpp +++ b/include/picongpu/fields/cellType/Centered.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov * * This file is part of PIConGPU. 
* @@ -28,176 +28,156 @@ namespace picongpu { -namespace fields -{ -namespace cellType -{ - - struct Centered{}; + namespace fields + { + namespace cellType + { + struct Centered + { + }; -} //namespace fields -} //namespace cellType + } // namespace cellType + } // namespace fields -namespace traits -{ - /** position (floatD_X in case of T_simDim == simDim) in cell for - * E_x, E_y, E_z - */ - template< uint32_t T_simDim > - struct FieldPosition< - fields::cellType::Centered, - FieldE, - T_simDim - > + namespace traits { - using PosType = pmacc::math::Vector; - using ReturnType = const pmacc::math::Vector; - - /// boost::result_of hints - template struct result; + /** position (floatD_X in case of T_simDim == simDim) in cell for + * E_x, E_y, E_z + */ + template + struct FieldPosition + { + using PosType = pmacc::math::Vector; + using ReturnType = const pmacc::math::Vector; - template - struct result { - using type = ReturnType; - }; + /// boost::result_of hints + template + struct result; - HDINLINE FieldPosition() - { - } + template + struct result + { + using type = ReturnType; + }; - HDINLINE ReturnType operator()() const - { - const auto center = PosType::create( 0.5 ); - - return ReturnType::create( center ); - } - }; - - /** position (floatD_X in case of T_simDim == simDim) in cell for - * B_x, B_y, B_z - */ - template< uint32_t T_simDim > - struct FieldPosition< - fields::cellType::Centered, - FieldB, - T_simDim - > : public FieldPosition< - fields::cellType::Centered, - FieldE, - T_simDim - > - { - HDINLINE FieldPosition() - { - } - }; - - /** position (float2_X) in cell for J_x, J_y, J_z */ - template<> - struct FieldPosition< - fields::cellType::Centered, - FieldJ, - DIM2 - > - { - /** \tparam float2_X position of the component in the cell - * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! 
- */ - using VectorVector2D3V = const ::pmacc::math::Vector< - float2_X, - DIM3 - >; - /// boost::result_of hints - template struct result; - - template - struct result { - using type = VectorVector2D3V; - }; + HDINLINE FieldPosition() + { + } - HDINLINE FieldPosition() - { - } + HDINLINE ReturnType operator()() const + { + const auto center = PosType::create(0.5); - HDINLINE VectorVector2D3V operator()() const - { - const float2_X posJ_x( 0.5, 0.0 ); - const float2_X posJ_y( 0.0, 0.5 ); - const float2_X posJ_z( 0.0, 0.0 ); - - return VectorVector2D3V( posJ_x, posJ_y, posJ_z ); - } - }; - - /** position (float3_X) in cell for J_x, J_y, J_z - */ - template<> - struct FieldPosition< - fields::cellType::Centered, - FieldJ, - DIM3 - > - { - /** \tparam float2_X position of the component in the cell - * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! - */ - using VectorVector3D3V = const ::pmacc::math::Vector< - float3_X, - DIM3 - >; - /// boost::result_of hints - template struct result; - - template - struct result { - using type = VectorVector3D3V; + return ReturnType::create(center); + } }; - HDINLINE FieldPosition() + /** position (floatD_X in case of T_simDim == simDim) in cell for + * B_x, B_y, B_z + */ + template + struct FieldPosition + : public FieldPosition { - } + HDINLINE FieldPosition() + { + } + }; - HDINLINE VectorVector3D3V operator()() const + /** position (float2_X) in cell for J_x, J_y, J_z */ + template<> + struct FieldPosition { - const float3_X posJ_x( 0.5, 0.0, 0.0 ); - const float3_X posJ_y( 0.0, 0.5, 0.0 ); - const float3_X posJ_z( 0.0, 0.0, 0.5 ); - - return VectorVector3D3V( posJ_x, posJ_y, posJ_z ); - } - }; - - /** position (floatD_X in case of T_simDim == simDim) in cell, wrapped in - * one-component vector since it's a scalar field with only one component, for the - * scalar field FieldTmp - */ - template< uint32_t T_simDim > - struct FieldPosition< - fields::cellType::Centered, - FieldTmp, - T_simDim - > - { - using 
FieldPos = pmacc::math::Vector; - using ReturnType = pmacc::math::Vector; - - /// boost::result_of hints - template struct result; - - template - struct result { - using type = ReturnType; + /** \tparam float2_X position of the component in the cell + * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! + */ + using VectorVector2D3V = const ::pmacc::math::Vector; + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = VectorVector2D3V; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE VectorVector2D3V operator()() const + { + const float2_X posJ_x(0.5, 0.0); + const float2_X posJ_y(0.0, 0.5); + const float2_X posJ_z(0.0, 0.0); + + return VectorVector2D3V(posJ_x, posJ_y, posJ_z); + } }; - HDINLINE FieldPosition() + /** position (float3_X) in cell for J_x, J_y, J_z + */ + template<> + struct FieldPosition { - } + /** \tparam float2_X position of the component in the cell + * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! 
+ */ + using VectorVector3D3V = const ::pmacc::math::Vector; + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = VectorVector3D3V; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE VectorVector3D3V operator()() const + { + const float3_X posJ_x(0.5, 0.0, 0.0); + const float3_X posJ_y(0.0, 0.5, 0.0); + const float3_X posJ_z(0.0, 0.0, 0.5); + + return VectorVector3D3V(posJ_x, posJ_y, posJ_z); + } + }; - HDINLINE ReturnType operator()() const + /** position (floatD_X in case of T_simDim == simDim) in cell, wrapped in + * one-component vector since it's a scalar field with only one component, for the + * scalar field FieldTmp + */ + template + struct FieldPosition { - return ReturnType( FieldPos::create(0.0) ); - } - }; + using FieldPos = pmacc::math::Vector; + using ReturnType = pmacc::math::Vector; + + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = ReturnType; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE ReturnType operator()() const + { + return ReturnType(FieldPos::create(0.0)); + } + }; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/fields/cellType/Yee.hpp b/include/picongpu/fields/cellType/Yee.hpp index 320b333952..d8d6f136d7 100644 --- a/include/picongpu/fields/cellType/Yee.hpp +++ b/include/picongpu/fields/cellType/Yee.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov * * This file is part of PIConGPU. 
* @@ -28,221 +28,194 @@ namespace picongpu { -namespace fields -{ -namespace cellType -{ - struct Yee{}; - -} // namespace cellType -} // namespace fields - -namespace traits -{ - /** position (float2_X) in cell for E_x, E_y, E_z - */ - template<> - struct FieldPosition< - fields::cellType::Yee, - FieldE, - DIM2 - > + namespace fields { - /** \tparam float2_X position of the component in the cell - * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! - */ - using VectorVector2D3V = const ::pmacc::math::Vector< - float2_X, - DIM3 - >; - /// boost::result_of hints - template struct result; - - template - struct result { - using type = VectorVector2D3V; - }; - - HDINLINE FieldPosition() + namespace cellType { - } + struct Yee + { + }; - HDINLINE VectorVector2D3V operator()() const - { - const float2_X posE_x( 0.5, 0.0 ); - const float2_X posE_y( 0.0, 0.5 ); - const float2_X posE_z( 0.0, 0.0 ); - - return VectorVector2D3V( posE_x, posE_y, posE_z ); - } - }; - - /** position (float3_X) in cell for E_x, E_y, E_z - */ - template<> - struct FieldPosition< - fields::cellType::Yee, - FieldE, - DIM3 - > - { - /** \tparam float2_X position of the component in the cell - * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! 
- */ - using VectorVector3D3V = const ::pmacc::math::Vector< - float3_X, - DIM3 - >; - - /// boost::result_of hints - template struct result; + } // namespace cellType + } // namespace fields - template - struct result { - using type = VectorVector3D3V; - }; - - HDINLINE FieldPosition() - { - } - - HDINLINE VectorVector3D3V operator()() const - { - const float3_X posE_x( 0.5, 0.0, 0.0 ); - const float3_X posE_y( 0.0, 0.5, 0.0 ); - const float3_X posE_z( 0.0, 0.0, 0.5 ); - - return VectorVector3D3V( posE_x, posE_y, posE_z ); - } - }; - - /** position (float2_X) in cell for B_x, B_y, B_z - */ - template<> - struct FieldPosition< - fields::cellType::Yee, - FieldB, - DIM2 - > + namespace traits { - /** \tparam float2_X position of the component in the cell - * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! + /** position (float2_X) in cell for E_x, E_y, E_z */ - using VectorVector2D3V = const ::pmacc::math::Vector< - float2_X, - DIM3 - >; - /// boost::result_of hints - template struct result; - - template - struct result { - using type = VectorVector2D3V; - }; - - HDINLINE FieldPosition() + template<> + struct FieldPosition { - } + /** \tparam float2_X position of the component in the cell + * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! 
+ */ + using VectorVector2D3V = const ::pmacc::math::Vector; + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = VectorVector2D3V; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE VectorVector2D3V operator()() const + { + const float2_X posE_x(0.5, 0.0); + const float2_X posE_y(0.0, 0.5); + const float2_X posE_z(0.0, 0.0); + + return VectorVector2D3V(posE_x, posE_y, posE_z); + } + }; - HDINLINE VectorVector2D3V operator()() const - { - const float2_X posB_x( 0.0, 0.5 ); - const float2_X posB_y( 0.5, 0.0 ); - const float2_X posB_z( 0.5, 0.5 ); - - return VectorVector2D3V( posB_x, posB_y, posB_z ); - } - }; - - /** position (float3_X) in cell for B_x, B_y, B_z - */ - template<> - struct FieldPosition< - fields::cellType::Yee, - FieldB, - DIM3 - > - { - /** \tparam float2_X position of the component in the cell - * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! + /** position (float3_X) in cell for E_x, E_y, E_z */ - using VectorVector3D3V = const ::pmacc::math::Vector< - float3_X, - DIM3 - >; - - /// boost::result_of hints - template struct result; - - template - struct result { - using type = VectorVector3D3V; + template<> + struct FieldPosition + { + /** \tparam float2_X position of the component in the cell + * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! 
+ */ + using VectorVector3D3V = const ::pmacc::math::Vector; + + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = VectorVector3D3V; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE VectorVector3D3V operator()() const + { + const float3_X posE_x(0.5, 0.0, 0.0); + const float3_X posE_y(0.0, 0.5, 0.0); + const float3_X posE_z(0.0, 0.0, 0.5); + + return VectorVector3D3V(posE_x, posE_y, posE_z); + } }; - HDINLINE FieldPosition() + /** position (float2_X) in cell for B_x, B_y, B_z + */ + template<> + struct FieldPosition { - } + /** \tparam float2_X position of the component in the cell + * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! + */ + using VectorVector2D3V = const ::pmacc::math::Vector; + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = VectorVector2D3V; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE VectorVector2D3V operator()() const + { + const float2_X posB_x(0.0, 0.5); + const float2_X posB_y(0.5, 0.0); + const float2_X posB_z(0.5, 0.5); + + return VectorVector2D3V(posB_x, posB_y, posB_z); + } + }; - HDINLINE VectorVector3D3V operator()() const - { - const float3_X posB_x( 0.0, 0.5, 0.5 ); - const float3_X posB_y( 0.5, 0.0, 0.5 ); - const float3_X posB_z( 0.5, 0.5, 0.0 ); - - return VectorVector3D3V( posB_x, posB_y, posB_z ); - } - }; - - /** position (floatD_X in case of T_simDim == simDim) in cell for - * J_x, J_y, J_z - */ - template< uint32_t T_simDim > - struct FieldPosition< - fields::cellType::Yee, - FieldJ, - T_simDim - > : public FieldPosition< - fields::cellType::Yee, - FieldE, - T_simDim - > - { - HDINLINE FieldPosition() + /** position (float3_X) in cell for B_x, B_y, B_z + */ + template<> + struct FieldPosition { - } - }; - - /** position (floatD_X in case of T_simDim == simDim) in cell, wrapped in - * one-component vector since it's a scalar field with only one component, for the - * scalar field FieldTmp 
- */ - template< uint32_t T_simDim > - struct FieldPosition< - fields::cellType::Yee, - FieldTmp, - T_simDim - > - { - using FieldPos = pmacc::math::Vector; - using ReturnType = pmacc::math::Vector; - - /// boost::result_of hints - template struct result; - - template - struct result { - using type = ReturnType; + /** \tparam float2_X position of the component in the cell + * \tparam DIM3 Fields (E/B/J) have 3 components, even in 1 or 2D ! + */ + using VectorVector3D3V = const ::pmacc::math::Vector; + + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = VectorVector3D3V; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE VectorVector3D3V operator()() const + { + const float3_X posB_x(0.0, 0.5, 0.5); + const float3_X posB_y(0.5, 0.0, 0.5); + const float3_X posB_z(0.5, 0.5, 0.0); + + return VectorVector3D3V(posB_x, posB_y, posB_z); + } }; - HDINLINE FieldPosition() + /** position (floatD_X in case of T_simDim == simDim) in cell for + * J_x, J_y, J_z + */ + template + struct FieldPosition + : public FieldPosition { - } + HDINLINE FieldPosition() + { + } + }; - HDINLINE ReturnType operator()() const + /** position (floatD_X in case of T_simDim == simDim) in cell, wrapped in + * one-component vector since it's a scalar field with only one component, for the + * scalar field FieldTmp + */ + template + struct FieldPosition { - return ReturnType( FieldPos::create(0.0) ); - } - }; + using FieldPos = pmacc::math::Vector; + using ReturnType = pmacc::math::Vector; + + /// boost::result_of hints + template + struct result; + + template + struct result + { + using type = ReturnType; + }; + + HDINLINE FieldPosition() + { + } + + HDINLINE ReturnType operator()() const + { + return ReturnType(FieldPos::create(0.0)); + } + }; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Cache.hpp b/include/picongpu/fields/currentDeposition/Cache.hpp new file mode 
100644 index 0000000000..4b9ea4ec3d --- /dev/null +++ b/include/picongpu/fields/currentDeposition/Cache.hpp @@ -0,0 +1,132 @@ +/* Copyright 2020-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/algorithms/Set.hpp" + +#include + +#include +#include + + +namespace picongpu +{ + namespace currentSolver + { + namespace detail + { + /** Transparent cache implementation for the current solver + * + * @tparam T_Strategy Used strategy to reduce the scattered data [currentSolver::strategy] + * @tparam T_Sfinae Optional specialization + */ + template + struct Cache; + + template + struct Cache::type> + { + /** Create a cache + * + * @attention thread-collective operation, requires external thread synchronization + */ + template + DINLINE static auto create(T_Acc const& acc, T_FieldBox const& fieldBox, uint32_t const workerIdx) +#if(!BOOST_COMP_CLANG) + -> decltype( + CachedBox::create<0u, typename T_FieldBox::ValueType>(acc, std::declval())) +#endif + { + using ValueType = typename T_FieldBox::ValueType; + /* this memory is used by all virtual blocks */ + auto cache = CachedBox::create<0u, ValueType>(acc, T_BlockDescription{}); + + Set set(ValueType::create(0.0_X)); + ThreadCollective collectiveFill(workerIdx); + + /* initialize shared memory with zeros */ + collectiveFill(acc, set, cache); + return cache; + } + + 
/** Flush the cache + * + * @attention thread-collective operation, requires external thread synchronization + */ + template< + uint32_t T_numWorkers, + typename T_BlockDescription, + typename T_Acc, + typename T_FieldBox, + typename T_FieldCache> + DINLINE static void flush( + T_Acc const& acc, + T_FieldBox fieldBox, + T_FieldCache const& cachedBox, + uint32_t const workerIdx) + { + typename T_Strategy::GridReductionOp const op; + ThreadCollective collectiveAdd(workerIdx); + + /* write scatter results back to the global memory */ + collectiveAdd(acc, op, fieldBox, cachedBox); + } + }; + + template + struct Cache::type> + { + /** Create a cache + * + * @attention thread-collective operation, requires external thread synchronization + */ + template + DINLINE static auto create(T_Acc const& acc, T_FieldBox const& fieldBox, uint32_t const workerIdx) +#if(!BOOST_COMP_CLANG) + -> T_FieldBox +#endif + { + alpaka::ignore_unused(acc, workerIdx); + return fieldBox; + } + + /** Flush the cache + * + * @attention thread-collective operation, requires external thread synchronization + */ + template< + uint32_t T_numWorkers, + typename T_BlockDescription, + typename T_Acc, + typename T_FieldBox, + typename T_FieldCache> + DINLINE static void flush( + T_Acc const& acc, + T_FieldBox fieldBox, + T_FieldCache const& cachedBox, + uint32_t const workerIdx) + { + alpaka::ignore_unused(acc, fieldBox, cachedBox, workerIdx); + } + }; + } // namespace detail + } // namespace currentSolver +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Deposit.hpp b/include/picongpu/fields/currentDeposition/Deposit.hpp new file mode 100644 index 0000000000..144083afcf --- /dev/null +++ b/include/picongpu/fields/currentDeposition/Deposit.hpp @@ -0,0 +1,125 @@ +/* Copyright 2020-2021 Rene Widera + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/particles/traits/GetCurrentSolver.hpp" +#include "picongpu/traits/GetMargin.hpp" + +#include +#include +#include +#include + + +namespace picongpu +{ + namespace currentSolver + { + /** Executes the current deposition kernel + * + * @tparam T_Strategy Used strategy to reduce the scattered data [currentSolver::strategy] + * @tparam T_Sfinae Optional specialization + */ + template + struct Deposit; + + template + struct Deposit::type> + { + /** Execute the current deposition with a checker board + * + * The stride between the supercells for the checker board will be automatically + * adjusted, based on the species shape. + */ + template< + uint32_t T_area, + uint32_t T_numWorkers, + typename T_CellDescription, + typename T_DepositionKernel, + typename T_FrameSolver, + typename T_JBox, + typename T_ParticleBox> + void execute( + T_CellDescription const& cellDescription, + T_DepositionKernel const& depositionKernel, + T_FrameSolver const& frameSolver, + T_JBox const& jBox, + T_ParticleBox const& parBox) const + { + /* The needed stride for the stride mapper depends on the stencil width. + * If the upper and lower margin of the stencil fits into one supercell + * a double checker board (stride 2) is needed. + * The round up sum of margins is the number of supercells to skip. 
+ */ + using MarginPerDim = typename pmacc::math::CT::add< + typename GetMargin::LowerMargin, + typename GetMargin::UpperMargin>::type; + using MaxMargin = typename pmacc::math::CT::max::type; + using SuperCellMinSize = typename pmacc::math::CT::min::type; + + /* number of supercells which must be skipped to avoid overlapping areas + * between different blocks in the kernel + */ + constexpr uint32_t skipSuperCells + = (MaxMargin::value + SuperCellMinSize::value - 1u) / SuperCellMinSize::value; + StrideMapping< + T_area, + skipSuperCells + 1u, // stride 1u means each supercell is used + MappingDesc> + mapper(cellDescription); + + do + { + PMACC_KERNEL(depositionKernel) + (mapper.getGridDim(), T_numWorkers)(jBox, parBox, frameSolver, mapper); + } while(mapper.next()); + } + }; + + template + struct Deposit::type> + { + /** Execute the current deposition for each supercell + * + * All supercells will be processed in parallel. + */ + template< + uint32_t T_area, + uint32_t T_numWorkers, + typename T_CellDescription, + typename T_DepositionKernel, + typename T_FrameSolver, + typename T_JBox, + typename T_ParticleBox> + void execute( + T_CellDescription const& cellDescription, + T_DepositionKernel const& depositionKernel, + T_FrameSolver const& frameSolver, + T_JBox const& jBox, + T_ParticleBox const& parBox) const + { + AreaMapping mapper(cellDescription); + + PMACC_KERNEL(depositionKernel)(mapper.getGridDim(), T_numWorkers)(jBox, parBox, frameSolver, mapper); + } + }; + + } // namespace currentSolver +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/EmZ/DepositCurrent.hpp b/include/picongpu/fields/currentDeposition/EmZ/DepositCurrent.hpp index c7fd53a68c..3d2a40751d 100644 --- a/include/picongpu/fields/currentDeposition/EmZ/DepositCurrent.hpp +++ b/include/picongpu/fields/currentDeposition/EmZ/DepositCurrent.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, 
Rene Widera * * This file is part of PIConGPU. * @@ -21,7 +21,6 @@ #include #include -#include #include "picongpu/fields/currentDeposition/EmZ/EmZ.def" #include "picongpu/fields/currentDeposition/Esirkepov/Line.hpp" @@ -29,320 +28,224 @@ namespace picongpu { -namespace currentSolver -{ -namespace emz -{ - using namespace pmacc; - - template< - typename ParticleAssign - > - struct BaseMethods + namespace currentSolver { - /** evaluate particle shape - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1,2} means {x,y,z} - * different to Esirkepov paper, here we use C style - * @{ - */ - - /** evaluate shape for the first particle S0 (see paper) */ - DINLINE float_X - S0( - const Line< floatD_X >& line, - const float_X gridPoint, - const uint32_t d - ) const - { - return ParticleAssign( )( gridPoint - line.m_pos0[d] ); - } - - /** evaluate shape for the second particle */ - DINLINE float_X - S1( - const Line< floatD_X >& line, - const float_X gridPoint, - const uint32_t d - ) const + namespace emz { - return ParticleAssign( )( gridPoint - line.m_pos1[d] ); - } - /*! 
@} */ + using namespace pmacc; - /** calculate DS (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1,2} means {x,y,z}] - * different to Esirkepov paper, here we use C style - */ - DINLINE float_X - DS( - const Line& line, - const float_X gridPoint, - const uint32_t d - ) const - { - return ParticleAssign( )( gridPoint - line.m_pos1[d] ) - ParticleAssign( )( gridPoint - line.m_pos0[d] ); - } - }; + template + struct BaseMethods + { + /** evaluate particle shape + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1,2} means {x,y,z} + * different to Esirkepov paper, here we use C style + * @{ + */ + + /** evaluate shape for the first particle S0 (see paper) */ + DINLINE float_X S0(const Line& line, const float_X gridPoint, const uint32_t d) const + { + return ParticleAssign()(gridPoint - line.m_pos0[d]); + } - template< - typename ParticleAssign, - int T_begin, - int T_end - > - struct DepositCurrent< - ParticleAssign, - T_begin, - T_end, - DIM3 - > : public BaseMethods< ParticleAssign > - { - template< - typename T_Cursor, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - const T_Cursor& cursorJ, - const Line< float3_X >& line, - const float_X chargeDensity, - const float_X - ) const - { - /** - * \brief the following three calls separate the 3D current deposition - * into three independent 1D calls, each for one direction and current component. - * Therefore the coordinate system has to be rotated so that the z-direction - * is always specific. 
- */ - using namespace cursor::tools; - cptCurrent1D( - acc, - twistVectorFieldAxes< pmacc::math::CT::Int < 1, 2, 0 > >( cursorJ ), - rotateOrigin< 1, 2, 0 >( line ), - cellSize.x( ) * chargeDensity / DELTA_T - ); - cptCurrent1D( - acc, - twistVectorFieldAxes< pmacc::math::CT::Int < 2, 0, 1 > >( cursorJ ), - rotateOrigin< 2, 0, 1 >( line ), - cellSize.y( ) * chargeDensity / DELTA_T - ); - cptCurrent1D( - acc, - cursorJ, - line, - cellSize.z( ) * chargeDensity / DELTA_T - ); - } + /** evaluate shape for the second particle */ + DINLINE float_X S1(const Line& line, const float_X gridPoint, const uint32_t d) const + { + return ParticleAssign()(gridPoint - line.m_pos1[d]); + } + /*! @} */ + + /** calculate DS (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1,2} means {x,y,z}] + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X DS(const Line& line, const float_X gridPoint, const uint32_t d) const + { + return ParticleAssign()(gridPoint - line.m_pos1[d]) - ParticleAssign()(gridPoint - line.m_pos0[d]); + } + }; - /** deposites current in z-direction - * - * \param cursorJ cursor pointing at the current density field of the particle's cell - * \param line trajectory of the virtual particle - * \param currentSurfaceDensity surface density - */ - template< - typename CursorJ, - typename T_Line, - typename T_Acc - > - DINLINE void - cptCurrent1D( - T_Acc const & acc, - CursorJ cursorJ, - const T_Line& line, - const float_X currentSurfaceDensity - ) const - { - if( line.m_pos0[2] == line.m_pos1[2] ) - return; - /* pick every cell in the xy-plane that is overlapped by particle's - * form factor and deposit the current for the cells above and beneath - * that cell and for the cell itself. 
- */ - for( int i = T_begin ; i < T_end ; ++i ) + template + struct DepositCurrent + : public BaseMethods { - const float_X s0i = this->S0( line, i, 0 ); - const float_X dsi = this->S1( line, i, 0 ) - s0i; - for( int j = T_begin ; j < T_end ; ++j ) + template + DINLINE void operator()( + T_Acc const& acc, + const T_Cursor& cursorJ, + const Line& line, + const float_X chargeDensity, + const float_X) const { - const float_X s0j = this->S0( line, j, 1 ); - const float_X dsj = this->S1( line, j, 1 ) - s0j; - - float_X tmp = - -currentSurfaceDensity * ( - s0i * s0j + - float_X( 0.5 ) * ( dsi * s0j + s0i * dsj ) + - ( float_X( 1.0 ) / float_X( 3.0 ) ) * dsj * dsi - ); + /** + * \brief the following three calls separate the 3D current deposition + * into three independent 1D calls, each for one direction and current component. + * Therefore the coordinate system has to be rotated so that the z-direction + * is always specific. + */ + using namespace cursor::tools; + cptCurrent1D( + acc, + twistVectorFieldAxes>(cursorJ), + rotateOrigin<1, 2, 0>(line), + cellSize.x() * chargeDensity / DELTA_T); + cptCurrent1D( + acc, + twistVectorFieldAxes>(cursorJ), + rotateOrigin<2, 0, 1>(line), + cellSize.y() * chargeDensity / DELTA_T); + cptCurrent1D(acc, cursorJ, line, cellSize.z() * chargeDensity / DELTA_T); + } - float_X accumulated_J = float_X( 0.0 ); - for( int k = T_begin ; k < T_end - 1 ; ++k ) + /** deposites current in z-direction + * + * \param cursorJ cursor pointing at the current density field of the particle's cell + * \param line trajectory of the virtual particle + * \param currentSurfaceDensity surface density + */ + template + DINLINE void cptCurrent1D( + T_Acc const& acc, + CursorJ cursorJ, + const T_Line& line, + const float_X currentSurfaceDensity) const + { + if(line.m_pos0[2] == line.m_pos1[2]) + return; + /* pick every cell in the xy-plane that is overlapped by particle's + * form factor and deposit the current for the cells above and beneath + * that cell and 
for the cell itself. + */ + for(int i = T_begin; i < T_end; ++i) { - /* This is the implementation of the FORTRAN W(i,j,k,3)/ C style W(i,j,k,2) version from - * Esirkepov paper. All coordinates are rotated before thus we can - * always use C style W(i,j,k,2). - */ - const float_X W = this->DS( line, k, 2 ) * tmp; - accumulated_J += W; - atomicAdd( - &( (*cursorJ( i, j, k ) ).z( ) ), - accumulated_J, - ::alpaka::hierarchy::Threads{} - ); + const float_X s0i = this->S0(line, i, 0); + const float_X dsi = this->S1(line, i, 0) - s0i; + for(int j = T_begin; j < T_end; ++j) + { + const float_X s0j = this->S0(line, j, 1); + const float_X dsj = this->S1(line, j, 1) - s0j; + + float_X tmp = -currentSurfaceDensity + * (s0i * s0j + float_X(0.5) * (dsi * s0j + s0i * dsj) + + (float_X(1.0) / float_X(3.0)) * dsj * dsi); + + float_X accumulated_J = float_X(0.0); + for(int k = T_begin; k < T_end - 1; ++k) + { + /* This is the implementation of the FORTRAN W(i,j,k,3)/ C style W(i,j,k,2) version + * from Esirkepov paper. All coordinates are rotated before thus we can always use C + * style W(i,j,k,2). 
+ */ + const float_X W = this->DS(line, k, 2) * tmp; + accumulated_J += W; + auto const atomicOp = T_AtomicAddOp{}; + atomicOp(acc, (*cursorJ(i, j, k)).z(), accumulated_J); + } + } } } - } - } - }; - - template< - typename ParticleAssign, - int T_begin, - int T_end - > - struct DepositCurrent< - ParticleAssign, - T_begin, - T_end, - DIM2 - > : public BaseMethods< ParticleAssign > - { - template< - typename T_Cursor, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - const T_Cursor& cursorJ, - const Line< float2_X >& line, - const float_X chargeDensity, - const float_X velocityZ - ) const - { - using namespace cursor::tools; - cptCurrent1D( - acc, - cursorJ, - line, - cellSize.x( ) * chargeDensity / DELTA_T - ); - cptCurrent1D( - acc, - twistVectorFieldAxes< pmacc::math::CT::Int < 1, 0 > >( cursorJ ), - rotateOrigin < 1, 0 > ( line ), - cellSize.y( ) * chargeDensity / DELTA_T - ); - cptCurrentZ( - acc, - cursorJ, - line, - velocityZ * chargeDensity - ); - } + }; - /** deposites current in x-direction - * - * \param cursorJ cursor pointing at the current density field of the particle's cell - * \param line trajectory of the virtual particle - * \param currentSurfaceDensity surface density - */ - template< - typename CursorJ, - typename T_Line, - typename T_Acc - > - DINLINE void - cptCurrent1D( - T_Acc const & acc, - CursorJ cursorJ, - const T_Line& line, - const float_X currentSurfaceDensity - ) const - { - if( line.m_pos0[0] == line.m_pos1[0] ) - return; - - for( int j = T_begin; j < T_end; ++j ) + template + struct DepositCurrent + : public BaseMethods { - const float_X s0j = this->S0( line, j, 1 ); - const float_X dsj = this->S1( line, j, 1 ) - s0j; - - float_X tmp = -currentSurfaceDensity * - ( - s0j + - float_X( 0.5 ) * dsj - ); - - float_X accumulated_J = float_X( 0.0 ); - for( int i = T_begin; i < T_end - 1; ++i ) + template + DINLINE void operator()( + T_Acc const& acc, + const T_Cursor& cursorJ, + const Line& line, + const float_X 
chargeDensity, + const float_X velocityZ) const { - /* This is the implementation of the FORTRAN W(i,j,k,1)/ C style W(i,j,k,0) version from - * Esirkepov paper. All coordinates are rotated before thus we can - * always use C style W(i,j,k,0). - */ - const float_X W = this->DS( line, i, 0 ) * tmp; - accumulated_J += W; - atomicAdd( - &( ( *cursorJ( i, j ) ).x( ) ), - accumulated_J, - ::alpaka::hierarchy::Threads{} - ); + using namespace cursor::tools; + cptCurrent1D(acc, cursorJ, line, cellSize.x() * chargeDensity / DELTA_T); + cptCurrent1D( + acc, + twistVectorFieldAxes>(cursorJ), + rotateOrigin<1, 0>(line), + cellSize.y() * chargeDensity / DELTA_T); + cptCurrentZ(acc, cursorJ, line, velocityZ * chargeDensity); } - } - } - /** deposites current in z-direction - * - * \param cursorJ cursor pointing at the current density field of the particle's cell - * \param line trajectory of the virtual particle - * \param currentSurfaceDensityZ surface density in z direction - */ - template< - typename CursorJ, - typename T_Line, - typename T_Acc - > - DINLINE void - cptCurrentZ( - T_Acc const & acc, - CursorJ cursorJ, - const T_Line& line, - const float_X currentSurfaceDensityZ - ) const - { - if( currentSurfaceDensityZ == float_X( 0.0 ) ) - return; + /** deposites current in x-direction + * + * \param cursorJ cursor pointing at the current density field of the particle's cell + * \param line trajectory of the virtual particle + * \param currentSurfaceDensity surface density + */ + template + DINLINE void cptCurrent1D( + T_Acc const& acc, + CursorJ cursorJ, + const T_Line& line, + const float_X currentSurfaceDensity) const + { + if(line.m_pos0[0] == line.m_pos1[0]) + return; - for( int j = T_begin; j < T_end; ++j ) - { - const float_X s0j = this->S0( line, j, 1 ); - const float_X dsj = this->S1( line, j, 1 ) - s0j; - for( int i = T_begin; i < T_end; ++i ) + for(int j = T_begin; j < T_end; ++j) + { + const float_X s0j = this->S0(line, j, 1); + const float_X dsj = 
this->S1(line, j, 1) - s0j; + + float_X tmp = -currentSurfaceDensity * (s0j + float_X(0.5) * dsj); + + float_X accumulated_J = float_X(0.0); + for(int i = T_begin; i < T_end - 1; ++i) + { + /* This is the implementation of the FORTRAN W(i,j,k,1)/ C style W(i,j,k,0) version from + * Esirkepov paper. All coordinates are rotated before thus we can + * always use C style W(i,j,k,0). + */ + const float_X W = this->DS(line, i, 0) * tmp; + accumulated_J += W; + auto const atomicOp = T_AtomicAddOp{}; + atomicOp(acc, (*cursorJ(i, j)).x(), accumulated_J); + } + } + } + + /** deposites current in z-direction + * + * \param cursorJ cursor pointing at the current density field of the particle's cell + * \param line trajectory of the virtual particle + * \param currentSurfaceDensityZ surface density in z direction + */ + template + DINLINE void cptCurrentZ( + T_Acc const& acc, + CursorJ cursorJ, + const T_Line& line, + const float_X currentSurfaceDensityZ) const { - const float_X s0i = this->S0( line, i, 0 ); - const float_X dsi = this->S1( line, i, 0 ) - s0i; - float_X W = s0i * this->S0( line, j, 1 ) + - float_X( 0.5 ) * ( dsi * s0j + s0i * dsj ) + - ( float_X( 1.0 ) / float_X( 3.0 ) ) * dsi * dsj; + if(currentSurfaceDensityZ == float_X(0.0)) + return; - const float_X j_z = W * currentSurfaceDensityZ; - atomicAdd( - &( ( *cursorJ( i, j ) ).z( ) ), - j_z, - ::alpaka::hierarchy::Threads{} - ); + for(int j = T_begin; j < T_end; ++j) + { + const float_X s0j = this->S0(line, j, 1); + const float_X dsj = this->S1(line, j, 1) - s0j; + for(int i = T_begin; i < T_end; ++i) + { + const float_X s0i = this->S0(line, i, 0); + const float_X dsi = this->S1(line, i, 0) - s0i; + float_X W = s0i * this->S0(line, j, 1) + float_X(0.5) * (dsi * s0j + s0i * dsj) + + (float_X(1.0) / float_X(3.0)) * dsi * dsj; + + const float_X j_z = W * currentSurfaceDensityZ; + auto const atomicOp = T_AtomicAddOp{}; + atomicOp(acc, (*cursorJ(i, j)).z(), j_z); + } + } } - } - } - }; + }; -} // namespace emz -} // 
namespace currentSolver + } // namespace emz + } // namespace currentSolver } // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/EmZ/EmZ.def b/include/picongpu/fields/currentDeposition/EmZ/EmZ.def index 252fe57bcf..d5f224fd91 100644 --- a/include/picongpu/fields/currentDeposition/EmZ/EmZ.def +++ b/include/picongpu/fields/currentDeposition/EmZ/EmZ.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -17,76 +17,75 @@ * If not, see . */ - #pragma once #include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/currentDeposition/Strategy.def" namespace picongpu { -namespace currentSolver -{ - -namespace emz -{ - template< - typename ParticleAssign, - int T_begin, - int T_end, - uint32_t T_dim = simDim - > - struct DepositCurrent; -} //namespace emz + namespace currentSolver + { + namespace emz + { + template + struct DepositCurrent; + } // namespace emz -/** EmZ (Esirkepov meets ZigZag) current deposition - * - * Deposit the particle current with a mixed algorithm based on Esirkepov and - * the ZigZag way splitting. - * EmZ supports arbitrary symmetric shapes and 2D/3D cartesian grids. - * - * ZigZag publications: - * 1. order paper: "A new charge conservation method in electromagnetic - * particle-in-cell simulations", Comput. Phys. Commun. (2003) - * T. Umeda, Y. Omura, T. Tominaga, H. Matsumoto - * DOI: 10.1016/S0010-4655(03)00437-5 - * 2. order paper: "Charge conservation methods for computing current densities - * in electromagnetic particle-in-cell simulations", - * Proceedings of ISSS. Vol. 7. 2005 - * T. Umeda, Y. Omura, H. Matsumoto - * 3. order paper: "High-Order Interpolation Algorithms for Charge Conservation - * in Particle-in-Cell Simulation", Commun. Comput. 
Phys 13 (2013) - * Jinqing Yu, Xiaolin Jin, Weimin Zhou, Bin Li, Yuqiu Gu - * DOI:10.1109/ICCIS.2012.159 - * - * \tparam T_ParticleShape the particle shape for the species, \see picongpu::particles::shapes - * - */ -template< typename ParticleShape > -struct EmZ; + /** EmZ (Esirkepov meets ZigZag) current deposition + * + * Deposit the particle current with a mixed algorithm based on Esirkepov and + * the ZigZag way splitting. + * EmZ supports arbitrary symmetric shapes and 2D/3D cartesian grids. + * + * ZigZag publications: + * 1. order paper: "A new charge conservation method in electromagnetic + * particle-in-cell simulations", Comput. Phys. Commun. (2003) + * T. Umeda, Y. Omura, T. Tominaga, H. Matsumoto + * DOI: 10.1016/S0010-4655(03)00437-5 + * 2. order paper: "Charge conservation methods for computing current densities + * in electromagnetic particle-in-cell simulations", + * Proceedings of ISSS. Vol. 7. 2005 + * T. Umeda, Y. Omura, H. Matsumoto + * 3. order paper: "High-Order Interpolation Algorithms for Charge Conservation + * in Particle-in-Cell Simulation", Commun. Comput. 
Phys 13 (2013) + * Jinqing Yu, Xiaolin Jin, Weimin Zhou, Bin Li, Yuqiu Gu + * DOI:10.1109/ICCIS.2012.159 + * + * @tparam T_ParticleShape the particle shape for the species [picongpu::particles::shapes] + * @tparam T_Strategy Used strategy to reduce the scattered data [currentSolver::strategy] + * + */ + template> + struct EmZ; -} //namespace currentSolver + namespace traits + { + template + struct GetStrategy> + { + using type = T_Strategy; + }; + } // namespace traits -namespace traits -{ + } // namespace currentSolver -/*Get margin of a solver - * class must define a LowerMargin and UpperMargin - */ -template< typename ParticleShape > -struct GetMargin< - picongpu::currentSolver::EmZ< - ParticleShape - > -> -{ -private: - typedef picongpu::currentSolver::EmZ< ParticleShape > Solver; -public: - typedef typename Solver::LowerMargin LowerMargin; - typedef typename Solver::UpperMargin UpperMargin; -}; + namespace traits + { + /*Get margin of a solver + * class must define a LowerMargin and UpperMargin + */ + template + struct GetMargin> + { + private: + using Solver = picongpu::currentSolver::EmZ; -} //namespace traits + public: + using LowerMargin = typename Solver::LowerMargin; + using UpperMargin = typename Solver::UpperMargin; + }; -} //namespace picongpu + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/EmZ/EmZ.hpp b/include/picongpu/fields/currentDeposition/EmZ/EmZ.hpp index 4168f42a77..679999c96b 100644 --- a/include/picongpu/fields/currentDeposition/EmZ/EmZ.hpp +++ b/include/picongpu/fields/currentDeposition/EmZ/EmZ.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -26,174 +26,126 @@ #include "picongpu/fields/currentDeposition/EmZ/DepositCurrent.hpp" #include "picongpu/fields/currentDeposition/Esirkepov/Line.hpp" -namespace picongpu -{ -namespace currentSolver -{ -template< - typename T_ParticleShape -> -struct EmZ +namespace picongpu { - using ParticleAssign = typename T_ParticleShape::ChargeAssignmentOnSupport; - static constexpr int supp = ParticleAssign::support; - - static constexpr int currentLowerMargin = supp / 2 + 1 - (supp + 1) % 2; - static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; - typedef typename pmacc::math::CT::make_Int::type LowerMargin; - typedef typename pmacc::math::CT::make_Int::type UpperMargin; - - PMACC_CASSERT_MSG( - __EmZ_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentLowerMargin && - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentUpperMargin - ); - - - static constexpr int begin = -currentLowerMargin + 1; - static constexpr int end = begin + supp; - - - /** deposit the current of a particle - * - * @tparam DataBoxJ any pmacc DataBox - * - * @param dataBoxJ box shifted to the cell of particle - * @param posEnd position of the particle after it is pushed - * @param velocity velocity of the particle - * @param charge charge of the particle - * @param deltaTime time of one time step - */ - template< - typename DataBoxJ, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - DataBoxJ dataBoxJ, - floatD_X const posEnd, - float3_X const velocity, - float_X const charge, - float_X const/* deltaTime */ - ) + namespace currentSolver { - floatD_X deltaPos; - for ( uint32_t d = 0; d < simDim; ++d ) - deltaPos[d] = ( velocity[d] * DELTA_T ) / cellSize[d]; + template + struct EmZ + { + using ParticleAssign = typename T_ParticleShape::ChargeAssignmentOnSupport; + 
static constexpr int supp = ParticleAssign::support; + + static constexpr int currentLowerMargin = supp / 2 + 1 - (supp + 1) % 2; + static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; + typedef typename pmacc::math::CT::make_Int::type LowerMargin; + typedef typename pmacc::math::CT::make_Int::type UpperMargin; + + PMACC_CASSERT_MSG( + __EmZ_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, + pmacc::math::CT::min::type>::type::value + >= currentLowerMargin + && pmacc::math::CT::min::type>::type::value + >= currentUpperMargin); + + + static constexpr int begin = -currentLowerMargin + 1; + static constexpr int end = begin + supp; + + + /** deposit the current of a particle + * + * @tparam DataBoxJ any pmacc DataBox + * + * @param dataBoxJ box shifted to the cell of particle + * @param posEnd position of the particle after it is pushed + * @param velocity velocity of the particle + * @param charge charge of the particle + * @param deltaTime time of one time step + */ + template + DINLINE void operator()( + T_Acc const& acc, + DataBoxJ dataBoxJ, + floatD_X const posEnd, + float3_X const velocity, + float_X const charge, + float_X const /* deltaTime */ + ) + { + floatD_X deltaPos; + for(uint32_t d = 0; d < simDim; ++d) + deltaPos[d] = (velocity[d] * DELTA_T) / cellSize[d]; + + /*note: all positions are normalized to the grid*/ + const floatD_X posStart(posEnd - deltaPos); + + DataSpace I[2]; + floatD_X relayPoint; + + /* calculate the relay point for the trajectory splitting */ + for(uint32_t d = 0; d < simDim; ++d) + { + constexpr bool isSupportEven = (supp % 2 == 0); + relayPoint[d] = RelayPoint()(I[0][d], I[1][d], posStart[d], posEnd[d]); + } + + Line line; + const float_X chargeDensity = charge / CELL_VOLUME; + + /* Esirkepov implementation for the current deposition */ + emz::DepositCurrent deposit; + + /* calculate positions for the second virtual particle */ + for(uint32_t d = 0; d < simDim; ++d) + { + line.m_pos0[d] = 
calc_InCellPos(posStart[d], I[0][d]); + line.m_pos1[d] = calc_InCellPos(relayPoint[d], I[0][d]); + } + + const bool twoParticlesNeeded = I[0] != I[1]; + + deposit( + acc, + dataBoxJ.shift(I[0]).toCursor(), + line, + chargeDensity, + velocity.z() * (twoParticlesNeeded ? float_X(0.5) : float_X(1.0))); + + /* detect if there is a second virtual particle */ + if(twoParticlesNeeded) + { + /* calculate positions for the second virtual particle */ + for(uint32_t d = 0; d < simDim; ++d) + { + /* switched start and end point */ + line.m_pos1[d] = calc_InCellPos(posEnd[d], I[1][d]); + line.m_pos0[d] = calc_InCellPos(relayPoint[d], I[1][d]); + } + deposit(acc, dataBoxJ.shift(I[1]).toCursor(), line, chargeDensity, velocity.z() * float_X(0.5)); + } + } - /*note: all positions are normalized to the grid*/ - const floatD_X posStart( posEnd - deltaPos ); + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "EmZ"); + return propList; + } - DataSpace I[2]; - floatD_X relayPoint; - /* calculate the relay point for the trajectory splitting */ - for ( uint32_t d = 0; d < simDim; ++d ) - { - constexpr bool isSupportEven = ( supp % 2 == 0 ); - relayPoint[d] = RelayPoint< isSupportEven >()( - I[0][d], - I[1][d], - posStart[d], - posEnd[d] - ); - } - - Line< floatD_X > line; - const float_X chargeDensity = charge / CELL_VOLUME; - - /* Esirkepov implementation for the current deposition */ - emz::DepositCurrent< - ParticleAssign, - begin, - end - > deposit; - - /* calculate positions for the second virtual particle */ - for (uint32_t d = 0; d < simDim; ++d) - { - line.m_pos0[d] = calc_InCellPos( - posStart[d], - I[0][d] - ); - line.m_pos1[d] = calc_InCellPos( - relayPoint[d], - I[0][d] - ); - } - - const bool twoParticlesNeeded = I[0] != I[1]; - - deposit( - acc, - dataBoxJ.shift( I[0] ).toCursor(), - line, - chargeDensity, - velocity.z() * ( twoParticlesNeeded ? 
float_X(0.5) : float_X(1.0) ) - ); - - /* detect if there is a second virtual particle */ - if( twoParticlesNeeded ) - { - /* calculate positions for the second virtual particle */ - for (uint32_t d = 0; d < simDim; ++d) + /** get normalized in cell particle position + * + * @param x position of the particle + * @param i shift of grid (only integral positions are allowed) + * @return in cell position + */ + DINLINE float_X calc_InCellPos(const float_X x, const float_X i) const { - /* switched start and end point */ - line.m_pos1[d] = calc_InCellPos( - posEnd[d], - I[1][d] - ); - line.m_pos0[d] = calc_InCellPos( - relayPoint[d], - I[1][d] - ); + return x - i; } - deposit( - acc, - dataBoxJ.shift( I[1] ).toCursor(), - line, - chargeDensity, - velocity.z() * float_X(0.5) - ); - } - } - - static pmacc::traits::StringProperty - getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "EmZ" ); - return propList; - } - - - /** get normalized in cell particle position - * - * @param x position of the particle - * @param i shift of grid (only integral positions are allowed) - * @return in cell position - */ - DINLINE float_X - calc_InCellPos( - const float_X x, - const float_X i - ) const - { - return x - i; - } -}; + }; -} //namespace currentSolver + } // namespace currentSolver -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.def b/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.def index 585d65842c..26d312976f 100644 --- a/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.def +++ b/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -17,54 +17,79 @@ * If not, see . 
*/ - #pragma once #include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/currentDeposition/Strategy.def" namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; + namespace currentSolver + { + /** + * Implements the current deposition algorithm from T.Zh. Esirkepov + * + * for an arbitrary particle assign function given as a template parameter. + * See available shapes at "intermediateLib/particleShape". + * paper: "Exact charge conservation scheme for Particle-in-Cell simulation + * with an arbitrary form-factor" + * + * @tparam T_ParticleShape the particle shape for the species, [picongpu::particles::shapes] + * @tparam T_Strategy Used strategy to reduce the scattered data [currentSolver::strategy] + * @tparam T_Dim Implementation for 2D or 3D + */ + template< + typename T_ParticleShape, + typename T_Strategy = traits::GetDefaultStrategy_t<>, + uint32_t T_dim = simDim> + struct Esirkepov; + /** Paper like implementation of Esirkepov current deposition + * + * The implementation uses an non optimized stencil width and is therefore over + * 4x slower than the other Esirkepov implementation. + * @attention this solver is only for testing + * + * @tparam T_ParticleShape the particle shape for the species, [picongpu::particles::shapes] + * @tparam T_Strategy Used strategy to reduce the scattered data [currentSolver::strategy] + */ + template> + struct EsirkepovNative; -/** - * Implements the current deposition algorithm from T.Zh. Esirkepov - * - * for an arbitrary particle assign function given as a template parameter. - * See available shapes at "intermediateLib/particleShape". 
- * paper: "Exact charge conservation scheme for Particle-in-Cell simulation - * with an arbitrary form-factor" - * - * \tparam T_ParticleShape the particle shape for the species, \see picongpu::particles::shapes - * \tparam T_Dim Implementation for 2D or 3D - */ -template -struct Esirkepov; + namespace traits + { + template + struct GetStrategy> + { + using type = T_Strategy; + }; -template -struct EsirkepovNative; + template + struct GetStrategy> + { + using type = T_Strategy; + }; -} //namespace currentSolver + } // namespace traits + } // namespace currentSolver -namespace traits -{ + namespace traits + { + /*Get margin of a solver + * class must define a LowerMargin and UpperMargin + */ + template + struct GetMargin> + { + private: + using Solver = picongpu::currentSolver::Esirkepov; -/*Get margin of a solver - * class must define a LowerMargin and UpperMargin - */ -template -struct GetMargin > -{ -private: - typedef picongpu::currentSolver::Esirkepov Solver; -public: - typedef typename Solver::LowerMargin LowerMargin; - typedef typename Solver::UpperMargin UpperMargin; -}; + public: + typedef typename Solver::LowerMargin LowerMargin; + typedef typename Solver::UpperMargin UpperMargin; + }; -} //namespace traits + } // namespace traits -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.hpp b/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.hpp index 63d220d226..8e28e0427f 100644 --- a/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.hpp +++ b/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -24,7 +24,6 @@ #include #include #include -#include #include "picongpu/fields/currentDeposition/Esirkepov/Esirkepov.def" #include "picongpu/fields/currentDeposition/Esirkepov/Line.hpp" @@ -33,252 +32,216 @@ namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; - -template -struct Esirkepov -{ - using ParticleAssign = typename T_ParticleShape::ChargeAssignment; - static constexpr int supp = ParticleAssign::support; - - static constexpr int currentLowerMargin = supp / 2 + 1 - (supp + 1) % 2; - static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; - typedef pmacc::math::CT::Int LowerMargin; - typedef pmacc::math::CT::Int UpperMargin; - - PMACC_CASSERT_MSG( - __Esirkepov_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentLowerMargin && - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentUpperMargin - ); - - float_X charge; - - /* At the moment Esirkepov only supports Yee cells where W is defined at origin (0,0,0) - * - * \todo: please fix me that we can use CenteredCell - */ - template< - typename DataBoxJ, - typename PosType, - typename VelType, - typename ChargeType, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - DataBoxJ dataBoxJ, - const PosType pos, - const VelType velocity, - const ChargeType charge, - const float_X deltaTime - ) + namespace currentSolver { - this->charge = charge; - const float3_X deltaPos = float3_X(velocity.x() * deltaTime / cellSize.x(), - velocity.y() * deltaTime / cellSize.y(), - velocity.z() * deltaTime / cellSize.z()); - const PosType oldPos = pos - deltaPos; - Line line(oldPos, pos); - - DataSpace gridShift; - - /* Define in which direction the particle leaves the cell. - * It is not relevant whether the particle leaves the cell via - * the positive or negative cell border. 
- * - * 0 == stay in cell - * 1 == leave cell - */ - DataSpace leaveCell; - - /* calculate the offset for the virtual coordinate system */ - for(int d=0; d + struct Esirkepov { - int iStart; - int iEnd; - constexpr bool isSupportEven = ( supp % 2 == 0 ); - RelayPoint< isSupportEven >()( - iStart, - iEnd, - line.m_pos0[d], - line.m_pos1[d] - ); - gridShift[d] = iStart < iEnd ? iStart : iEnd; // integer min function - /* particle is leaving the cell */ - leaveCell[d] = iStart != iEnd ? 1 : 0; - /* shift the particle position to the virtual coordinate system */ - line.m_pos0[d] -= gridShift[d]; - line.m_pos1[d] -= gridShift[d]; - } - /* shift current field to the virtual coordinate system */ - auto cursorJ = dataBoxJ.shift(gridShift).toCursor(); - /** - * \brief the following three calls separate the 3D current deposition - * into three independent 1D calls, each for one direction and current component. - * Therefore the coordinate system has to be rotated so that the z-direction - * is always specific. 
- */ - using namespace cursor::tools; - cptCurrent1D( - acc, - DataSpace(leaveCell.y(),leaveCell.z(),leaveCell.x()), - twistVectorFieldAxes >(cursorJ), - rotateOrigin < 1, 2, 0 > (line), - cellSize.x() - ); - cptCurrent1D( - acc, - DataSpace(leaveCell.z(),leaveCell.x(),leaveCell.y()), - twistVectorFieldAxes >(cursorJ), - rotateOrigin < 2, 0, 1 > (line), - cellSize.y() - ); - cptCurrent1D( - acc, - leaveCell, - cursorJ, - line, - cellSize.z() - ); - } - - /** - * deposites current in z-direction - * - * \param leaveCell vector with information if the particle is leaving the cell - * (for each direction, 0 means stays in cell and 1 means leaves cell) - * \param cursorJ cursor pointing at the current density field of the particle's cell - * \param line trajectory of the particle from to last to the current time step - * \param cellEdgeLength length of edge of the cell in z-direction - */ - template< - typename CursorJ, - typename T_Acc - > - DINLINE void cptCurrent1D( - T_Acc const & acc, - const DataSpace& leaveCell, - CursorJ cursorJ, - const Line& line, - const float_X cellEdgeLength - ) - { - /* skip calculation if the particle is not moving in z direction */ - if(line.m_pos0[2] == line.m_pos1[2]) - return; - - constexpr int begin = -currentLowerMargin + 1; - constexpr int end = begin + supp; - - /* We multiply with `cellEdgeLength` due to the fact that the attribute for the - * in-cell particle `position` (and it's change in DELTA_T) is normalize to [0,1) - */ - const float_X currentSurfaceDensity = this->charge * (float_X(1.0) / float_X(CELL_VOLUME * DELTA_T)) * cellEdgeLength; + using ParticleAssign = typename T_ParticleShape::ChargeAssignment; + static constexpr int supp = ParticleAssign::support; + + static constexpr int currentLowerMargin = supp / 2 + 1 - (supp + 1) % 2; + static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; + typedef pmacc::math::CT::Int LowerMargin; + typedef pmacc::math::CT::Int UpperMargin; + + PMACC_CASSERT_MSG( + 
__Esirkepov_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, + pmacc::math::CT::min::type>::type::value + >= currentLowerMargin + && pmacc::math::CT::min::type>::type::value + >= currentUpperMargin); + + float_X charge; + + /* At the moment Esirkepov only supports Yee cells where W is defined at origin (0,0,0) + * + * \todo: please fix me that we can use CenteredCell + */ + template + DINLINE void operator()( + T_Acc const& acc, + DataBoxJ dataBoxJ, + const PosType pos, + const VelType velocity, + const ChargeType charge, + const float_X deltaTime) + { + this->charge = charge; + const float3_X deltaPos = float3_X( + velocity.x() * deltaTime / cellSize.x(), + velocity.y() * deltaTime / cellSize.y(), + velocity.z() * deltaTime / cellSize.z()); + const PosType oldPos = pos - deltaPos; + Line line(oldPos, pos); + + DataSpace gridShift; + + /* Define in which direction the particle leaves the cell. + * It is not relevant whether the particle leaves the cell via + * the positive or negative cell border. + * + * 0 == stay in cell + * 1 == leave cell + */ + DataSpace leaveCell; + + /* calculate the offset for the virtual coordinate system */ + for(int d = 0; d < simDim; ++d) + { + int iStart; + int iEnd; + constexpr bool isSupportEven = (supp % 2 == 0); + RelayPoint()(iStart, iEnd, line.m_pos0[d], line.m_pos1[d]); + gridShift[d] = iStart < iEnd ? iStart : iEnd; // integer min function + /* particle is leaving the cell */ + leaveCell[d] = iStart != iEnd ? 1 : 0; + /* shift the particle position to the virtual coordinate system */ + line.m_pos0[d] -= gridShift[d]; + line.m_pos1[d] -= gridShift[d]; + } + /* shift current field to the virtual coordinate system */ + auto cursorJ = dataBoxJ.shift(gridShift).toCursor(); + /** + * \brief the following three calls separate the 3D current deposition + * into three independent 1D calls, each for one direction and current component. 
+ * Therefore the coordinate system has to be rotated so that the z-direction + * is always specific. + */ + using namespace cursor::tools; + cptCurrent1D( + acc, + DataSpace(leaveCell.y(), leaveCell.z(), leaveCell.x()), + twistVectorFieldAxes>(cursorJ), + rotateOrigin<1, 2, 0>(line), + cellSize.x()); + cptCurrent1D( + acc, + DataSpace(leaveCell.z(), leaveCell.x(), leaveCell.y()), + twistVectorFieldAxes>(cursorJ), + rotateOrigin<2, 0, 1>(line), + cellSize.y()); + cptCurrent1D(acc, leaveCell, cursorJ, line, cellSize.z()); + } - /* pick every cell in the xy-plane that is overlapped by particle's - * form factor and deposit the current for the cells above and beneath - * that cell and for the cell itself. - * - * for loop optimization (help the compiler to generate better code): - * - use a loop with a static range - * - skip invalid indexes with a if condition around the full loop body - * ( this helps the compiler to mask threads without work ) - */ - for( int i = begin ; i < end + 1; ++i ) - if( i < end + leaveCell[0] ) + /** + * deposites current in z-direction + * + * \param leaveCell vector with information if the particle is leaving the cell + * (for each direction, 0 means stays in cell and 1 means leaves cell) + * \param cursorJ cursor pointing at the current density field of the particle's cell + * \param line trajectory of the particle from to last to the current time step + * \param cellEdgeLength length of edge of the cell in z-direction + */ + template + DINLINE void cptCurrent1D( + T_Acc const& acc, + const DataSpace& leaveCell, + CursorJ cursorJ, + const Line& line, + const float_X cellEdgeLength) { - const float_X s0i = S0( line, i, 0 ); - const float_X dsi = S1( line, i, 0 ) - s0i; - for( int j = begin ; j < end + 1; ++j ) - if( j < end + leaveCell[1] ) + /* skip calculation if the particle is not moving in z direction */ + if(line.m_pos0[2] == line.m_pos1[2]) + return; + + constexpr int begin = -currentLowerMargin + 1; + constexpr int end = begin + 
supp; + + /* We multiply with `cellEdgeLength` due to the fact that the attribute for the + * in-cell particle `position` (and it's change in DELTA_T) is normalize to [0,1) + */ + const float_X currentSurfaceDensity + = this->charge * (float_X(1.0) / float_X(CELL_VOLUME * DELTA_T)) * cellEdgeLength; + + /* pick every cell in the xy-plane that is overlapped by particle's + * form factor and deposit the current for the cells above and beneath + * that cell and for the cell itself. + * + * for loop optimization (help the compiler to generate better code): + * - use a loop with a static range + * - skip invalid indexes with a if condition around the full loop body + * ( this helps the compiler to mask threads without work ) + */ + for(int i = begin; i < end + 1; ++i) + if(i < end + leaveCell[0]) { - const float_X s0j = S0( line, j, 1 ); - const float_X dsj = S1( line, j, 1 ) - s0j; + const float_X s0i = S0(line, i, 0); + const float_X dsi = S1(line, i, 0) - s0i; + for(int j = begin; j < end + 1; ++j) + if(j < end + leaveCell[1]) + { + const float_X s0j = S0(line, j, 1); + const float_X dsj = S1(line, j, 1) - s0j; - float_X tmp = - -currentSurfaceDensity * ( - s0i * s0j + - float_X( 0.5 ) * ( dsi * s0j + s0i * dsj ) + - ( float_X( 1.0 ) / float_X( 3.0 ) ) * dsj * dsi - ); + float_X tmp = -currentSurfaceDensity + * (s0i * s0j + float_X(0.5) * (dsi * s0j + s0i * dsj) + + (float_X(1.0) / float_X(3.0)) * dsj * dsi); - float_X accumulated_J = float_X( 0.0 ); + float_X accumulated_J = float_X(0.0); - /* attention: inner loop has no upper bound `end + 1` because - * the current for the point `end` is always zero, - * therefore we skip the calculation - */ - for( int k = begin ; k < end; ++k ) - if( k < end + leaveCell[2] - 1 ) - { - /* This is the implementation of the FORTRAN W(i,j,k,3)/ C style W(i,j,k,2) version from - * Esirkepov paper. All coordinates are rotated before thus we can - * always use C style W(i,j,k,2). 
+ /* attention: inner loop has no upper bound `end + 1` because + * the current for the point `end` is always zero, + * therefore we skip the calculation */ - const float_X W = DS( line, k, 2 ) * tmp; - accumulated_J += W; - atomicAdd( &( ( *cursorJ( i, j, k ) ).z() ), accumulated_J, ::alpaka::hierarchy::Threads{} ); + for(int k = begin; k < end; ++k) + if(k < end + leaveCell[2] - 1) + { + /* This is the implementation of the FORTRAN W(i,j,k,3)/ C style W(i,j,k,2) + * version from Esirkepov paper. All coordinates are rotated before thus we can + * always use C style W(i,j,k,2). + */ + const float_X W = DS(line, k, 2) * tmp; + accumulated_J += W; + auto const atomicOp = typename T_Strategy::BlockReductionOp{}; + atomicOp(acc, (*cursorJ(i, j, k)).z(), accumulated_J); + } } } } - } - - /** calculate S0 (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1,2} means {x,y,z} - * different to Esirkepov paper, here we use C style - */ - DINLINE float_X S0(const Line& line, const float_X gridPoint, const uint32_t d) - { - return ParticleAssign()(gridPoint - line.m_pos0[d]); - } + /** calculate S0 (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1,2} means {x,y,z} + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X S0(const Line& line, const float_X gridPoint, const uint32_t d) + { + return ParticleAssign()(gridPoint - line.m_pos0[d]); + } - /** calculate S1 (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1,2} means {x,y,z} - * different to Esirkepov paper, here we use C style - */ - DINLINE float_X S1(const Line& line, const float_X gridPoint, const 
uint32_t d) - { - return ParticleAssign()(gridPoint - line.m_pos1[d]); - } + /** calculate S1 (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1,2} means {x,y,z} + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X S1(const Line& line, const float_X gridPoint, const uint32_t d) + { + return ParticleAssign()(gridPoint - line.m_pos1[d]); + } - /** calculate DS (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1,2} means {x,y,z}] - * different to Esirkepov paper, here we use C style - */ - DINLINE float_X DS(const Line& line, const float_X gridPoint, const uint32_t d) - { - return ParticleAssign()(gridPoint - line.m_pos1[d]) - ParticleAssign()(gridPoint - line.m_pos0[d]); - } + /** calculate DS (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1,2} means {x,y,z}] + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X DS(const Line& line, const float_X gridPoint, const uint32_t d) + { + return ParticleAssign()(gridPoint - line.m_pos1[d]) - ParticleAssign()(gridPoint - line.m_pos0[d]); + } - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "Esirkepov" ); - return propList; - } -}; + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Esirkepov"); + return propList; + } + }; -} //namespace currentSolver + } // namespace currentSolver -} //namespace picongpu +} // namespace picongpu #include "picongpu/fields/currentDeposition/Esirkepov/Esirkepov2D.hpp" diff --git 
a/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov2D.hpp b/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov2D.hpp index 9391726dab..195f201c4a 100644 --- a/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov2D.hpp +++ b/include/picongpu/fields/currentDeposition/Esirkepov/Esirkepov2D.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -23,7 +23,6 @@ #include #include #include -#include #include "picongpu/fields/currentDeposition/Esirkepov/Esirkepov.hpp" #include "picongpu/fields/currentDeposition/Esirkepov/Line.hpp" @@ -32,283 +31,233 @@ namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; - -/** - * Implements the current deposition algorithm from T.Zh. Esirkepov - * - * for an arbitrary particle assign function given as a template parameter. - * See available shapes at "intermediateLib/particleShape". - * paper: "Exact charge conservation scheme for Particle-in-Cell simulation - * with an arbitrary form-factor" - */ -template -struct Esirkepov -{ - using ParticleAssign = typename T_ParticleShape::ChargeAssignment; - static constexpr int supp = ParticleAssign::support; - - static constexpr int currentLowerMargin = supp / 2 + 1 - (supp + 1) % 2; - static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; - typedef typename pmacc::math::CT::make_Int::type LowerMargin; - typedef typename pmacc::math::CT::make_Int::type UpperMargin; - - PMACC_CASSERT_MSG( - __Esirkepov2D_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentLowerMargin && - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentUpperMargin - ); - - static constexpr int begin = -currentLowerMargin + 1; - static
constexpr int end = begin + supp; - - float_X charge; - - template< - typename DataBoxJ, - typename PosType, - typename VelType, - typename ChargeType, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - DataBoxJ dataBoxJ, - const PosType pos, - const VelType velocity, - const ChargeType charge, - const float_X deltaTime - ) + namespace currentSolver { - this->charge = charge; - const float2_X deltaPos = float2_X(velocity.x() * deltaTime / cellSize.x(), - velocity.y() * deltaTime / cellSize.y()); - const PosType oldPos = pos - deltaPos; - Line line(oldPos, pos); - - DataSpace gridShift; - /* Define in which direction the particle leaves the cell. - * It is not important whether the particle move over the positive or negative - * cell border. + /** + * Implements the current deposition algorithm from T.Zh. Esirkepov * - * 0 == stay in cell - * 1 == leave cell + * for an arbitrary particle assign function given as a template parameter. + * See available shapes at "intermediateLib/particleShape". + * paper: "Exact charge conservation scheme for Particle-in-Cell simulation + * with an arbitrary form-factor" */ - DataSpace leaveCell; - - /* calculate the offset for the virtual coordinate system */ - for(int d=0; d + struct Esirkepov { - int iStart; - int iEnd; - constexpr bool isSupportEven = ( supp % 2 == 0 ); - RelayPoint< isSupportEven >()( - iStart, - iEnd, - line.m_pos0[d], - line.m_pos1[d] - ); - gridShift[d] = iStart < iEnd ? iStart : iEnd; // integer min function - /* particle is leaving the cell */ - leaveCell[d] = iStart != iEnd ?
1 : 0; - /* shift the particle position to the virtual coordinate system */ - line.m_pos0[d] -= gridShift[d]; - line.m_pos1[d] -= gridShift[d]; - } - /* shift current field to the virtual coordinate system */ - auto cursorJ = dataBoxJ.shift(gridShift).toCursor(); - - /** - * \brief the following three calls separate the 3D current deposition - * into three independent 1D calls, each for one direction and current component. - * Therefore the coordinate system has to be rotated so that the z-direction - * is always specific. - */ - - using namespace cursor::tools; - cptCurrent1D( - acc, - leaveCell, - cursorJ, - line, - cellSize.x() - ); - cptCurrent1D( - acc, - DataSpace( - leaveCell[1], - leaveCell[0] - ), - twistVectorFieldAxes >(cursorJ), - rotateOrigin < 1, 0 > (line), - cellSize.y() - ); - cptCurrentZ( - acc, - leaveCell, - cursorJ, - line, - velocity.z() - ); - } - - /** - * deposites current in z-direction - * \param leaveCell vector with information if the particle is leaving the cell - * (for each direction, 0 means stays in cell and 1 means leaves cell) - * \param cursorJ cursor pointing at the current density field of the particle's cell - * \param line trajectory of the particle from to last to the current time step - * \param cellEdgeLength length of edge of the cell in z-direction - * - * @{ - */ - template< - typename CursorJ, - typename T_Acc - > - DINLINE void cptCurrent1D( - T_Acc const & acc, - const DataSpace& leaveCell, - CursorJ cursorJ, - const Line& line, - const float_X cellEdgeLength - ) - { - /* skip calculation if the particle is not moving in x direction */ - if(line.m_pos0[0] == line.m_pos1[0]) - return; + using ParticleAssign = typename T_ParticleShape::ChargeAssignment; + static constexpr int supp = ParticleAssign::support; + + static constexpr int currentLowerMargin = supp / 2 + 1 - (supp + 1) % 2; + static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; + typedef typename pmacc::math::CT::make_Int::type LowerMargin; + typedef
typename pmacc::math::CT::make_Int::type UpperMargin; + + PMACC_CASSERT_MSG( + __Esirkepov2D_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, + pmacc::math::CT::min::type>::type::value + >= currentLowerMargin + && pmacc::math::CT::min::type>::type::value + >= currentUpperMargin); + + static constexpr int begin = -currentLowerMargin + 1; + static constexpr int end = begin + supp; + + float_X charge; + + template + DINLINE void operator()( + T_Acc const& acc, + DataBoxJ dataBoxJ, + const PosType pos, + const VelType velocity, + const ChargeType charge, + const float_X deltaTime) + { + this->charge = charge; + const float2_X deltaPos + = float2_X(velocity.x() * deltaTime / cellSize.x(), velocity.y() * deltaTime / cellSize.y()); + const PosType oldPos = pos - deltaPos; + Line line(oldPos, pos); + + DataSpace gridShift; + /* Define in which direction the particle leaves the cell. + * It is not important whether the particle move over the positive or negative + * cell border. + * + * 0 == stay in cell + * 1 == leave cell + */ + DataSpace leaveCell; + + /* calculate the offset for the virtual coordinate system */ + for(int d = 0; d < simDim; ++d) + { + int iStart; + int iEnd; + constexpr bool isSupportEven = (supp % 2 == 0); + RelayPoint()(iStart, iEnd, line.m_pos0[d], line.m_pos1[d]); + gridShift[d] = iStart < iEnd ? iStart : iEnd; // integer min function + /* particle is leaving the cell */ + leaveCell[d] = iStart != iEnd ? 1 : 0; + /* shift the particle position to the virtual coordinate system */ + line.m_pos0[d] -= gridShift[d]; + line.m_pos1[d] -= gridShift[d]; + } + /* shift current field to the virtual coordinate system */ + auto cursorJ = dataBoxJ.shift(gridShift).toCursor(); + + /** + * \brief the following three calls separate the 3D current deposition + * into three independent 1D calls, each for one direction and current component. + * Therefore the coordinate system has to be rotated so that the z-direction + * is always specific.
+ */ - /* We multiply with `cellEdgeLength` due to the fact that the attribute for the - * in-cell particle `position` (and it's change in DELTA_T) is normalize to [0,1) - */ - const float_X currentSurfaceDensity = this->charge * ( float_X( 1.0 ) / float_X( CELL_VOLUME * DELTA_T ) ) * cellEdgeLength; + using namespace cursor::tools; + cptCurrent1D(acc, leaveCell, cursorJ, line, cellSize.x()); + cptCurrent1D( + acc, + DataSpace(leaveCell[1], leaveCell[0]), + twistVectorFieldAxes>(cursorJ), + rotateOrigin<1, 0>(line), + cellSize.y()); + cptCurrentZ(acc, leaveCell, cursorJ, line, velocity.z()); + } - for( int j = begin; j < end + 1; ++j ) - if( j < end + leaveCell[1] ) + /** + * deposits current in z-direction + * \param leaveCell vector with information if the particle is leaving the cell + * (for each direction, 0 means stays in cell and 1 means leaves cell) + * \param cursorJ cursor pointing at the current density field of the particle's cell + * \param line trajectory of the particle from the last to the current time step + * \param cellEdgeLength length of edge of the cell in z-direction + * + * @{ + */ + template + DINLINE void cptCurrent1D( + T_Acc const& acc, + const DataSpace& leaveCell, + CursorJ cursorJ, + const Line& line, + const float_X cellEdgeLength) { - const float_X s0j = S0( line, j, 1 ); - const float_X dsj = S1( line, j, 1 ) - s0j; + /* skip calculation if the particle is not moving in x direction */ + if(line.m_pos0[0] == line.m_pos1[0]) + return; - float_X tmp = -currentSurfaceDensity * - ( - s0j + - float_X( 0.5 ) * dsj - ); - - float_X accumulated_J = float_X(0.0); - /* attention: inner loop has no upper bound `end + 1` because - * the current for the point `end` is always zero, - * therefore we skip the calculation + /* We multiply with `cellEdgeLength` due to the fact that the attribute for the + * in-cell particle `position` (and its change in DELTA_T) is normalized to [0,1) */ - for( int i = begin; i < end; ++i ) - if( i < end +
leaveCell[0] - 1 ) + const float_X currentSurfaceDensity + = this->charge * (float_X(1.0) / float_X(CELL_VOLUME * DELTA_T)) * cellEdgeLength; + + for(int j = begin; j < end + 1; ++j) + if(j < end + leaveCell[1]) { - /* This is the implementation of the FORTRAN W(i,j,k,1)/ C style W(i,j,k,0) version from - * Esirkepov paper. All coordinates are rotated before thus we can - * always use C style W(i,j,k,0). + const float_X s0j = S0(line, j, 1); + const float_X dsj = S1(line, j, 1) - s0j; + + float_X tmp = -currentSurfaceDensity * (s0j + float_X(0.5) * dsj); + + float_X accumulated_J = float_X(0.0); + /* attention: inner loop has no upper bound `end + 1` because + * the current for the point `end` is always zero, + * therefore we skip the calculation */ - const float_X W = DS( line, i, 0 ) * tmp; - accumulated_J += W; - atomicAdd( &( ( *cursorJ( i, j ) ).x() ), accumulated_J, ::alpaka::hierarchy::Threads{} ); + for(int i = begin; i < end; ++i) + if(i < end + leaveCell[0] - 1) + { + /* This is the implementation of the FORTRAN W(i,j,k,1)/ C style W(i,j,k,0) version + * from Esirkepov paper. All coordinates are rotated before thus we can always use C + * style W(i,j,k,0).
+ */ + const float_X W = DS(line, i, 0) * tmp; + accumulated_J += W; + auto const atomicOp = typename T_Strategy::BlockReductionOp{}; + atomicOp(acc, (*cursorJ(i, j)).x(), accumulated_J); + } } } - } - - template< - typename CursorJ, - typename T_Acc - > - DINLINE void cptCurrentZ( - T_Acc const & acc, - const DataSpace& leaveCell, - CursorJ cursorJ, - const Line& line, - const float_X v_z - ) - { - if( v_z == float_X( 0.0 ) ) - return; - - const float_X currentSurfaceDensityZ = this->charge * ( float_X( 1.0 ) / float_X( CELL_VOLUME ) ) * v_z; - - for( int j = begin; j < end + 1; ++j ) - if( j < end + leaveCell[1] ) + template + DINLINE void cptCurrentZ( + T_Acc const& acc, + const DataSpace& leaveCell, + CursorJ cursorJ, + const Line& line, + const float_X v_z) { - const float_X s0j = S0( line, j, 1 ); - const float_X dsj = S1( line, j, 1 ) - s0j; + if(v_z == float_X(0.0)) + return; - for( int i = begin; i < end + 1; ++i ) - if( i < end + leaveCell[0] ) - { - const float_X s0i = S0( line, i, 0 ); - const float_X dsi = S1( line, i, 0 ) - s0i; - float_X W = s0i * S0( line, j, 1 ) + - float_X( 0.5 ) * ( dsi * s0j + s0i * dsj ) + - ( float_X( 1.0 ) / float_X( 3.0 ) ) * dsi * dsj; + const float_X currentSurfaceDensityZ = this->charge * (float_X(1.0) / float_X(CELL_VOLUME)) * v_z; - const float_X j_z = W * currentSurfaceDensityZ; - atomicAdd( &( ( *cursorJ( i, j ) ).z() ), j_z, ::alpaka::hierarchy::Threads{} ); + for(int j = begin; j < end + 1; ++j) + if(j < end + leaveCell[1]) + { + const float_X s0j = S0(line, j, 1); + const float_X dsj = S1(line, j, 1) - s0j; + + for(int i = begin; i < end + 1; ++i) + if(i < end + leaveCell[0]) + { + const float_X s0i = S0(line, i, 0); + const float_X dsi = S1(line, i, 0) - s0i; + float_X W = s0i * s0j + float_X(0.5) * (dsi * s0j + s0i * dsj) + + (float_X(1.0) / float_X(3.0)) * dsi * dsj; + + const float_X j_z = W * currentSurfaceDensityZ; + auto const atomicOp = typename T_Strategy::BlockReductionOp{}; + atomicOp(acc,
(*cursorJ(i, j)).z(), j_z); + } } } - } - - /** - * @} - */ - /** calculate S0 (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1} means {x,y} - * different to Esirkepov paper, here we use C style - */ - DINLINE float_X S0(const Line& line, const float_X gridPoint, const uint32_t d) - { - return ParticleAssign()(gridPoint - line.m_pos0[d]); - } + /** + * @} + */ + + /** calculate S0 (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1} means {x,y} + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X S0(const Line& line, const float_X gridPoint, const uint32_t d) + { + return ParticleAssign()(gridPoint - line.m_pos0[d]); + } - /** calculate S1 (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1,2} means {x,y,z} - * different to Esirkepov paper, here we use C style - */ - DINLINE float_X S1(const Line& line, const float_X gridPoint, const uint32_t d) - { - return ParticleAssign()(gridPoint - line.m_pos1[d]); - } + /** calculate S1 (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1} means {x,y} + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X S1(const Line& line, const float_X gridPoint, const uint32_t d) + { + return ParticleAssign()(gridPoint - line.m_pos1[d]); + } - /** calculate DS (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {0,1} means {x,y}
- * different to Esirkepov paper, here we use C style - */ - DINLINE float_X DS(const Line& line, const float_X gridPoint, const uint32_t d) - { - return ParticleAssign()(gridPoint - line.m_pos1[d]) - ParticleAssign()(gridPoint - line.m_pos0[d]); - } + /** calculate DS (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {0,1} means {x,y} + * different to Esirkepov paper, here we use C style + */ + DINLINE float_X DS(const Line& line, const float_X gridPoint, const uint32_t d) + { + return ParticleAssign()(gridPoint - line.m_pos1[d]) - ParticleAssign()(gridPoint - line.m_pos0[d]); + } - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "Esirkepov" ); - return propList; - } -}; + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Esirkepov"); + return propList; + } + }; -} //namespace currentSolver + } // namespace currentSolver -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Esirkepov/EsirkepovNative.hpp b/include/picongpu/fields/currentDeposition/Esirkepov/EsirkepovNative.hpp index 86465f6435..fc9639679e 100644 --- a/include/picongpu/fields/currentDeposition/Esirkepov/EsirkepovNative.hpp +++ b/include/picongpu/fields/currentDeposition/Esirkepov/EsirkepovNative.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -25,191 +25,174 @@ #include #include #include -#include #include "picongpu/fields/currentDeposition/Esirkepov/Line.hpp" namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; - -/** - * Implements the current deposition algorithm from T.Zh.
Esirkepov - * - * for an arbitrary particle assign function given as a template parameter. - * See available shapes at "intermediateLib/particleShape". - * paper: "Exact charge conservation scheme for Particle-in-Cell simulation - * with an arbitrary form-factor" - */ -template -struct EsirkepovNative -{ - using ParticleAssign = typename T_ParticleShape::ChargeAssignment; - static constexpr int supp = ParticleAssign::support; - - static constexpr int currentLowerMargin = supp / 2 + 1; - static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; - typedef pmacc::math::CT::Int LowerMargin; - typedef pmacc::math::CT::Int UpperMargin; - - PMACC_CASSERT_MSG( - __EsirkepovNative_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentLowerMargin && - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= currentUpperMargin - ); - - /* iterate over all grid points */ - static constexpr int begin = -currentLowerMargin; - static constexpr int end = currentUpperMargin + 1; - - float_X charge; - - /* At the moment Esirkepov only supports Yee cells where W is defined at origin (0,0,0) - * - * \todo: please fix me that we can use CenteredCell - */ - template< - typename DataBoxJ, - typename PosType, - typename VelType, - typename ChargeType, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - DataBoxJ dataBoxJ, - const PosType pos, - const VelType velocity, - const ChargeType charge, const float_X deltaTime - ) + namespace currentSolver { - this->charge = charge; - const float3_X deltaPos = float3_X(velocity.x() * deltaTime / cellSize.x(), - velocity.y() * deltaTime / cellSize.y(), - velocity.z() * deltaTime / cellSize.z()); - const PosType oldPos = pos - deltaPos; - Line line(oldPos, pos); - auto cursorJ = dataBoxJ.toCursor(); - /** - * \brief the following three
calls separate the 3D current deposition - * into three independent 1D calls, each for one direction and current component. - * Therefore the coordinate system has to be rotated so that the z-direction - * is always specific. - */ - - using namespace cursor::tools; - cptCurrent1D(acc, twistVectorFieldAxes >(cursorJ), rotateOrigin < 1, 2, 0 > (line), cellSize.x()); - cptCurrent1D(acc, twistVectorFieldAxes >(cursorJ), rotateOrigin < 2, 0, 1 > (line), cellSize.y()); - cptCurrent1D(acc, cursorJ, line, cellSize.z()); - } - - /** - * deposites current in z-direction - * \param cursorJ cursor pointing at the current density field of the particle's cell - * \param line trajectory of the particle from to last to the current time step - * \param cellEdgeLength length of edge of the cell in z-direction - */ - template< - typename CursorJ, - typename T_Acc - > - DINLINE void cptCurrent1D( - T_Acc const & acc, - CursorJ cursorJ, - const Line& line, - const float_X cellEdgeLength - ) - { - /* pick every cell in the xy-plane that is overlapped by particle's - * form factor and deposit the current for the cells above and beneath - * that cell and for the cell itself. + * Implements the current deposition algorithm from T.Zh. Esirkepov + * + * for an arbitrary particle assign function given as a template parameter. + * See available shapes at "intermediateLib/particleShape".
+ * paper: "Exact charge conservation scheme for Particle-in-Cell simulation + * with an arbitrary form-factor" */ - for (int i = begin; i < end; ++i) + template + struct EsirkepovNative { - for (int j = begin; j < end; ++j) + using ParticleAssign = typename T_ParticleShape::ChargeAssignment; + static constexpr int supp = ParticleAssign::support; + + static constexpr int currentLowerMargin = supp / 2 + 1; + static constexpr int currentUpperMargin = (supp + 1) / 2 + 1; + typedef pmacc::math::CT::Int LowerMargin; + typedef pmacc::math::CT::Int UpperMargin; + + PMACC_CASSERT_MSG( + __EsirkepovNative_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, + pmacc::math::CT::min::type>::type::value + >= currentLowerMargin + && pmacc::math::CT::min::type>::type::value + >= currentUpperMargin); + + /* iterate over all grid points */ + static constexpr int begin = -currentLowerMargin; + static constexpr int end = currentUpperMargin + 1; + + float_X charge; + + /* At the moment Esirkepov only supports Yee cells where W is defined at origin (0,0,0) + * + * \todo: please fix me that we can use CenteredCell + */ + template + DINLINE void operator()( + T_Acc const& acc, + DataBoxJ dataBoxJ, + const PosType pos, + const VelType velocity, + const ChargeType charge, + const float_X deltaTime) { - float_X tmp = - S0(line, i, 1) * S0(line, j, 2) + - float_X(0.5) * DS(line, i, 1) * S0(line, j, 2) + - float_X(0.5) * S0(line, i, 1) * DS(line, j, 2) + - (float_X(1.0) / float_X(3.0)) * DS(line, i, 1) * DS(line, j, 2); - - float_X accumulated_J = float_X(0.0); - for (int k = begin; k < end; ++k) + this->charge = charge; + const float3_X deltaPos = velocity * deltaTime / cellSize; + const PosType oldPos = pos - deltaPos; + const Line line(oldPos, pos); + auto cursorJ = dataBoxJ.toCursor(); + + /** + * \brief the following three calls separate the 3D current deposition + * into three independent 1D calls, each for one direction and current component.
+ * Therefore the coordinate system has to be rotated so that the z-direction + * is always specific. + */ + + using namespace cursor::tools; + cptCurrent1D( + acc, + twistVectorFieldAxes>(cursorJ), + rotateOrigin<1, 2, 0>(line), + cellSize.x()); + cptCurrent1D( + acc, + twistVectorFieldAxes>(cursorJ), + rotateOrigin<2, 0, 1>(line), + cellSize.y()); + cptCurrent1D(acc, cursorJ, line, cellSize.z()); + } + + /** + * deposits current in z-direction + * \param cursorJ cursor pointing at the current density field of the particle's cell + * \param line trajectory of the particle from the last to the current time step + * \param cellEdgeLength length of edge of the cell in z-direction + */ + template + DINLINE void cptCurrent1D( + T_Acc const& acc, + CursorJ cursorJ, + const Line& line, + const float_X cellEdgeLength) + { + /* pick every cell in the xy-plane that is overlapped by particle's + * form factor and deposit the current for the cells above and beneath + * that cell and for the cell itself.
+ */ + for(int i = begin; i < end; ++i) { - float_X W = DS(line, k, 3) * tmp; - /* We multiply with `cellEdgeLength` due to the fact that the attribute for the - * in-cell particle `position` (and it's change in DELTA_T) is normalize to [0,1) */ - accumulated_J += -this->charge * (float_X(1.0) / float_X(CELL_VOLUME * DELTA_T)) * W * cellEdgeLength; - atomicAdd(&((*cursorJ(i, j, k)).z()), accumulated_J, ::alpaka::hierarchy::Threads{}); + for(int j = begin; j < end; ++j) + { + float_X tmp = S0(line, i, 1) * S0(line, j, 2) + float_X(0.5) * DS(line, i, 1) * S0(line, j, 2) + + float_X(0.5) * S0(line, i, 1) * DS(line, j, 2) + + (float_X(1.0) / float_X(3.0)) * DS(line, i, 1) * DS(line, j, 2); + + float_X accumulated_J = float_X(0.0); + for(int k = begin; k < end; ++k) + { + const float_X W = DS(line, k, 3) * tmp; + /* We multiply with `cellEdgeLength` due to the fact that the attribute for the + * in-cell particle `position` (and its change in DELTA_T) is normalized to [0,1) */ + accumulated_J += -this->charge * (float_X(1.0) / float_X(CELL_VOLUME * DELTA_T)) * W + * cellEdgeLength; + auto const atomicOp = typename T_Strategy::BlockReductionOp{}; + atomicOp(acc, (*cursorJ(i, j, k)).z(), accumulated_J); + } + } } } - } - - } - /** calculate S0 (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {1,2,3} means {x,y,z} - * same like in Esirkepov paper (FORTAN style) - */ - DINLINE float_X S0(const Line& line, const float_X gridPoint, const float_X d) - { - return ParticleAssign()(gridPoint - line.m_pos0[d - 1]); - } - - /** calculate DS (see paper) - * @param line element with previous and current position of the particle - * @param gridPoint used grid point to evaluate assignment shape - * @param d dimension range {1,2,3} means {x,y,z} - * same like in Esirkepov paper (FORTAN style) - */ - DINLINE float_X DS(const Line& line, const float_X
gridPoint, const float_X d) - { - return ParticleAssign()(gridPoint - line.m_pos1[d - 1]) - ParticleAssign()(gridPoint - line.m_pos0[d - 1]); - } + /** calculate S0 (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {1,2,3} means {x,y,z} + * same as in Esirkepov paper (FORTRAN style) + */ + DINLINE float_X S0(const Line& line, const float_X gridPoint, const int d) + { + return ParticleAssign()(gridPoint - line.m_pos0[d - 1]); + } - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "Esirkepov" ); - propList["param"] = "native implementation"; - return propList; - } -}; + /** calculate DS (see paper) + * @param line element with previous and current position of the particle + * @param gridPoint used grid point to evaluate assignment shape + * @param d dimension range {1,2,3} means {x,y,z} + * same as in Esirkepov paper (FORTRAN style) + */ + DINLINE float_X DS(const Line& line, const float_X gridPoint, const int d) + { + return ParticleAssign()(gridPoint - line.m_pos1[d - 1]) + - ParticleAssign()(gridPoint - line.m_pos0[d - 1]); + } -} //namespace currentSolver + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Esirkepov"); + propList["param"] = "native implementation"; + return propList; + } + }; -namespace traits -{ + } // namespace currentSolver -/*Get margin of a solver - * class must define a LowerMargin and UpperMargin - */ -template -struct GetMargin > -{ -private: - typedef picongpu::currentSolver::EsirkepovNative Solver; -public: - typedef typename Solver::LowerMargin LowerMargin; - typedef typename Solver::UpperMargin UpperMargin; -}; + namespace traits + { + /*Get margin of a solver + * class must define a LowerMargin and UpperMargin + */ + template + struct GetMargin> + { + private: + using
Solver = picongpu::currentSolver::EsirkepovNative; -} //namespace traits + public: + using LowerMargin = typename Solver::LowerMargin; + using UpperMargin = typename Solver::UpperMargin; + }; -} //namespace picongpu + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Esirkepov/Line.hpp b/include/picongpu/fields/currentDeposition/Esirkepov/Line.hpp index 60b2ad8aa0..4fd8b59f4f 100644 --- a/include/picongpu/fields/currentDeposition/Esirkepov/Line.hpp +++ b/include/picongpu/fields/currentDeposition/Esirkepov/Line.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include "picongpu/simulation_defines.hpp" @@ -26,78 +25,79 @@ namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; - -template -struct Line -{ - using type = T_Type; - - type m_pos0; - type m_pos1; - - DINLINE Line() + namespace currentSolver { - } - - DINLINE Line(const type& pos0, const type & pos1) : m_pos0(pos0), m_pos1(pos1) - { - } - - DINLINE Line& operator-=(const type & rhs) - { - m_pos0 -= rhs; - m_pos1 -= rhs; - return *this; - } -}; - -template -DINLINE Line operator-(const Line& lhs, const T_Type& rhs) -{ - return Line(lhs.m_pos0 - rhs, lhs.m_pos1 - rhs); -} - -template -DINLINE Line operator-(const T_Type& lhs, const Line& rhs) -{ - return Line(lhs - rhs.m_pos0, lhs - rhs.m_pos1); -} - -///auxillary function to rotate a vector - -template -DINLINE float3_X rotateOrigin(const float3_X& vec) -{ - return float3_X(vec[newXAxis], vec[newYAxis], vec[newZAxis]); -} - -template -DINLINE float2_X rotateOrigin(const float2_X& vec) -{ - return float2_X(vec[newXAxis], vec[newYAxis]); -} -///auxillary function to rotate a line - -template -DINLINE Line rotateOrigin(const Line& line) -{ - Line result(rotateOrigin (line.m_pos0), - rotateOrigin (line.m_pos1)); - return result; -} - -template -DINLINE 
Line rotateOrigin(const Line& line) -{ - Line result(rotateOrigin (line.m_pos0), - rotateOrigin (line.m_pos1)); - return result; -} - -} //namespace currentSolver - -} //namespace picongpu - + using namespace pmacc; + + template + struct Line + { + using type = T_Type; + + type m_pos0; + type m_pos1; + + DINLINE Line() + { + } + + DINLINE Line(const type& pos0, const type& pos1) : m_pos0(pos0), m_pos1(pos1) + { + } + + DINLINE Line& operator-=(const type& rhs) + { + m_pos0 -= rhs; + m_pos1 -= rhs; + return *this; + } + }; + + template + DINLINE Line operator-(const Line& lhs, const T_Type& rhs) + { + return Line(lhs.m_pos0 - rhs, lhs.m_pos1 - rhs); + } + + template + DINLINE Line operator-(const T_Type& lhs, const Line& rhs) + { + return Line(lhs - rhs.m_pos0, lhs - rhs.m_pos1); + } + + /// auxillary function to rotate a vector + + template + DINLINE float3_X rotateOrigin(const float3_X& vec) + { + return float3_X(vec[newXAxis], vec[newYAxis], vec[newZAxis]); + } + + template + DINLINE float2_X rotateOrigin(const float2_X& vec) + { + return float2_X(vec[newXAxis], vec[newYAxis]); + } + /// auxillary function to rotate a line + + template + DINLINE Line rotateOrigin(const Line& line) + { + Line result( + rotateOrigin(line.m_pos0), + rotateOrigin(line.m_pos1)); + return result; + } + + template + DINLINE Line rotateOrigin(const Line& line) + { + Line result( + rotateOrigin(line.m_pos0), + rotateOrigin(line.m_pos1)); + return result; + } + + } // namespace currentSolver + +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/RelayPoint.hpp b/include/picongpu/fields/currentDeposition/RelayPoint.hpp index 285f7d7671..731bc68c0f 100644 --- a/include/picongpu/fields/currentDeposition/RelayPoint.hpp +++ b/include/picongpu/fields/currentDeposition/RelayPoint.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -23,68 +23,56 @@ namespace picongpu { -namespace currentSolver -{ - template< bool isEven > - struct RelayPoint + namespace currentSolver { - /** calculate virtual point were we split our particle trajectory - * - * The relay point calculation differs from the ZigZag paper version in the point - * that the trajectory of a particle which does not leave the cell is not split. - * The relay point for a particle which does not leave the cell is set to the - * current position `x_2` - * - * If `i_1 == i_2` than the trajectory is not split. - * - * This function assumes that the shape in later steps is always evaluated - * at grid integral points. - * - * @param i_1[out] offset to shift the coordinate system for the first - * particle at position x_1 - * @param i_2[out] offset to shift the coordinate system for the second - * particle at position x_2 - * @param x_1 begin position of the particle trajectory - * @param x_2 end position of the particle trajectory - * @return relay point for particle trajectory - */ - DINLINE float_X - operator( )( - int& i_1, - int& i_2, - const float_X x_1, - const float_X x_2 - ) const + template + struct RelayPoint { - using namespace pmacc; - i_1 = math::floor( x_1 ); - i_2 = math::floor( x_2 ); + /** calculate virtual point were we split our particle trajectory + * + * The relay point calculation differs from the ZigZag paper version in the point + * that the trajectory of a particle which does not leave the cell is not split. + * The relay point for a particle which does not leave the cell is set to the + * current position `x_2` + * + * If `i_1 == i_2` than the trajectory is not split. + * + * This function assumes that the shape in later steps is always evaluated + * at grid integral points. 
+ * + * @param i_1[out] offset to shift the coordinate system for the first + * particle at position x_1 + * @param i_2[out] offset to shift the coordinate system for the second + * particle at position x_2 + * @param x_1 begin position of the particle trajectory + * @param x_2 end position of the particle trajectory + * @return relay point for particle trajectory + */ + DINLINE float_X operator()(int& i_1, int& i_2, const float_X x_1, const float_X x_2) const + { + using namespace pmacc; + i_1 = math::floor(x_1); + i_2 = math::floor(x_2); - return i_1 == i_2 ? x_2 : math::max( i_1, i_2 ); - } - }; + return i_1 == i_2 ? x_2 : math::max(i_1, i_2); + } + }; - template<> - struct RelayPoint< false > - { - /** calculate virtual point were we split our particle trajectory - * - * @see RelayPoint< >::operator( ) description - */ - DINLINE float_X - operator( )( - int& i_1, - int& i_2, - const float_X x_1, - const float_X x_2 - ) const + template<> + struct RelayPoint { - i_1 = math::float2int_rd( x_1 + float_X( 0.5 ) ); - i_2 = math::float2int_rd( x_2 + float_X( 0.5 ) ); + /** calculate virtual point were we split our particle trajectory + * + * @see RelayPoint< >::operator( ) description + */ + DINLINE float_X operator()(int& i_1, int& i_2, const float_X x_1, const float_X x_2) const + { + i_1 = pmacc::math::float2int_rd(x_1 + float_X(0.5)); + i_2 = pmacc::math::float2int_rd(x_2 + float_X(0.5)); - return i_1 == i_2 ? x_2 : float_X( i_1 + i_2 )/float_X( 2.0 ); - } - }; + return i_1 == i_2 ? 
x_2 : float_X(i_1 + i_2) / float_X(2.0); + } + }; -} // namespace currentSolver + } // namespace currentSolver } // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/Solver.def b/include/picongpu/fields/currentDeposition/Solver.def index 43c90bdf7f..354c6cbad9 100644 --- a/include/picongpu/fields/currentDeposition/Solver.def +++ b/include/picongpu/fields/currentDeposition/Solver.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -18,9 +18,10 @@ */ +#include "picongpu/fields/currentDeposition/Strategy.def" #include "picongpu/fields/currentDeposition/Esirkepov/Esirkepov.def" #include "picongpu/fields/currentDeposition/EmZ/EmZ.def" -#if(SIMDIM==DIM3) -#include "picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def" +#if(SIMDIM == DIM3) +# include "picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def" #endif diff --git a/include/picongpu/fields/currentDeposition/Solver.hpp b/include/picongpu/fields/currentDeposition/Solver.hpp index 3f4bbc6908..064a5883bd 100644 --- a/include/picongpu/fields/currentDeposition/Solver.hpp +++ b/include/picongpu/fields/currentDeposition/Solver.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -22,6 +22,6 @@ #include "picongpu/fields/currentDeposition/Esirkepov/EsirkepovNative.hpp" #include "picongpu/fields/currentDeposition/EmZ/EmZ.hpp" -#if(SIMDIM==DIM3) -#include "picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.hpp" +#if(SIMDIM == DIM3) +# include "picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.hpp" #endif diff --git a/include/picongpu/fields/currentDeposition/Strategy.def b/include/picongpu/fields/currentDeposition/Strategy.def new file mode 100644 index 0000000000..629276bc51 --- /dev/null +++ b/include/picongpu/fields/currentDeposition/Strategy.def @@ -0,0 +1,216 @@ +/* Copyright 2020-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include +#include +#include + + +namespace picongpu +{ + namespace currentSolver + { + namespace strategy + { + namespace detail + { + /** Validate and adjust worker multiplier + * + * @param multiplicator Number used as multiplier to oversubscribe the number of threads for the + * compute current task/kernel. + * @return valid multiplier + */ + constexpr int validateAndAdjustWorkerMultiplier(int const multiplicator) + { +#if BOOST_COMP_HIP && PIC_COMPUTE_CURRENT_THREAD_LIMITER + // HIP-clang creates wrong results if more threads than particles in a frame will be used + return 1; +#else + return multiplicator >= 1 ? 
multiplicator : 1; +#endif + } + } // namespace detail + + /** Work on strided supercell domains with local caching strategy + * + * The current for each particle will be reduced with atomic operations into a supercell + * local cache. The cache will be flushed to the global memory without atomics. + * The device local domain of fieldJ will be decomposed with a checker board. + * + * Suggestion: Use this strategy if atomic operations to global memory are slow. + * To utilize the device fully you should have enough supercells + * - 2D: minimum multiprocessor count * 9 * 4 + * - 3D: minimum multiprocessor count * 27 * 4 + * + * @{ + */ + struct StridedCachedSupercells + { + static constexpr bool useBlockCache = true; + static constexpr bool stridedMapping = true; + using BlockReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Threads>; + using GridReductionOp = nvidia::functors::Add; + static constexpr int workerMultiplier = 1; + }; + + /** @tparam T_workerMultiplier Oversubscribe the number of workers used to compute the current by the given + * multiplier. Can be used to optimize the device occupancy. + */ + template + struct StridedCachedSupercellsScaled + { + static constexpr bool useBlockCache = true; + static constexpr bool stridedMapping = true; + using BlockReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Threads>; + using GridReductionOp = nvidia::functors::Add; + static constexpr int workerMultiplier = detail::validateAndAdjustWorkerMultiplier(T_workerMultiplier); + }; + + /** @} */ + + /** Local caching strategy + * + * The current for each particle will be reduced with atomic operations into a supercell + * local cache. The cache will be flushed with atomic operations to the global memory. + * + * Suggestion: Use this strategy if block local and global atomics are fast. 
+ * + * @{ + */ + struct CachedSupercells + { + static constexpr bool useBlockCache = true; + static constexpr bool stridedMapping = false; + using BlockReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Threads>; + using GridReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Blocks>; + static constexpr int workerMultiplier = 1; + }; + + /** @tparam T_workerMultiplier Oversubscribe the number of workers used to compute the current by the given + * multiplier. Can be used to optimize the device occupancy. + */ + template + struct CachedSupercellsScaled + { + static constexpr bool useBlockCache = true; + static constexpr bool stridedMapping = false; + using BlockReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Threads>; + using GridReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Blocks>; + static constexpr int workerMultiplier = detail::validateAndAdjustWorkerMultiplier(T_workerMultiplier); + }; + + /** @} */ + + /** Non cached strategy + * + * The current for each particle will be reduced with atomic operations directly + * to the global memory. + * + * Suggestion: Use this strategy if global atomics are fast and random memory access + * to a large range in memory is not a bottle neck. + * + * @{ + */ + struct NonCachedSupercells + { + static constexpr bool useBlockCache = false; + static constexpr bool stridedMapping = false; + using BlockReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Blocks>; + // dummy which produces a compile time error if used + using GridReductionOp = void; + static constexpr int workerMultiplier = 1; + }; + + /** @tparam T_workerMultiplier Oversubscribe the number of workers used to compute the current by the given + * multiplier. Can be used to optimize the device occupancy. 
+ */ + template + struct NonCachedSupercellsScaled + { + static constexpr bool useBlockCache = false; + static constexpr bool stridedMapping = false; + using BlockReductionOp = nvidia::functors::Atomic<::alpaka::AtomicAdd, ::alpaka::hierarchy::Blocks>; + // dummy which produces a compile time error if used + using GridReductionOp = void; + static constexpr int workerMultiplier = detail::validateAndAdjustWorkerMultiplier(T_workerMultiplier); + }; + + /** @} */ + + } // namespace strategy + + namespace traits + { + /** Get current deposition strategy from a solver + * + * @tparam T_Solver type to derive the strategy + * @treturn ::type strategy description + */ + template + struct GetStrategy; + + /** Get current deposition strategy from a solver + * + * @see GetStrategy + */ + template + using GetStrategy_t = typename GetStrategy::type; + + /** Default strategy for the current deposition + * + * Default will be selected based on the cupla accelerator. + * + * @tparam T_Acc the accelerator type + */ + template + struct GetDefaultStrategy + { + using type = strategy::StridedCachedSupercells; + }; + + /** Default strategy for the current deposition + * + * @see GetDefaultStrategy + */ + template + using GetDefaultStrategy_t = typename GetDefaultStrategy::type; + +#if(ALPAKA_ACC_GPU_CUDA_ENABLED == 1) + template + struct GetDefaultStrategy> + { + // GPU Utilization is higher compared to `StridedCachedSupercells` + using type = strategy::CachedSupercells; + }; +#endif + +#if(ALPAKA_ACC_GPU_HIP_ENABLED == 1) + template + struct GetDefaultStrategy> + { + // GPU Utilization is higher compared to `StridedCachedSupercells` + using type = strategy::CachedSupercells; + }; +#endif + + } // namespace traits + } // namespace currentSolver +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def b/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def index cda17a43af..cacdafa945 100644 --- 
a/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def +++ b/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -17,23 +17,39 @@ * If not, see . */ - #pragma once #include #include "picongpu/simulation_defines.hpp" #include "picongpu/particles/shapes/CIC.hpp" +#include "picongpu/fields/currentDeposition/Strategy.def" namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; - -template -struct VillaBune; - -} //namespace currentSolver - -} //namespace picongpu + namespace currentSolver + { + /** Current deposition algorithm from J. Villasenor and O. Buneman + * + * paper: J. Villasenor and O. Buneman. Rigorous charge conservation for local + * electromagnetic field solvers. Computer Physics Communications, 69:306, 1992. + * https://doi.org/10.1016/0010-4655(92)90169-Y + * + * @tparam T_ParticleShape the particle shape for the species, supports only [picongpu::particles::shapes::CIC] + * @tparam T_Strategy Used strategy to reduce the scattered data [currentSolver::strategy] + */ + template< + typename T_ParticleShape = picongpu::particles::shapes::CIC, + typename T_Strategy = traits::GetDefaultStrategy_t<>> + struct VillaBune; + + namespace traits + { + template + struct GetStrategy> + { + using type = T_Strategy; + }; + } // namespace traits + } // namespace currentSolver +} // namespace picongpu diff --git a/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.hpp b/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.hpp index f0bb7cbe97..a9d33928bc 100644 --- a/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.hpp +++ b/include/picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 
2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -20,274 +20,282 @@ #pragma once #include +#include "picongpu/fields/currentDeposition/VillaBune/CurrentVillaBune.def" #include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/shapes/CIC.hpp" #include #include -#include -#include "picongpu/particles/shapes/CIC.hpp" +#include namespace picongpu { -namespace currentSolver -{ -using namespace pmacc; - -template -struct VillaBune -{ - template - DINLINE void operator()(const T_Acc& acc, - BoxJ& boxJ_par, /*box which is shifted to particles cell*/ - const PosType pos, - const VelType velocity, - const ChargeType charge, const float_X deltaTime) - { - /* VillaBune: field to particle interpolation _requires_ the CIC shape */ - PMACC_CASSERT_MSG_TYPE(currentSolverVillaBune_requires_shapeCIC_in_particleConfig, - T_ParticleShape, - T_ParticleShape::support == 2); - - // normalize deltaPos to innerCell units [0.; 1.) - // that means: dx_real = v.x() * dt - // dx_inCell = v.x() * dt / cellSize.x() - const float3_X deltaPos( - velocity.x() * deltaTime / cellSize.x(), - velocity.y() * deltaTime / cellSize.y(), - velocity.z() * deltaTime / cellSize.z()); - - const PosType oldPos = (PosType) (precisionCast (pos) - deltaPos); - - addCurrentSplitX(acc, oldPos, pos, charge, boxJ_par, deltaTime); - } - - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "VillaBune" ); - return propList; - } - -private: - //Splits the [oldPos,newPos] beam into two beams at the x-boundary of the cell - //if necessary - - template< - typename Buffer, - typename T_Acc - > - DINLINE void addCurrentSplitX( - T_Acc const & acc, - const float3_X& oldPos, - const float3_X& newPos, - const float_X charge, - Buffer & mem, - const float_X deltaTime - ) + namespace currentSolver { - - if (math::float2int_rd(oldPos.x()) != math::float2int_rd(newPos.x())) + template + struct VillaBune { - const float3_X 
interPos = intersectXPlane(oldPos, newPos, - math::max(math::float2int_rd(oldPos.x()), math::float2int_rd(newPos.x()))); - addCurrentSplitY(acc, oldPos, interPos, charge, mem, deltaTime); - addCurrentSplitY(acc, interPos, newPos, charge, mem, deltaTime); - return; - } - addCurrentSplitY(acc, oldPos, newPos, charge, mem, deltaTime); - } - - template< - typename Buffer, - typename T_Acc - > - DINLINE void addCurrentToSingleCell( - T_Acc const & acc, - float3_X meanPos, - const float3_X& deltaPos, - const float_X charge, - Buffer & memIn, - const float_X deltaTime - ) - { - //shift to the cell meanPos belongs to - //because meanPos may exceed the range [0,1) - DataSpace off(math::float2int_rd(meanPos.x()), - math::float2int_rd(meanPos.y()), - math::float2int_rd(meanPos.z())); - - auto mem = memIn.shift(off); - - //fit meanPos into the range [0,1) - meanPos.x() -= math::floor(meanPos.x()); - meanPos.y() -= math::floor(meanPos.y()); - meanPos.z() -= math::floor(meanPos.z()); - - //for the formulas used in here see Villasenor/Buneman paper page 314 - const float_X tmp = deltaPos.x() * deltaPos.y() * deltaPos.z() * (float_X(1.0) / float_X(12.0)); - - // j = rho * v - // = rho * dr / dt - //const float_X rho = charge * (1.0 / (CELL_WIDTH * CELL_HEIGHT * CELL_DEPTH)); - //const float_X rho_dt = rho * (1.0 / deltaTime); - - // now carefully: - // deltaPos is in "inCell" coordinates, that means: - // deltaPos.x() = deltaPos_real.x() / cellSize.x() - // to calculate the current density in realUnits it is - // j.x() = rho * deltaPos_real.x() / dt - // = rho * deltaPos.x() * cellSize.x() / dt - // So put adding the constant directly to rho results in: - // const float_X rho_dtX = rho * CELL_WIDTH; - // const float_X rho_dtY = rho * CELL_HEIGHT; - // const float_X rho_dtZ = rho * CELL_DEPTH; - - // This is exactly the same like: - // j = Q / A / t - // j.x() = Q.x() * (1.0 / (CELL_HEIGHT * CELL_DEPTH * deltaTime)); - // j.y() = Q.y() * (1.0 / (CELL_WIDTH * CELL_DEPTH * 
deltaTime)); - // j.z() = Q.z() * (1.0 / (CELL_WIDTH * CELL_HEIGHT * deltaTime)); - // with the difference, that (imagine a moving quader) - // Q.x() = charge * deltaPos_real.x() / cellsize.x() - // = charge * deltaPos.x() / 1.0 - // - const float_X rho_dtX = charge * (float_X(1.0) / (CELL_HEIGHT * CELL_DEPTH * deltaTime)); - const float_X rho_dtY = charge * (float_X(1.0) / (CELL_WIDTH * CELL_DEPTH * deltaTime)); - const float_X rho_dtZ = charge * (float_X(1.0) / (CELL_WIDTH * CELL_HEIGHT * deltaTime)); - - atomicAdd(&(mem[1][1][0].x()), rho_dtX * (deltaPos.x() * meanPos.y() * meanPos.z() + tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[1][0][0].x()), rho_dtX * (deltaPos.x() * (float_X(1.0) - meanPos.y()) * meanPos.z() - tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][1][0].x()), rho_dtX * (deltaPos.x() * meanPos.y() * (float_X(1.0) - meanPos.z()) - tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][0][0].x()), rho_dtX * (deltaPos.x() * (float_X(1.0) - meanPos.y()) * (float_X(1.0) - meanPos.z()) + tmp), ::alpaka::hierarchy::Threads{}); - - atomicAdd(&(mem[1][0][1].y()), rho_dtY * (deltaPos.y() * meanPos.z() * meanPos.x() + tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][0][1].y()), rho_dtY * (deltaPos.y() * (float_X(1.0) - meanPos.z()) * meanPos.x() - tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[1][0][0].y()), rho_dtY * (deltaPos.y() * meanPos.z() * (float_X(1.0) - meanPos.x()) - tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][0][0].y()), rho_dtY * (deltaPos.y() * (float_X(1.0) - meanPos.z()) * (float_X(1.0) - meanPos.x()) + tmp), ::alpaka::hierarchy::Threads{}); - - atomicAdd(&(mem[0][1][1].z()), rho_dtZ * (deltaPos.z() * meanPos.x() * meanPos.y() + tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][1][0].z()), rho_dtZ * (deltaPos.z() * (float_X(1.0) - meanPos.x()) * meanPos.y() - tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][0][1].z()), rho_dtZ * (deltaPos.z() * meanPos.x() 
* (float_X(1.0) - meanPos.y()) - tmp), ::alpaka::hierarchy::Threads{}); - atomicAdd(&(mem[0][0][0].z()), rho_dtZ * (deltaPos.z() * (float_X(1.0) - meanPos.x()) * (float_X(1.0) - meanPos.y()) + tmp), ::alpaka::hierarchy::Threads{}); - - } - - //calculates the intersection point of the [pos1,pos2] beam with an y,z-plane at position x0 - - DINLINE float3_X intersectXPlane(const float3_X& pos1, const float3_X& pos2, const float_X x0) - { - const float_X t = (x0 - pos1.x()) / (pos2.x() - pos1.x()); - - return float3_X(x0, pos1.y() + t * (pos2.y() - pos1.y()), pos1.z() + t * (pos2.z() - pos1.z())); - } - - DINLINE float3_X intersectYPlane(const float3_X& pos1, const float3_X& pos2, const float_X y0) - { - const float_X t = (y0 - pos1.y()) / (pos2.y() - pos1.y()); - - return float3_X(pos1.x() + t * (pos2.x() - pos1.x()), y0, pos1.z() + t * (pos2.z() - pos1.z())); - } - - DINLINE float3_X intersectZPlane(const float3_X& pos1, const float3_X& pos2, const float_X z0) - { - const float_X t = (z0 - pos1.z()) / (pos2.z() - pos1.z()); - - return float3_X(pos1.x() + t * (pos2.x() - pos1.x()), pos1.y() + t * (pos2.y() - pos1.y()), z0); - } - - //Splits the [oldPos,newPos] beam into two beams at the z-boundary of the cell - //if necessary - - template< - typename Buffer, - typename T_Acc - > - DINLINE void addCurrentSplitZ( - T_Acc const & acc, - const float3_X &oldPos, - const float3_X &newPos, - const float_X charge, - Buffer & mem, - const float_X deltaTime - ) - { - - if (math::float2int_rd(oldPos.z()) != math::float2int_rd(newPos.z())) - { - const float3_X interPos = intersectZPlane(oldPos, newPos, - math::max(math::float2int_rd(oldPos.z()), math::float2int_rd(newPos.z()))); - float3_X deltaPos = interPos - oldPos; - float3_X meanPos = oldPos + float_X(0.5) * deltaPos; - addCurrentToSingleCell(acc, meanPos, deltaPos, charge, mem, deltaTime); - - deltaPos = newPos - interPos; - meanPos = interPos + float_X(0.5) * deltaPos; - addCurrentToSingleCell(acc, meanPos, deltaPos, 
charge, mem, deltaTime); - return; - } - const float3_X deltaPos = newPos - oldPos; - const float3_X meanPos = oldPos + float_X(0.5) * deltaPos; - addCurrentToSingleCell(acc, meanPos, deltaPos, charge, mem, deltaTime); - } - - //Splits the [oldPos,newPos] beam into two beams at the y-boundary of the cell - //if necessary - - template< - typename Buffer, - typename T_Acc - > - DINLINE void addCurrentSplitY( - T_Acc const & acc, - const float3_X& oldPos, - const float3_X& newPos, - const float_X charge, - Buffer & mem, - const float_X deltaTime - ) + template + DINLINE void operator()( + const T_Acc& acc, + BoxJ& boxJ_par, /*box which is shifted to particles cell*/ + const PosType pos, + const VelType velocity, + const ChargeType charge, + const float_X deltaTime) + { + /* VillaBune: field to particle interpolation _requires_ the CIC shape */ + PMACC_CASSERT_MSG_TYPE( + currentSolverVillaBune_requires_shapeCIC_in_particleConfig, + T_ParticleShape, + std::is_same::value); + + // normalize deltaPos to innerCell units [0.; 1.) 
+ // that means: dx_real = v.x() * dt + // dx_inCell = v.x() * dt / cellSize.x() + const float3_X deltaPos( + velocity.x() * deltaTime / cellSize.x(), + velocity.y() * deltaTime / cellSize.y(), + velocity.z() * deltaTime / cellSize.z()); + + const PosType oldPos = (PosType)(precisionCast(pos) - deltaPos); + + addCurrentSplitX(acc, oldPos, pos, charge, boxJ_par, deltaTime); + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "VillaBune"); + return propList; + } + + private: + // Splits the [oldPos,newPos] beam into two beams at the x-boundary of the cell + // if necessary + + template + DINLINE void addCurrentSplitX( + T_Acc const& acc, + const float3_X& oldPos, + const float3_X& newPos, + const float_X charge, + Buffer& mem, + const float_X deltaTime) + { + if(pmacc::math::float2int_rd(oldPos.x()) != pmacc::math::float2int_rd(newPos.x())) + { + const float3_X interPos = intersectXPlane( + oldPos, + newPos, + math::max(pmacc::math::float2int_rd(oldPos.x()), pmacc::math::float2int_rd(newPos.x()))); + addCurrentSplitY(acc, oldPos, interPos, charge, mem, deltaTime); + addCurrentSplitY(acc, interPos, newPos, charge, mem, deltaTime); + return; + } + addCurrentSplitY(acc, oldPos, newPos, charge, mem, deltaTime); + } + + template + DINLINE void addCurrentToSingleCell( + T_Acc const& acc, + float3_X meanPos, + const float3_X& deltaPos, + const float_X charge, + Buffer& memIn, + const float_X deltaTime) + { + // shift to the cell meanPos belongs to + // because meanPos may exceed the range [0,1) + DataSpace off( + pmacc::math::float2int_rd(meanPos.x()), + pmacc::math::float2int_rd(meanPos.y()), + pmacc::math::float2int_rd(meanPos.z())); + + auto mem = memIn.shift(off); + + // fit meanPos into the range [0,1) + meanPos.x() -= math::floor(meanPos.x()); + meanPos.y() -= math::floor(meanPos.y()); + meanPos.z() -= math::floor(meanPos.z()); + + // for the formulas used in here see Villasenor/Buneman paper page 314 
+ const float_X tmp = deltaPos.x() * deltaPos.y() * deltaPos.z() * (float_X(1.0) / float_X(12.0)); + + // j = rho * v + // = rho * dr / dt + // const float_X rho = charge * (1.0 / (CELL_WIDTH * CELL_HEIGHT * CELL_DEPTH)); + // const float_X rho_dt = rho * (1.0 / deltaTime); + + // now carefully: + // deltaPos is in "inCell" coordinates, that means: + // deltaPos.x() = deltaPos_real.x() / cellSize.x() + // to calculate the current density in realUnits it is + // j.x() = rho * deltaPos_real.x() / dt + // = rho * deltaPos.x() * cellSize.x() / dt + // So put adding the constant directly to rho results in: + // const float_X rho_dtX = rho * CELL_WIDTH; + // const float_X rho_dtY = rho * CELL_HEIGHT; + // const float_X rho_dtZ = rho * CELL_DEPTH; + + // This is exactly the same like: + // j = Q / A / t + // j.x() = Q.x() * (1.0 / (CELL_HEIGHT * CELL_DEPTH * deltaTime)); + // j.y() = Q.y() * (1.0 / (CELL_WIDTH * CELL_DEPTH * deltaTime)); + // j.z() = Q.z() * (1.0 / (CELL_WIDTH * CELL_HEIGHT * deltaTime)); + // with the difference, that (imagine a moving quader) + // Q.x() = charge * deltaPos_real.x() / cellsize.x() + // = charge * deltaPos.x() / 1.0 + // + const float_X rho_dtX = charge * (float_X(1.0) / (CELL_HEIGHT * CELL_DEPTH * deltaTime)); + const float_X rho_dtY = charge * (float_X(1.0) / (CELL_WIDTH * CELL_DEPTH * deltaTime)); + const float_X rho_dtZ = charge * (float_X(1.0) / (CELL_WIDTH * CELL_HEIGHT * deltaTime)); + + auto const atomicOp = typename T_Strategy::BlockReductionOp{}; + + atomicOp(acc, mem[1][1][0].x(), rho_dtX * (deltaPos.x() * meanPos.y() * meanPos.z() + tmp)); + atomicOp( + acc, + mem[1][0][0].x(), + rho_dtX * (deltaPos.x() * (float_X(1.0) - meanPos.y()) * meanPos.z() - tmp)); + atomicOp( + acc, + mem[0][1][0].x(), + rho_dtX * (deltaPos.x() * meanPos.y() * (float_X(1.0) - meanPos.z()) - tmp)); + atomicOp( + acc, + mem[0][0][0].x(), + rho_dtX * (deltaPos.x() * (float_X(1.0) - meanPos.y()) * (float_X(1.0) - meanPos.z()) + tmp)); + + atomicOp(acc, 
mem[1][0][1].y(), rho_dtY * (deltaPos.y() * meanPos.z() * meanPos.x() + tmp)); + atomicOp( + acc, + mem[0][0][1].y(), + rho_dtY * (deltaPos.y() * (float_X(1.0) - meanPos.z()) * meanPos.x() - tmp)); + atomicOp( + acc, + mem[1][0][0].y(), + rho_dtY * (deltaPos.y() * meanPos.z() * (float_X(1.0) - meanPos.x()) - tmp)); + atomicOp( + acc, + mem[0][0][0].y(), + rho_dtY * (deltaPos.y() * (float_X(1.0) - meanPos.z()) * (float_X(1.0) - meanPos.x()) + tmp)); + + atomicOp(acc, mem[0][1][1].z(), rho_dtZ * (deltaPos.z() * meanPos.x() * meanPos.y() + tmp)); + atomicOp( + acc, + mem[0][1][0].z(), + rho_dtZ * (deltaPos.z() * (float_X(1.0) - meanPos.x()) * meanPos.y() - tmp)); + atomicOp( + acc, + mem[0][0][1].z(), + rho_dtZ * (deltaPos.z() * meanPos.x() * (float_X(1.0) - meanPos.y()) - tmp)); + atomicOp( + acc, + mem[0][0][0].z(), + rho_dtZ * (deltaPos.z() * (float_X(1.0) - meanPos.x()) * (float_X(1.0) - meanPos.y()) + tmp)); + } + + // calculates the intersection point of the [pos1,pos2] beam with an y,z-plane at position x0 + + DINLINE float3_X intersectXPlane(const float3_X& pos1, const float3_X& pos2, const float_X x0) + { + const float_X t = (x0 - pos1.x()) / (pos2.x() - pos1.x()); + + return float3_X(x0, pos1.y() + t * (pos2.y() - pos1.y()), pos1.z() + t * (pos2.z() - pos1.z())); + } + + DINLINE float3_X intersectYPlane(const float3_X& pos1, const float3_X& pos2, const float_X y0) + { + const float_X t = (y0 - pos1.y()) / (pos2.y() - pos1.y()); + + return float3_X(pos1.x() + t * (pos2.x() - pos1.x()), y0, pos1.z() + t * (pos2.z() - pos1.z())); + } + + DINLINE float3_X intersectZPlane(const float3_X& pos1, const float3_X& pos2, const float_X z0) + { + const float_X t = (z0 - pos1.z()) / (pos2.z() - pos1.z()); + + return float3_X(pos1.x() + t * (pos2.x() - pos1.x()), pos1.y() + t * (pos2.y() - pos1.y()), z0); + } + + // Splits the [oldPos,newPos] beam into two beams at the z-boundary of the cell + // if necessary + + template + DINLINE void addCurrentSplitZ( + T_Acc const& 
acc, + const float3_X& oldPos, + const float3_X& newPos, + const float_X charge, + Buffer& mem, + const float_X deltaTime) + { + if(pmacc::math::float2int_rd(oldPos.z()) != pmacc::math::float2int_rd(newPos.z())) + { + const float3_X interPos = intersectZPlane( + oldPos, + newPos, + math::max(pmacc::math::float2int_rd(oldPos.z()), pmacc::math::float2int_rd(newPos.z()))); + float3_X deltaPos = interPos - oldPos; + float3_X meanPos = oldPos + float_X(0.5) * deltaPos; + addCurrentToSingleCell(acc, meanPos, deltaPos, charge, mem, deltaTime); + + deltaPos = newPos - interPos; + meanPos = interPos + float_X(0.5) * deltaPos; + addCurrentToSingleCell(acc, meanPos, deltaPos, charge, mem, deltaTime); + return; + } + const float3_X deltaPos = newPos - oldPos; + const float3_X meanPos = oldPos + float_X(0.5) * deltaPos; + addCurrentToSingleCell(acc, meanPos, deltaPos, charge, mem, deltaTime); + } + + // Splits the [oldPos,newPos] beam into two beams at the y-boundary of the cell + // if necessary + + template + DINLINE void addCurrentSplitY( + T_Acc const& acc, + const float3_X& oldPos, + const float3_X& newPos, + const float_X charge, + Buffer& mem, + const float_X deltaTime) + { + if(pmacc::math::float2int_rd(oldPos.y()) != pmacc::math::float2int_rd(newPos.y())) + { + const float3_X interPos = intersectYPlane( + oldPos, + newPos, + math::max(pmacc::math::float2int_rd(oldPos.y()), pmacc::math::float2int_rd(newPos.y()))); + addCurrentSplitZ(acc, oldPos, interPos, charge, mem, deltaTime); + addCurrentSplitZ(acc, interPos, newPos, charge, mem, deltaTime); + return; + } + addCurrentSplitZ(acc, oldPos, newPos, charge, mem, deltaTime); + } + }; + + } // namespace currentSolver + + namespace traits { - - if (math::float2int_rd(oldPos.y()) != math::float2int_rd(newPos.y())) + template + struct GetMargin> { - const float3_X interPos = intersectYPlane(oldPos, newPos, - math::max(math::float2int_rd(oldPos.y()), math::float2int_rd(newPos.y()))); - addCurrentSplitZ(acc, oldPos, interPos, 
charge, mem, deltaTime); - addCurrentSplitZ(acc, interPos, newPos, charge, mem, deltaTime); - return; - } - addCurrentSplitZ(acc, oldPos, newPos, charge, mem, deltaTime); - } - -}; - -} //namespace currentSolver - -namespace traits -{ - -template -struct GetMargin > -{ - typedef ::pmacc::math::CT::Int < 1, 1, 1 > LowerMargin; - typedef ::pmacc::math::CT::Int < 2, 2, 2 > UpperMargin; - - /** maximum margin size of LowerMargin and UpperMargin */ - static constexpr int maxMargin = 2; - - PMACC_CASSERT_MSG( - __VillaBune_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, - pmacc::math::CT::min< - typename pmacc::math::CT::mul< - SuperCellSize, - GuardSize - >::type - >::type::value >= maxMargin - ); -}; - -} //namespace traits + using LowerMargin = ::pmacc::math::CT::Int<1, 1, 1>; + using UpperMargin = ::pmacc::math::CT::Int<2, 2, 2>; -} //namespace picongpu + /** maximum margin size of LowerMargin and UpperMargin */ + static constexpr int maxMargin = 2; + PMACC_CASSERT_MSG( + __VillaBune_supercell_or_number_of_guard_supercells_is_too_small_for_stencil, + pmacc::math::CT::min::type>::type::value + >= maxMargin); + }; + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/Binomial/Binomial.def b/include/picongpu/fields/currentInterpolation/Binomial/Binomial.def index aacb2bce75..c4d927165d 100644 --- a/include/picongpu/fields/currentInterpolation/Binomial/Binomial.def +++ b/include/picongpu/fields/currentInterpolation/Binomial/Binomial.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -22,15 +22,14 @@ namespace picongpu { -namespace currentInterpolation -{ - - /** 2nd order Binomial filter - * - * Smooths the current before assignment in staggered grid. - * Updates E & breaks local charge conservation slightly. 
- */ - struct Binomial; + namespace currentInterpolation + { + /** 2nd order Binomial filter functor + * + * Smooths the current before assignment in staggered grid. + * Updates E & breaks local charge conservation slightly. + */ + struct Binomial; -} // namespace currentInterpolation + } // namespace currentInterpolation } // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/Binomial/Binomial.hpp b/include/picongpu/fields/currentInterpolation/Binomial/Binomial.hpp index 922822de9e..5202bfbfe0 100644 --- a/include/picongpu/fields/currentInterpolation/Binomial/Binomial.hpp +++ b/include/picongpu/fields/currentInterpolation/Binomial/Binomial.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl, Benjamin Worpitz, Klaus Steiniger +/* Copyright 2015-2021 Axel Huebl, Benjamin Worpitz, Klaus Steiniger * * This file is part of PIConGPU. * @@ -27,201 +27,164 @@ namespace picongpu { -namespace currentInterpolation -{ -namespace detail -{ - - template< uint32_t T_dim > - struct Binomial; - - - //! Specialization for 3D - template< > - struct Binomial< DIM3 > + namespace currentInterpolation { - static constexpr uint32_t dim = DIM3; - - using LowerMargin = typename pmacc::math::CT::make_Int< - dim, - 1 - >::type ; - using UpperMargin = LowerMargin; - - template< - typename T_DataBoxE, - typename T_DataBoxB, - typename T_DataBoxJ - > - HDINLINE void operator()( - T_DataBoxE fieldE, - T_DataBoxB const, - T_DataBoxJ const fieldJ - ) + namespace detail { - using TypeJ = typename T_DataBoxJ::ValueType; - using DS = DataSpace< dim >; - - // weighting for original value, i.e. center element of a cell - constexpr float_X M = 8.0; - // weighting for nearest neighbours, i.e. cells sharing a face with the center cell - constexpr float_X S = 4.0; - // weighting for next to nearest neighbours, i.e. cells sharing an edge with the center cell - constexpr float_X D = 2.0; - // weighting for farthest neighbours, i.e. 
cells sharing a corner with the center cell - constexpr float_X T = 1.0; - - TypeJ averagedJ = - // sum far neighbours, i.e. corner elements, weighting T - T * ( - fieldJ( DS( -1, -1, -1 ) ) + fieldJ( DS( +1, -1, -1 ) ) + fieldJ( DS( -1, +1, -1 ) ) + fieldJ( DS( +1, +1, -1 ) ) + - fieldJ( DS( -1, -1, +1 ) ) + fieldJ( DS( +1, -1, +1 ) ) + fieldJ( DS( -1, +1, +1 ) ) + fieldJ( DS( +1, +1, +1 ) ) - ) + - // sum next to nearest neighbours, i.e. edge elements, weighting D - D * ( - fieldJ( DS( -1, -1, 0 ) ) + fieldJ( DS( +1, -1, 0 ) ) + fieldJ( DS( -1, +1, 0 ) ) + fieldJ( DS( +1, +1, 0 ) ) + - fieldJ( DS( -1, 0, -1 ) ) + fieldJ( DS( +1, 0, -1 ) ) + fieldJ( DS( -1, 0, +1 ) ) + fieldJ( DS( +1, 0, +1 ) ) + - fieldJ( DS( 0, -1, -1 ) ) + fieldJ( DS( 0, +1, -1 ) ) + fieldJ( DS( 0, -1, +1 ) ) + fieldJ( DS( 0, +1, +1 ) ) - ) + - // sum next neighbours, i.e. face elements, weighting S - S * ( - fieldJ( DS( -1, 0, 0 ) ) + fieldJ( DS( +1, 0, 0 ) ) + - fieldJ( DS( 0, -1, 0 ) ) + fieldJ( DS( 0, +1, 0 ) ) + - fieldJ( DS( 0, 0, -1 ) ) + fieldJ( DS( 0, 0, +1 ) ) - ) + - // add original value, i.e. center element, weighting M - M * ( - fieldJ( DS( 0, 0, 0 ) ) - ); - - /* calc average by normalizing weighted sum In 3D there are: - * - original value with weighting M - * - 6 nearest neighbours with weighting S - * - 12 next to nearest neighbours with weighting D - * - 8 farthest neighbours with weighting T - */ - constexpr float_X inverseDivisor = 1._X / ( M + 6._X * S + 12._X * D + 8._X * T ); - averagedJ *= inverseDivisor; - - constexpr float_X deltaT = DELTA_T; - *fieldE -= averagedJ * ( 1._X / EPS0 ) * deltaT; - } - }; - - - //! 
Specialization for 2D - template< > - struct Binomial< DIM2 > - { - static constexpr uint32_t dim = DIM2; - - using LowerMargin = typename pmacc::math::CT::make_Int< - dim, - 1 - >::type ; - using UpperMargin = LowerMargin; - - template< - typename T_DataBoxE, - typename T_DataBoxB, - typename T_DataBoxJ - > - HDINLINE void operator()( - T_DataBoxE fieldE, - T_DataBoxB const, - T_DataBoxJ const fieldJ - ) + template + struct Binomial; + + //! Specialization for 3D + template<> + struct Binomial + { + static constexpr uint32_t dim = DIM3; + + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = LowerMargin; + + template + HDINLINE void operator()(T_DataBoxE fieldE, T_DataBoxB const, T_DataBoxJ const fieldJ) + { + using TypeJ = typename T_DataBoxJ::ValueType; + using DS = DataSpace; + + // weighting for original value, i.e. center element of a cell + constexpr float_X M = 8.0; + // weighting for nearest neighbours, i.e. cells sharing a face with the center cell + constexpr float_X S = 4.0; + // weighting for next to nearest neighbours, i.e. cells sharing an edge with the center cell + constexpr float_X D = 2.0; + // weighting for farthest neighbours, i.e. cells sharing a corner with the center cell + constexpr float_X T = 1.0; + + TypeJ averagedJ = + // sum far neighbours, i.e. corner elements, weighting T + T + * (fieldJ(DS(-1, -1, -1)) + fieldJ(DS(+1, -1, -1)) + fieldJ(DS(-1, +1, -1)) + + fieldJ(DS(+1, +1, -1)) + fieldJ(DS(-1, -1, +1)) + fieldJ(DS(+1, -1, +1)) + + fieldJ(DS(-1, +1, +1)) + fieldJ(DS(+1, +1, +1))) + + + // sum next to nearest neighbours, i.e. edge elements, weighting D + D + * (fieldJ(DS(-1, -1, 0)) + fieldJ(DS(+1, -1, 0)) + fieldJ(DS(-1, +1, 0)) + + fieldJ(DS(+1, +1, 0)) + fieldJ(DS(-1, 0, -1)) + fieldJ(DS(+1, 0, -1)) + + fieldJ(DS(-1, 0, +1)) + fieldJ(DS(+1, 0, +1)) + fieldJ(DS(0, -1, -1)) + + fieldJ(DS(0, +1, -1)) + fieldJ(DS(0, -1, +1)) + fieldJ(DS(0, +1, +1))) + + + // sum next neighbours, i.e. 
face elements, weighting S + S + * (fieldJ(DS(-1, 0, 0)) + fieldJ(DS(+1, 0, 0)) + fieldJ(DS(0, -1, 0)) + + fieldJ(DS(0, +1, 0)) + fieldJ(DS(0, 0, -1)) + fieldJ(DS(0, 0, +1))) + + + // add original value, i.e. center element, weighting M + M * (fieldJ(DS(0, 0, 0))); + + /* calc average by normalizing weighted sum In 3D there are: + * - original value with weighting M + * - 6 nearest neighbours with weighting S + * - 12 next to nearest neighbours with weighting D + * - 8 farthest neighbours with weighting T + */ + constexpr float_X inverseDivisor = 1._X / (M + 6._X * S + 12._X * D + 8._X * T); + averagedJ *= inverseDivisor; + + constexpr float_X deltaT = DELTA_T; + *fieldE -= averagedJ * (1._X / EPS0) * deltaT; + } + }; + + + //! Specialization for 2D + template<> + struct Binomial + { + static constexpr uint32_t dim = DIM2; + + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = LowerMargin; + + template + HDINLINE void operator()(T_DataBoxE fieldE, T_DataBoxB const, T_DataBoxJ const fieldJ) + { + using TypeJ = typename T_DataBoxJ::ValueType; + using DS = DataSpace; + + // weighting for original value, i.e. center element of a cell + constexpr float_X M = 4.0; + // weighting for nearest neighbours, i.e. cells sharing an edge with the center cell + constexpr float_X S = 2.0; + // weighting for next to nearest neighbours, i.e. cells sharing a corner with the center cell + constexpr float_X D = 1.0; + + TypeJ averagedJ = + // sum next to nearest neighbours, i.e. corner neighbors, weighting D + D * (fieldJ(DS(-1, -1)) + fieldJ(DS(+1, -1)) + fieldJ(DS(-1, +1)) + fieldJ(DS(+1, +1))) + + // sum next neighbours, i.e. edge neighbors, weighting S + S * (fieldJ(DS(-1, 0)) + fieldJ(DS(+1, 0)) + fieldJ(DS(0, -1)) + fieldJ(DS(0, +1))) + + // add original value, i.e. 
center cell, weighting M + M * (fieldJ(DS(0, 0))); + + /* calc average by normalizing weighted sum + * In 2D there are: + * - original value with weighting M + * - 4 nearest neighbours with weighting S + * - 4 next to nearest neighbours with weighting D + */ + constexpr float_X inverseDivisor = 1._X / (M + 4._X * S + 4._X * D); + averagedJ *= inverseDivisor; + + constexpr float_X deltaT = DELTA_T; + *fieldE -= averagedJ * (1._X / EPS0) * deltaT; + } + }; + + } // namespace detail + + + /** Smoothing the current density before passing it to the field solver + * + * This technique mitigates numerical Cherenkov effects and short wavelength + * instabilities as it effectively implements a low pass filter which + * damps high frequency noise (near the Nyquist frequency) in the + * current distribution. + * + * A description and a two-dimensional implementation of this filter + * is given in + * CK Birdsall, AB Langdon. Plasma Physics via Computer Simulation. Appendix C. Taylor & Francis, 2004. + * It is a 2D version of the commonly used one-dimensional three points filter with binomial coefficients + * + * The three-dimensional extension of the above two-dimensional smoothing scheme + * uses all 26 neighbors of a cell. + */ + struct Binomial : public detail::Binomial { - using TypeJ = typename T_DataBoxJ::ValueType; - using DS = DataSpace< dim >; - - // weighting for original value, i.e. center element of a cell - constexpr float_X M = 4.0; - // weighting for nearest neighbours, i.e. cells sharing an edge with the center cell - constexpr float_X S = 2.0; - // weighting for next to nearest neighbours, i.e. cells sharing a corner with the center cell - constexpr float_X D = 1.0; - - TypeJ averagedJ = - // sum next to nearest neighbours, i.e. corner neighbors, weighting D - D * ( - fieldJ( DS( -1, -1 ) ) + fieldJ( DS( +1, -1 ) ) + - fieldJ( DS( -1, +1 ) ) + fieldJ( DS( +1, +1 ) ) - ) + - // sum next neighbours, i.e. 
edge neighbors, weighting S - S * ( - fieldJ( DS( -1, 0 ) ) + fieldJ( DS( +1, 0 ) ) + - fieldJ( DS( 0, -1 ) ) + fieldJ( DS( 0, +1 ) ) - ) + - // add original value, i.e. center cell, weighting M - M * ( - fieldJ( DS( 0, 0 ) ) - ); - - /* calc average by normalizing weighted sum - * In 2D there are: - * - original value with weighting M - * - 4 nearest neighbours with weighting S - * - 4 next to nearest neighbours with weighting D - */ - constexpr float_X inverseDivisor = 1._X / ( M + 4._X * S + 4._X * D ); - averagedJ *= inverseDivisor; - - constexpr float_X deltaT = DELTA_T; - *fieldE -= averagedJ * ( 1._X / EPS0 ) * deltaT; - } - }; - -} // namespace detail - - - /** Smoothing the current density before passing it to the field solver - * - * This technique mitigates numerical Cherenkov effects and short wavelength - * instabilities as it effectively implements a low pass filter which - * damps high frequency noise (near the Nyquist frequency) in the - * current distribution. - * - * A description and a two-dimensional implementation of this filter - * is given in - * CK Birdsall, AB Langdon. Plasma Physics via Computer Simulation. Appendix C. Taylor & Francis, 2004. - * It is a 2D version of the commonly used one-dimensional three points filter with binomial coefficients - * - * The three-dimensional extension of the above two-dimensional smoothing scheme - * uses all 26 neighbors of a cell. 
- */ - struct Binomial : public detail::Binomial< simDim > - { - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( - "name", - "Binomial" - ); - propList[ "param" ] = "period=1;numPasses=1;compensator=false"; - return propList; - } - }; - -} // namespace currentInterpolation - -namespace traits -{ + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Binomial"); + propList["param"] = "period=1;numPasses=1;compensator=false"; + return propList; + } + }; - /* Get margin of the current interpolation - * - * This class defines a LowerMargin and an UpperMargin. - */ - template< > - struct GetMargin< picongpu::currentInterpolation::Binomial > + } // namespace currentInterpolation + + namespace traits { - private: - using MyInterpolation = picongpu::currentInterpolation::Binomial; + /* Get margin of the current interpolation + * + * This class defines a LowerMargin and an UpperMargin. + */ + template<> + struct GetMargin + { + private: + using MyInterpolation = picongpu::currentInterpolation::Binomial; - public: - using LowerMargin = typename MyInterpolation::LowerMargin; - using UpperMargin = typename MyInterpolation::UpperMargin; - }; + public: + using LowerMargin = typename MyInterpolation::LowerMargin; + using UpperMargin = typename MyInterpolation::UpperMargin; + }; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/CurrentInterpolation.def b/include/picongpu/fields/currentInterpolation/CurrentInterpolation.def index c91dcf199e..3bfcb0debf 100644 --- a/include/picongpu/fields/currentInterpolation/CurrentInterpolation.def +++ b/include/picongpu/fields/currentInterpolation/CurrentInterpolation.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -20,4 +20,3 @@ #include "picongpu/fields/currentInterpolation/None/None.def" #include "picongpu/fields/currentInterpolation/Binomial/Binomial.def" -#include "picongpu/fields/currentInterpolation/NoneDS/NoneDS.def" diff --git a/include/picongpu/fields/currentInterpolation/CurrentInterpolation.hpp b/include/picongpu/fields/currentInterpolation/CurrentInterpolation.hpp index ef4f76ab80..7812888cab 100644 --- a/include/picongpu/fields/currentInterpolation/CurrentInterpolation.hpp +++ b/include/picongpu/fields/currentInterpolation/CurrentInterpolation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -17,7 +17,77 @@ * If not, see . */ +#pragma once #include "picongpu/fields/currentInterpolation/None/None.hpp" #include "picongpu/fields/currentInterpolation/Binomial/Binomial.hpp" -#include "picongpu/fields/currentInterpolation/NoneDS/NoneDS.hpp" + +#include +#include + + +namespace picongpu +{ + namespace currentInterpolation + { + /** Singleton to represent current interpolation kind + * + * It does not perform interpolation itself, that is done by functors None and Binomial. + * Provides run-time utilities to get margin values and string properties. + * + * Note: for now it is called CurrentInterpolationInfo to not conflict with CurrentInterpolation type alias + * used in standard .param files. Will be renamed to just CurrentInterpolation after a transition to a + * run-time parameter + */ + struct CurrentInterpolationInfo + { + public: + //! Supported interpolation kinds + enum class Kind + { + None, + Binomial + }; + + //! Interpolation kind used in the simulation + Kind kind = Kind::None; + + //! Get the single instance of the current interpolation object + static CurrentInterpolationInfo& get() + { + static CurrentInterpolationInfo instance; + return instance; + } + + //! 
Get string properties + static pmacc::traits::StringProperty getStringProperties() + { + return get().kind == Kind::None ? None::getStringProperties() : Binomial::getStringProperties(); + } + + //! Get the lower margin of the used interpolation functor + static pmacc::math::Vector getLowerMargin() + { + return get().kind == Kind::None ? None::LowerMargin::toRT() : Binomial::LowerMargin::toRT(); + } + + //! Get the upper margin of the used interpolation functor + static pmacc::math::Vector getUpperMargin() + { + return get().kind == Kind::None ? None::UpperMargin::toRT() : Binomial::UpperMargin::toRT(); + } + + //! Copy construction is forbidden + CurrentInterpolationInfo(CurrentInterpolationInfo const&) = delete; + + //! Assignment is forbidden + CurrentInterpolationInfo& operator=(CurrentInterpolationInfo const&) = delete; + + private: + CurrentInterpolationInfo() = default; + ~CurrentInterpolationInfo() = default; + }; + + } // namespace currentInterpolation + +} // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/None/None.def b/include/picongpu/fields/currentInterpolation/None/None.def index 13ca3bc81d..dca886acde 100644 --- a/include/picongpu/fields/currentInterpolation/None/None.def +++ b/include/picongpu/fields/currentInterpolation/None/None.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -22,15 +22,14 @@ namespace picongpu { -namespace currentInterpolation -{ - - /* None interpolated current assignment - * - * Default for staggered grids/Yee-scheme. - * Updates field E only. - */ - struct None; + namespace currentInterpolation + { + /* None interpolated current assignment functor + * + * Default for staggered grids/Yee-scheme. + * Updates field E only. 
+ */ + struct None; -} // namespace currentInterpolation + } // namespace currentInterpolation } // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/None/None.hpp b/include/picongpu/fields/currentInterpolation/None/None.hpp index b9ab590204..6254f32941 100644 --- a/include/picongpu/fields/currentInterpolation/None/None.hpp +++ b/include/picongpu/fields/currentInterpolation/None/None.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl, Benjamin Worpitz +/* Copyright 2015-2021 Axel Huebl, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -26,65 +26,49 @@ namespace picongpu { -namespace currentInterpolation -{ - - struct None + namespace currentInterpolation { - static constexpr uint32_t dim = simDim; - - using LowerMargin = typename pmacc::math::CT::make_Int< - dim, - 0 - >::type; - using UpperMargin = LowerMargin; - - template< - typename T_DataBoxE, - typename T_DataBoxB, - typename T_DataBoxJ - > - HDINLINE void operator()( - T_DataBoxE fieldE, - T_DataBoxB const, - T_DataBoxJ const fieldJ - ) + struct None { - DataSpace< dim > const self; + static constexpr uint32_t dim = simDim; - constexpr float_X deltaT = DELTA_T; - fieldE( self ) -= fieldJ( self ) * ( float_X( 1.0 ) / EPS0 ) * deltaT; - } + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = LowerMargin; - static pmacc::traits::StringProperty getStringProperties( ) - { - pmacc::traits::StringProperty propList( - "name", - "none" - ); - return propList; - } - }; + template + HDINLINE void operator()(T_DataBoxE fieldE, T_DataBoxB const, T_DataBoxJ const fieldJ) + { + DataSpace const self; -} // namespace currentInterpolation + constexpr float_X deltaT = DELTA_T; + fieldE(self) -= fieldJ(self) * (float_X(1.0) / EPS0) * deltaT; + } -namespace traits -{ + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "none"); + return propList; + } + }; - /* Get margin of the current interpolation 
- * - * This class defines a LowerMargin and an UpperMargin. - */ - template< > - struct GetMargin< picongpu::currentInterpolation::None > + } // namespace currentInterpolation + + namespace traits { - private: - using MyInterpolation = picongpu::currentInterpolation::None; + /* Get margin of the current interpolation + * + * This class defines a LowerMargin and an UpperMargin. + */ + template<> + struct GetMargin + { + private: + using MyInterpolation = picongpu::currentInterpolation::None; - public: - using LowerMargin = typename MyInterpolation::LowerMargin; - using UpperMargin = typename MyInterpolation::UpperMargin; - }; + public: + using LowerMargin = typename MyInterpolation::LowerMargin; + using UpperMargin = typename MyInterpolation::UpperMargin; + }; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/NoneDS/NoneDS.def b/include/picongpu/fields/currentInterpolation/NoneDS/NoneDS.def deleted file mode 100644 index 3a7386ccfa..0000000000 --- a/include/picongpu/fields/currentInterpolation/NoneDS/NoneDS.def +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright 2015-2020 Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -namespace picongpu -{ -namespace currentInterpolation -{ - - /* The standard interpolation for Directional Splitting - * - * Experimental assignment for all-centered cells used in directional splitting. - * Updates E & B at the same time. - */ - struct NoneDS; - -} // namespace currentInterpolation -} // namespace picongpu diff --git a/include/picongpu/fields/currentInterpolation/NoneDS/NoneDS.hpp b/include/picongpu/fields/currentInterpolation/NoneDS/NoneDS.hpp deleted file mode 100644 index 5ba1c18245..0000000000 --- a/include/picongpu/fields/currentInterpolation/NoneDS/NoneDS.hpp +++ /dev/null @@ -1,253 +0,0 @@ -/* Copyright 2015-2020 Axel Huebl, Benjamin Worpitz - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/fields/currentInterpolation/None/None.def" -#include "picongpu/algorithms/DifferenceToUpper.hpp" -#include "picongpu/algorithms/LinearInterpolateWithUpper.hpp" -#include "picongpu/fields/MaxwellSolver/Yee/Curl.hpp" - -#include -#include - - -namespace picongpu -{ -namespace currentInterpolation -{ -namespace detail -{ - template - struct LinearInterpolateComponentPlaneUpper - { - static constexpr uint32_t dim = T_simDim; - - /* UpperMargin is actually 0 in direction of T_plane */ - using LowerMargin = typename pmacc::math::CT::make_Int< - dim, - 0 - >::type; - using UpperMargin = typename pmacc::math::CT::make_Int< - dim, - 1 - >::type; - - template - HDINLINE float_X operator()( DataBox const & field ) const - { - DataSpace< dim > const self; - DataSpace< dim > up; - up[(T_plane + 1) % dim] = 1; - - using Avg = LinearInterpolateWithUpper< dim >; - - typename Avg::template GetInterpolatedValue< ( T_plane + 2 ) % dim > const avg; - - return float_X( 0.5 ) * ( avg( field )[ T_plane ] + avg( field.shift( up ) )[ T_plane ] ); - } - }; - - /* shift a databox along a specific direction - * - * returns the identity (assume periodic symmetry) if direction is not - * available, such as in a 2D simulation - * - * \todo accept a full CT::Vector and shift if possible - * \todo call with CT::Vector of correct dimensionality that was created - * with AssignIfInRange... 
- * - * \tparam T_simDim maximum dimensionality of the mesh - * \tparam T_direction (0)X (1)Y or (2)Z for the direction one wants to - * shift to - * \tparam isShiftAble auto-filled value that decides if this direction - * is actually non-existent == periodic - */ - template< - uint32_t T_simDim, - uint32_t T_direction, - bool isShiftAble = ( T_direction < T_simDim ) - > - struct ShiftMeIfYouCan - { - static constexpr uint32_t dim = T_simDim; - static constexpr uint32_t dir = T_direction; - - HDINLINE ShiftMeIfYouCan() - { - } - - template< typename T_DataBox > - HDINLINE T_DataBox operator()( T_DataBox const & dataBox ) const - { - DataSpace< dim > shift; - shift[ dir ] = 1; - return dataBox.shift( shift ); - } - }; - - template< - uint32_t T_simDim, - uint32_t T_direction - > - struct ShiftMeIfYouCan< - T_simDim, - T_direction, - false - > - { - HDINLINE ShiftMeIfYouCan() - { - } - - template< typename T_DataBox > - HDINLINE T_DataBox operator()( T_DataBox const & dataBox ) const - { - return dataBox; - } - }; - - /* that is not a "real" yee curl, but it looks a bit like it */ - template< typename Difference > - struct ShiftCurl - { - using LowerMargin = typename Difference::OffsetOrigin; - using UpperMargin = typename Difference::OffsetEnd; - - template - HDINLINE typename DataBox::ValueType operator()( DataBox const & mem ) const - { - typename Difference::template GetDifference< 0 > const Dx; - typename Difference::template GetDifference< 1 > const Dy; - typename Difference::template GetDifference< 2 > const Dz; - - ShiftMeIfYouCan< - simDim, - 0 - > const sx; - ShiftMeIfYouCan< - simDim, - 1 - > const sy; - ShiftMeIfYouCan< - simDim, - 2 - > const sz; - - return float3_X( - Dy( sx( mem ) ).z( ) - Dz( sx( mem ) ).y( ), - Dz( sy( mem ) ).x( ) - Dx( sy( mem ) ).z( ), - Dx( sz( mem ) ).y( ) - Dy( sz( mem ) ).x( ) - ); - } - }; -} // namespace detail - - struct NoneDS - { - static constexpr uint32_t dim = simDim; - - typedef typename 
pmacc::math::CT::make_Int::type LowerMargin; - typedef typename pmacc::math::CT::make_Int::type UpperMargin; - - template< - typename T_DataBoxE, - typename T_DataBoxB, - typename T_DataBoxJ - > - HDINLINE void operator()( - T_DataBoxE fieldE, - T_DataBoxB fieldB, - T_DataBoxJ const fieldJ - ) - { - using TypeJ = typename T_DataBoxJ::ValueType; - using ComponentJ = typename GetComponentsType< TypeJ >::type; - - DataSpace< dim > const self; - - constexpr ComponentJ deltaT = DELTA_T; - ComponentJ const constE = ( float_X( 1.0 ) / EPS0 ) * deltaT; - ComponentJ const constB = ( float_X( 0.25 ) / EPS0 ) * deltaT * deltaT; - - detail::LinearInterpolateComponentPlaneUpper< - dim, - 0 - > const avgX; - ComponentJ const jXavg = avgX( fieldJ ); - detail::LinearInterpolateComponentPlaneUpper< - dim, - 1 - > const avgY; - ComponentJ const jYavg = avgY( fieldJ ); - detail::LinearInterpolateComponentPlaneUpper< - dim, - 2 - > const avgZ; - ComponentJ const jZavg = avgZ( fieldJ ); - - TypeJ const jAvgE = TypeJ( - jXavg, - jYavg, - jZavg - ); - fieldE( self ) -= jAvgE * constE; - - using CurlRight = fields::maxwellSolver::yee::Curl< DifferenceToUpper< dim > >; - using ShiftCurlRight = detail::ShiftCurl< DifferenceToUpper< dim > >; - CurlRight curl; - ShiftCurlRight shiftCurl; - - TypeJ const jAvgB = curl( fieldJ ) + shiftCurl( fieldJ ); - fieldB(self) += jAvgB * constB; - } - - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( - "name", - "none" - ); - return propList; - } - }; - -} // namespace currentInterpolation - -namespace traits -{ - - /* Get margin of the current interpolation - * - * This class defines a LowerMargin and an UpperMargin. 
- */ - template< > - struct GetMargin< picongpu::currentInterpolation::NoneDS > - { - private: - using MyInterpolation = picongpu::currentInterpolation::NoneDS; - - public: - using LowerMargin = typename MyInterpolation::LowerMargin; - using UpperMargin = typename MyInterpolation::UpperMargin; - }; - -} // namespace traits -} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/BackwardDerivative.hpp b/include/picongpu/fields/differentiation/BackwardDerivative.hpp new file mode 100644 index 0000000000..7b8b1cfe70 --- /dev/null +++ b/include/picongpu/fields/differentiation/BackwardDerivative.hpp @@ -0,0 +1,82 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/differentiation/Derivative.def" +#include "picongpu/fields/differentiation/Traits.hpp" + +#include +#include + +#include + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + /** Functor for backward difference derivative along the given direction + * + * Computes (current - lower) / step, previously called DifferenceToLower. + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct BackwardDerivativeFunctor + { + //! 
Lower margin + using LowerMargin = typename pmacc::math::CT::make_BasisVector::type; + + //! Upper margin + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + /** Return derivative value at the given point + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + using Index = pmacc::DataSpace; + auto const lowerIndex = -pmacc::math::basisVector(); + return (data(Index{}) - data(lowerIndex)) / cellSize[T_direction]; + } + }; + + namespace traits + { + /** Functor type trait specialization for backward derivative + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor + : pmacc::meta::accessors::Identity> + { + }; + + } // namespace traits + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/Curl.def b/include/picongpu/fields/differentiation/Curl.def new file mode 100644 index 0000000000..f49e894263 --- /dev/null +++ b/include/picongpu/fields/differentiation/Curl.def @@ -0,0 +1,41 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/fields/differentiation/Derivative.def" + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + /** Functor to compute field curl at the given point + * + * @tparam T_Derivative derivative tag (not functor), defines the + * finite-difference scheme for partial derivatives + */ + template + struct Curl; + + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/Curl.hpp b/include/picongpu/fields/differentiation/Curl.hpp new file mode 100644 index 0000000000..8cd238558e --- /dev/null +++ b/include/picongpu/fields/differentiation/Curl.hpp @@ -0,0 +1,128 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/fields/differentiation/Curl.def" +#include "picongpu/fields/differentiation/Derivative.hpp" +#include "picongpu/traits/GetMargin.hpp" + +#include + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + /** Functor to compute field curl at the given point + * + * @tparam T_Derivative derivative tag (not functor), defines the + * finite-difference scheme for partial derivatives + */ + template + struct Curl + { + //! Derivative tag + using Derivative = T_Derivative; + + //! 
Derivative function along x type + using XDerivativeFunctor = decltype(makeDerivativeFunctor()); + + //! Derivative function along y type + using YDerivativeFunctor = decltype(makeDerivativeFunctor()); + + //! Derivative function along z type + using ZDerivativeFunctor = decltype(makeDerivativeFunctor()); + + //! Lower margin: max of the derivative lower margins + using LowerMargin = typename pmacc::math::CT::max< + typename pmacc::math::CT::max< + typename GetLowerMargin::type, + typename GetLowerMargin::type>::type, + typename GetLowerMargin::type>::type; + + //! Upper margin: max of the derivative upper margins + using UpperMargin = typename pmacc::math::CT::max< + typename pmacc::math::CT::max< + typename GetUpperMargin::type, + typename GetUpperMargin::type>::type, + typename GetUpperMargin::type>::type; + + //! Create curl functor + HDINLINE Curl() + : xDerivativeFunctor(makeDerivativeFunctor()) + , yDerivativeFunctor(makeDerivativeFunctor()) + , zDerivativeFunctor(makeDerivativeFunctor()) + { + } + + /** Return curl value at the given point + * + * @tparam T_DataBox data box type with field data + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + auto const dFdx = xDerivative(data); + auto const dFdy = yDerivative(data); + auto const dFdz = zDerivative(data); + return float3_X{dFdy.z() - dFdz.y(), dFdz.x() - dFdx.z(), dFdx.y() - dFdy.x()}; + } + + /** Return x derivative value at the given point + * + * @tparam T_DataBox data box type with field data + */ + template + HDINLINE typename T_DataBox::ValueType xDerivative(T_DataBox const& data) const + { + return xDerivativeFunctor(data); + } + + /** Return y derivative value at the given point + * + * @tparam T_DataBox data box type with field data + */ + template + HDINLINE typename T_DataBox::ValueType yDerivative(T_DataBox const& data) const + { + return yDerivativeFunctor(data); + } + + /** Return z derivative value at the given point + * + * @tparam 
T_DataBox data box type with field data + */ + template + HDINLINE typename T_DataBox::ValueType zDerivative(T_DataBox const& data) const + { + return zDerivativeFunctor(data); + } + + private: + XDerivativeFunctor const xDerivativeFunctor; + YDerivativeFunctor const yDerivativeFunctor; + ZDerivativeFunctor const zDerivativeFunctor; + }; + + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/Derivative.def b/include/picongpu/fields/differentiation/Derivative.def new file mode 100644 index 0000000000..269c8478b3 --- /dev/null +++ b/include/picongpu/fields/differentiation/Derivative.def @@ -0,0 +1,40 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + //! Forward (upper - current) difference derivative tag + struct Forward; + + //! Backward (current - lower) difference derivative tag + struct Backward; + + //! 
Zero derivative tag + struct Zero; + + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/Derivative.hpp b/include/picongpu/fields/differentiation/Derivative.hpp new file mode 100644 index 0000000000..b99127b48f --- /dev/null +++ b/include/picongpu/fields/differentiation/Derivative.hpp @@ -0,0 +1,79 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/differentiation/BackwardDerivative.hpp" +#include "picongpu/fields/differentiation/Derivative.def" +#include "picongpu/fields/differentiation/ForwardDerivative.hpp" +#include "picongpu/fields/differentiation/Traits.hpp" +#include "picongpu/fields/differentiation/ZeroDerivative.hpp" + +#include + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + /** Interface of field derivative functors created by makeDerivativeFunctor() + * + * In addition to operator(), the functor must be copyable and assignable. 
+ */ + struct DerivativeFunctorConcept + { + /** Return derivative value at the given point + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const; + }; + + /** Type of derivative functor for the given derivative tag and direction + * + * Derivative tag defines the scheme and is used for configuration, while + * the functor actually computes the derivatives along the given direction. + * + * @tparam T_Derivative derivative tag, defines the finite-difference scheme + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + using DerivativeFunctor = typename traits::DerivativeFunctor::type; + + /** Create a functor to compute field derivative along the given direction + * + * In case T_direction is >= simDim, returns the zero derivative functor + * + * @tparam T_Derivative derivative tag, defines the finite-difference scheme + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + HDINLINE auto makeDerivativeFunctor() + { + return traits::MakeDerivativeFunctor{}(); + } + + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/ForwardDerivative.hpp b/include/picongpu/fields/differentiation/ForwardDerivative.hpp new file mode 100644 index 0000000000..c47734c723 --- /dev/null +++ b/include/picongpu/fields/differentiation/ForwardDerivative.hpp @@ -0,0 +1,82 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/differentiation/Derivative.def" +#include "picongpu/fields/differentiation/Traits.hpp" + +#include +#include + +#include + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + /** Functor for forward difference derivative along the given direction + * + * Computes (upper - current) / step, previously called DifferenceToUpper. + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct ForwardDerivativeFunctor + { + //! Lower margin + using LowerMargin = typename pmacc::math::CT::make_Int::type; + + //! 
Upper margin + using UpperMargin = typename pmacc::math::CT::make_BasisVector::type; + + /** Return derivative value at the given point + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + using Index = pmacc::DataSpace; + auto const upperIndex = pmacc::math::basisVector(); + return (data(upperIndex) - data(Index{})) / cellSize[T_direction]; + } + }; + + namespace traits + { + /** Functor type trait specialization for forward derivative + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor + : pmacc::meta::accessors::Identity> + { + }; + + } // namespace traits + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/Traits.hpp b/include/picongpu/fields/differentiation/Traits.hpp new file mode 100644 index 0000000000..0db9250370 --- /dev/null +++ b/include/picongpu/fields/differentiation/Traits.hpp @@ -0,0 +1,90 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/differentiation/Derivative.def" + +#include + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + namespace traits + { + /** Type trait for derivative functor for the given derivative tag and + * direction, accessible as ::type + * + * Has to be specialized for each derivative tag. + * + * @tparam T_Derivative derivative tag, defines the finite-difference scheme + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor; + + /** Factory for functors to compute field derivative along the given direction + * + * In case T_direction is >= simDim, returns the zero derivative functor. + * Does not need to be specialized when DerivativeFunctor is specialized. + * + * @tparam T_Derivative derivative tag, defines the finite-difference scheme + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + * @tparam T_isLesserThanDim flag to decide between normal and zero derivative + */ + template + struct MakeDerivativeFunctor + { + using Functor = typename DerivativeFunctor::type; + + //! Return a functor + HDINLINE Functor operator()() const + { + return Functor{}; + } + }; + + /** Factory for functors to compute field derivative along the given direction + * + * Implementation for T_direction >= simDim, always returns zero derivative + * + * @tparam T_Derivative derivative tag, defines the finite-difference scheme + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct MakeDerivativeFunctor + { + using ZeroFunctor = typename DerivativeFunctor::type; + + //! 
Return a zero functor + HDINLINE ZeroFunctor operator()() const + { + return ZeroFunctor{}; + } + }; + + } // namespace traits + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/differentiation/ZeroDerivative.hpp b/include/picongpu/fields/differentiation/ZeroDerivative.hpp new file mode 100644 index 0000000000..6a22b4ccc2 --- /dev/null +++ b/include/picongpu/fields/differentiation/ZeroDerivative.hpp @@ -0,0 +1,80 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/differentiation/Derivative.def" +#include "picongpu/fields/differentiation/Traits.hpp" + +#include +#include + +#include + + +namespace picongpu +{ + namespace fields + { + namespace differentiation + { + /** Functor for zero derivative along the given direction + * + * Always returns zero. + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct ZeroDerivativeFunctor + { + //! Lower margin + using LowerMargin = typename pmacc::math::CT::make_Int::type; + + //! 
Upper margin + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + /** Return zero + * + * @tparam T_DataBox data box type with field data + * @param data position in the data box to compute derivative at + */ + template + HDINLINE typename T_DataBox::ValueType operator()(T_DataBox const& data) const + { + return T_DataBox::ValueType::create(0.0_X); + } + }; + + namespace traits + { + /** Functor type trait specialization for zero derivative + * + * @tparam T_direction direction to take derivative in, 0 = x, 1 = y, 2 = z + */ + template + struct DerivativeFunctor + : pmacc::meta::accessors::Identity> + { + }; + + } // namespace traits + } // namespace differentiation + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.def b/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.def index d4b213f82d..e6e9c59373 100644 --- a/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.def +++ b/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.def @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Ilja Goethel, Axel Huebl +/* Copyright 2018-2021 Ilja Goethel, Axel Huebl * * * This file is part of PIConGPU. 
@@ -25,153 +25,157 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace expRampWithPrepulse -{ -namespace defaults -{ - struct ExpRampWithPrepulseParam + namespace fields { - // Intensities of prepulse and exponential preramp - static constexpr float_X INT_RATIO_PREPULSE = 0.; - static constexpr float_X INT_RATIO_POINT_1 = 1.e-8; - static constexpr float_X INT_RATIO_POINT_2 = 1.e-4; - static constexpr float_X INT_RATIO_POINT_3 = 1.e-4; - - // time-positions of prepulse and preramps points - static constexpr float_64 TIME_PREPULSE_SI = -950.0e-15; - static constexpr float_64 TIME_PEAKPULSE_SI = 0.0e-15; - static constexpr float_64 TIME_POINT_1_SI = -1000.0e-15; - static constexpr float_64 TIME_POINT_2_SI = -300.0e-15; - static constexpr float_64 TIME_POINT_3_SI = -100.0e-15; - - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** UNITCONV */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 20.; - - /** unit: Volt /meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt /meter */ - //constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Stretch temporal profile by a constant plateau between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 0.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 3.0e-14 / 2.35482; // half of the time in which E falls to half its initial value (then I falls to half its value in 15fs, approx 6 wavelengths). Those are 4.8 wavelenghts. - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * WO_X_SI is this distance in x-direction - * W0_Z_SI is this distance in z-direction - * if both values are equal, the laser has a circular shape in x-z - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * unit: meter */ - static constexpr float_64 W0_X_SI = 2.5 * WAVE_LENGTH_SI; - static constexpr float_64 W0_Z_SI = W0_X_SI; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before plateau - * and half at the end of the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 16.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. 
- * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarisation types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; -} // namespace defaults -} // namespace expRampWithPrepulse - - /** Wavepacket with spatial Gaussian envelope and adjustable temporal shape. - * - * Allows defining a prepulse and two regions of exponential preramp with - * independent slopes. The definition works by specifying three (t, intensity)- - * points, where time is counted from the very beginning in SI and the - * intensity (yes, intensity, not amplitude) is given in multiples of the main - * peak. - * - * Be careful - problematic for few cycle pulses. Thought the rest is cloned - * from laserWavepacket, the correctionFactor is not included (this made a - * correction to the laser phase, which is necessary for very short pulses, - * since otherwise a test particle is, after the laser pulse has passed, not - * returned to immobility, as it should). Since the analytical solution is - * only implemented for the Gaussian regime, and we have mostly exponential - * regimes here, it was not retained here. - * - * A Gaussian peak (optionally lengthened by a plateau) is preceded by - * two pieces of exponential preramps, defined by 3 (time, intensity)- - * -points. 
- * - * The first two points get connected by an exponential, the 2nd and - * 3rd point are connected by another exponential, which is then - * extrapolated to the peak. The Gaussian is added everywhere, but - * typically contributes significantly only near the peak. - * It is advisable to set the third point far enough from the plateau - * (approx 3*FWHM), then the contribution from the Gaussian is - * negligible there, and the intensity can be set as measured from the - * laser profile. - * - * Optionally a Gaussian prepulse can be added, given by the parameters - * of the relative intensity and time point. - * The time of the prepulse and the three preramp points are given in - * SI, the intensities are given as multiples of the peak intensity. - * - * @tparam T_Params class parameter to configure the Gaussian Beam profile, - * see members of - * expRampWithPrepulse::defaults::ExpRampWithPrepulseParam - * for required members - */ - template< typename T_Params = expRampWithPrepulse::defaults::ExpRampWithPrepulseParam > - struct ExpRampWithPrepulse; - -} // namespace laserProfiles -} // namespace fields + namespace expRampWithPrepulse + { + namespace defaults + { + struct ExpRampWithPrepulseParam + { + // Intensities of prepulse and exponential preramp + static constexpr float_X INT_RATIO_PREPULSE = 0.; + static constexpr float_X INT_RATIO_POINT_1 = 1.e-8; + static constexpr float_X INT_RATIO_POINT_2 = 1.e-4; + static constexpr float_X INT_RATIO_POINT_3 = 1.e-4; + + // time-positions of prepulse and preramps points + static constexpr float_64 TIME_PREPULSE_SI = -950.0e-15; + static constexpr float_64 TIME_PEAKPULSE_SI = 0.0e-15; + static constexpr float_64 TIME_POINT_1_SI = -1000.0e-15; + static constexpr float_64 TIME_POINT_2_SI = -300.0e-15; + static constexpr float_64 TIME_POINT_3_SI = -100.0e-15; + + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** UNITCONV */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / 
WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 20.; + + /** unit: Volt /meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt /meter */ + // constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Stretch temporal profile by a constant plateau between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI + = 0.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what an experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 3.0e-14 + / 2.35482; // half of the time in which E falls to half its initial value (then I falls to + // half its value in 15fs, approx 6 wavelengths). Those are 4.8 wavelengths.
+ + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * W0_X_SI is this distance in x-direction + * W0_Z_SI is this distance in z-direction + * if both values are equal, the laser has a circular shape in x-z + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * unit: meter */ + static constexpr float_64 W0_X_SI = 2.5 * WAVE_LENGTH_SI; + static constexpr float_64 W0_Z_SI = W0_X_SI; + + /** The laser pulse will be initialized half of RAMP_INIT times the PULSE_LENGTH before the + * plateau and half at the end of the plateau; unit: none */ + static constexpr float_64 RAMP_INIT = 16.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` then the absorbers are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + } // namespace defaults + } // namespace expRampWithPrepulse + + /** Wavepacket with spatial Gaussian envelope and adjustable temporal shape. + * + * Allows defining a prepulse and two regions of exponential preramp with + * independent slopes. The definition works by specifying three (t, intensity)- + * points, where time is counted from the very beginning in SI and the + * intensity (yes, intensity, not amplitude) is given in multiples of the main + * peak.
+ * + * Be careful - problematic for few-cycle pulses. Though the rest is cloned + * from laserWavepacket, the correctionFactor is not included (this made a + * correction to the laser phase, which is necessary for very short pulses, + * since otherwise a test particle is, after the laser pulse has passed, not + * returned to immobility, as it should). Since the analytical solution is + * only implemented for the Gaussian regime, and we have mostly exponential + * regimes here, it was not retained here. + * + * A Gaussian peak (optionally lengthened by a plateau) is preceded by + * two pieces of exponential preramps, defined by 3 (time, intensity) + * points. + * + * The first two points get connected by an exponential, the 2nd and + * 3rd point are connected by another exponential, which is then + * extrapolated to the peak. The Gaussian is added everywhere, but + * typically contributes significantly only near the peak. + * It is advisable to set the third point far enough from the plateau + * (approx 3*FWHM), then the contribution from the Gaussian is + * negligible there, and the intensity can be set as measured from the + * laser profile. + * + * Optionally a Gaussian prepulse can be added, given by the parameters + * of the relative intensity and time point. + * The time of the prepulse and the three preramp points are given in + * SI, the intensities are given as multiples of the peak intensity. 
+ * + * @tparam T_Params class parameter to configure the Gaussian Beam profile, + * see members of + * expRampWithPrepulse::defaults::ExpRampWithPrepulseParam + * for required members + */ + template + struct ExpRampWithPrepulse; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.hpp b/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.hpp index 52cdf7af2a..a8878d249b 100644 --- a/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.hpp +++ b/include/picongpu/fields/laserProfiles/ExpRampWithPrepulse.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Ilja Goethel, Axel Huebl +/* Copyright 2018-2021 Ilja Goethel, Axel Huebl * * This file is part of PIConGPU. * @@ -27,370 +27,353 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace expRampWithPrepulse -{ - template< typename T_Params > - struct Unitless : public T_Params - { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X LASER_NOFOCUS_CONSTANT = float_X( Params::LASER_NOFOCUS_CONSTANT_SI / UNIT_TIME ); // unit: seconds - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X W0_X = float_X( Params::W0_X_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X W0_Z = float_X( Params::W0_Z_SI / UNIT_LENGTH ); // unit: meter - - static constexpr float_64 TIME_PREPULSE = float_64( Params::TIME_PREPULSE_SI / UNIT_TIME ); - static constexpr float_64 TIME_PEAKPULSE = float_64( Params::TIME_PEAKPULSE_SI / UNIT_TIME ); - static constexpr float_64 TIME_1 = float_64( Params::TIME_POINT_1_SI / UNIT_TIME ); - static constexpr float_64 TIME_2 = float_64( Params::TIME_POINT_2_SI / UNIT_TIME 
); - static constexpr float_64 TIME_3 = float_64( Params::TIME_POINT_3_SI / UNIT_TIME ); - static constexpr float_X endUpramp = TIME_PEAKPULSE - 0.5_X * LASER_NOFOCUS_CONSTANT; - static constexpr float_X startDownramp = TIME_PEAKPULSE + 0.5_X * LASER_NOFOCUS_CONSTANT; - - static constexpr float_X INIT_TIME = float_X( ( TIME_PEAKPULSE + Params::RAMP_INIT * PULSE_LENGTH ) / UNIT_TIME ); - - // compile-time checks for physical sanity: - static_assert( - ( TIME_1 < TIME_2 ) && ( TIME_2 < TIME_3 ) && ( TIME_3 < endUpramp ), - "The times in the parameters TIME_POINT_1/2/3 and the beginning of the plateau (which is at TIME_PEAKPULSE - 0.5*RAMP_INIT*PULSE_LENGTH) should be in ascending order" - ); - - // some prerequisites for check of intensities (approximate check, because I can't use exp and log) - static constexpr float_X ratio_dt = ( endUpramp - TIME_3 ) / ( TIME_3 - TIME_2 ); // ratio of time intervals - static constexpr float_X ri1 = Params::INT_RATIO_POINT_3 / Params::INT_RATIO_POINT_2; // first intensity ratio - static constexpr float_X ri2 = 0.2_X / Params::INT_RATIO_POINT_3; // second intensity ratio (0.2 is an arbitrary upper border for the intensity of the exp ramp) - - /* Approximate check, if ri1 ^ ratio_dt > ri2. That would mean, that the exponential curve through (time2, int2) and (time3, int3) lies above (endUpramp, 0.2) - * the power function is emulated by "rounding" the exponent to a rational number and expanding both sides by the common denominator, to get integer powers, see below - * for this, the range for ratio_dt is split into parts; the checked condition is "rounded down", i.e. it's weaker in every point of those ranges except one. 
- */ - static constexpr bool intensity_too_big = - ( ratio_dt >= 3._X && ri1 * ri1 * ri1 > ri2) || - ( ratio_dt >= 2._X && ri1 * ri1 > ri2) || - ( ratio_dt >= 1.5_X && ri1 * ri1 * ri1 > ri2 * ri2) || - ( ratio_dt >= 1._X && ri1 > ri2) || - ( ratio_dt >= 0.8_X && ri1 * ri1 * ri1 * ri1 > ri2 * ri2 * ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.75_X && ri1 * ri1 * ri1 > ri2 * ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.67_X && ri1 * ri1 > ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.6_X && ri1 * ri1 * ri1 > ri2 * ri2 * ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.5_X && ri1 > ri2 * ri2 ) || - ( ratio_dt >= 0.4_X && ri1 * ri1 > ri2 * ri2 * ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.33_X && ri1 > ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.25_X && ri1 > ri2 * ri2 * ri2 * ri2 ) || - ( ratio_dt >= 0.2_X && ri1 > ri2 * ri2 * ri2 * ri2 * ri2 ); - static_assert( - !intensity_too_big, - "The intensities of the ramp are very large - the extrapolation to the time of the main pulse would give more than half of the pulse amplitude. This is not a Gaussian pulse at all anymore - probably some of the parameters are different from what you think!?" - ); - - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - /* a symmetric pulse will be initialized at position z=0 for - * a time of RAMP_INIT * PULSE_LENGTH + LASER_NOFOCUS_CONSTANT = INIT_TIME. - * we shift the complete pulse for the half of this time to start with - * the front of the laser pulse. 
- */ - static constexpr float_X time_start_init = TIME_1 - ( 0.5 * Params::RAMP_INIT * PULSE_LENGTH ); - static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; - static constexpr float_64 w = 2.0 * PI * f; - }; -} // namespace expRampWithPrepulse - -namespace acc -{ - template< typename T_Unitless > - struct ExpRampWithPrepulse : public T_Unitless - { - using Unitless = T_Unitless; - - float3_X m_elong; - float_X m_phase; - typename FieldE::DataBoxType m_dataBoxE; - DataSpace< simDim > m_offsetToTotalDomain; - DataSpace< simDim > m_superCellToLocalOriginCellOffset; - - /** Device-Side Constructor - * - * @param superCellToLocalOriginCellOffset local offset in cells to current supercell - * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly after transform to centered origin) - */ - HDINLINE ExpRampWithPrepulse( - typename FieldE::DataBoxType const & dataBoxE, - DataSpace< simDim > const & superCellToLocalOriginCellOffset, - DataSpace< simDim > const & offsetToTotalDomain, - float3_X const & elong - ) : - m_elong( elong ), - m_dataBoxE( dataBoxE ), - m_offsetToTotalDomain( offsetToTotalDomain ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) - { - } - - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param cellIndexInSuperCell ND cell index in current supercell - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & cellIndexInSuperCell - ) - { - // coordinate system to global simulation as origin - DataSpace< simDim > const localCell( - cellIndexInSuperCell + - m_superCellToLocalOriginCellOffset - ); - - // transform coordinate system to center of x-z plane of initialization - constexpr uint8_t planeNormalDir = 1u; - DataSpace< simDim > offsetToCenterOfPlane( m_offsetToTotalDomain ); - offsetToCenterOfPlane[ planeNormalDir ] = 0; // 
do not shift origin of plane normal - floatD_X const pos = precisionCast< float_X >( localCell + offsetToCenterOfPlane ) * cellSize.shrink< simDim >(); - // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() - - // transversal position only - float3_X const w0_3D( Unitless::W0_X, 0., Unitless::W0_Z ); - auto const w0( w0_3D.shrink< simDim >().remove< planeNormalDir >() ); - auto const pos_trans( pos.remove< planeNormalDir >() ); - auto const exp_compos( pos_trans * pos_trans / ( w0 * w0 ) ); - float_X const exp_arg( exp_compos.sumOfComponents() ); - - m_elong *= math::exp( -1.0_X * exp_arg ); - - if( Unitless::initPlaneY != 0 ) // compile time if - { - /* If the laser is not initialized in the first cell we emit a - * negatively and positively propagating wave. Therefore we need to multiply the - * amplitude with a correction factor depending of the cell size in - * propagation direction. - * The negatively propagating wave is damped by the absorber. - * - * The `correctionFactor` assume that the wave is moving in y direction. - */ - auto const correctionFactor = ( SPEED_OF_LIGHT * DELTA_T ) / CELL_HEIGHT * 2._X; - - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) += correctionFactor * m_elong; - } - else - { - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) = m_elong; - } - } - }; -} // namespace acc - - template< typename T_Params > - struct ExpRampWithPrepulse : public expRampWithPrepulse::Unitless< T_Params > + namespace fields { - using Unitless = expRampWithPrepulse::Unitless< T_Params >; - - float3_X elong; - float_X phase; - typename FieldE::DataBoxType dataBoxE; - DataSpace< simDim > offsetToTotalDomain; - - /** takes time t relative to the center of the Gaussian and returns value - * between 0 and 1, i.e. as multiple of the max value. 
- * use as: amp_t = amp_0 * gauss( t - t_0 ) - */ - HDINLINE float_X - gauss( float_X const t ) - { - float_X const exponent = t / float_X( Unitless::PULSE_LENGTH ); - return math::exp( -0.25_X * exponent * exponent ); - } - - /** get value of exponential curve through two points at given t - * t/t1/t2 given as float_X, since the envelope doesn't need the accuracy - */ - HDINLINE float_X - extrapolate_expo( - float_X const t1, - float_X const a1, - float_X const t2, - float_X const a2, - float_X const t - ) - { - const float_X log1 = ( t2 - t ) * math::log( a1 ); - const float_X log2 = ( t - t1 ) * math::log( a2 ); - return math::exp( ( log1 + log2 )/( t2 - t1 ) ); - } - - HINLINE float_X - get_envelope( float_X runTime ) + namespace laserProfiles { - float_X const AMP_PREPULSE = float_X( math::sqrt( Unitless::INT_RATIO_PREPULSE ) * Unitless::AMPLITUDE); - float_X const AMP_1 = float_X( math::sqrt( Unitless::INT_RATIO_POINT_1 ) * Unitless::AMPLITUDE ); - float_X const AMP_2 = float_X( math::sqrt( Unitless::INT_RATIO_POINT_2 ) * Unitless::AMPLITUDE ); - float_X const AMP_3 = float_X( math::sqrt( Unitless::INT_RATIO_POINT_3 ) * Unitless::AMPLITUDE ); - - float_X env = 0.0; - bool const before_preupramp = runTime < Unitless::time_start_init; - bool const before_start = runTime < Unitless::TIME_1; - bool const before_peakpulse = runTime < Unitless::endUpramp; - bool const during_first_exp = ( Unitless::TIME_1 < runTime ) && - ( runTime < Unitless::TIME_2 ); - bool const after_peakpulse = Unitless::startDownramp <= runTime; - - if( before_preupramp ) - env = 0.; - else if( before_start ) + namespace expRampWithPrepulse { - env = AMP_1 * gauss( runTime - Unitless::TIME_1 ); - } - else if( before_peakpulse ) + template + struct Unitless : public T_Params + { + using Params = T_Params; + + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / 
UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X LASER_NOFOCUS_CONSTANT + = float_X(Params::LASER_NOFOCUS_CONSTANT_SI / UNIT_TIME); // unit: seconds + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X W0_X = float_X(Params::W0_X_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X W0_Z = float_X(Params::W0_Z_SI / UNIT_LENGTH); // unit: meter + + static constexpr float_64 TIME_PREPULSE = float_64(Params::TIME_PREPULSE_SI / UNIT_TIME); + static constexpr float_64 TIME_PEAKPULSE = float_64(Params::TIME_PEAKPULSE_SI / UNIT_TIME); + static constexpr float_64 TIME_1 = float_64(Params::TIME_POINT_1_SI / UNIT_TIME); + static constexpr float_64 TIME_2 = float_64(Params::TIME_POINT_2_SI / UNIT_TIME); + static constexpr float_64 TIME_3 = float_64(Params::TIME_POINT_3_SI / UNIT_TIME); + static constexpr float_X endUpramp = TIME_PEAKPULSE - 0.5_X * LASER_NOFOCUS_CONSTANT; + static constexpr float_X startDownramp = TIME_PEAKPULSE + 0.5_X * LASER_NOFOCUS_CONSTANT; + + static constexpr float_X INIT_TIME + = float_X((TIME_PEAKPULSE + Params::RAMP_INIT * PULSE_LENGTH) / UNIT_TIME); + + // compile-time checks for physical sanity: + static_assert( + (TIME_1 < TIME_2) && (TIME_2 < TIME_3) && (TIME_3 < endUpramp), + "The times in the parameters TIME_POINT_1/2/3 and the beginning of the plateau (which is at " + "TIME_PEAKPULSE - 0.5*RAMP_INIT*PULSE_LENGTH) should be in ascending order"); + + // some prerequisites for check of intensities (approximate check, because I can't use exp and log) + static constexpr float_X ratio_dt + = (endUpramp - TIME_3) / (TIME_3 - TIME_2); // ratio of time intervals + static constexpr float_X ri1 + = Params::INT_RATIO_POINT_3 / Params::INT_RATIO_POINT_2; // first intensity ratio + static constexpr float_X ri2 + = 0.2_X / Params::INT_RATIO_POINT_3; // second intensity ratio (0.2 is an arbitrary upper + // border for the intensity of the exp ramp) + + /* 
Approximate check, if ri1 ^ ratio_dt > ri2. That would mean, that the exponential curve through + * (time2, int2) and (time3, int3) lies above (endUpramp, 0.2) the power function is emulated by + * "rounding" the exponent to a rational number and expanding both sides by the common denominator, + * to get integer powers, see below for this, the range for ratio_dt is split into parts; the + * checked condition is "rounded down", i.e. it's weaker in every point of those ranges except one. + */ + static constexpr bool intensity_too_big = (ratio_dt >= 3._X && ri1 * ri1 * ri1 > ri2) + || (ratio_dt >= 2._X && ri1 * ri1 > ri2) || (ratio_dt >= 1.5_X && ri1 * ri1 * ri1 > ri2 * ri2) + || (ratio_dt >= 1._X && ri1 > ri2) + || (ratio_dt >= 0.8_X && ri1 * ri1 * ri1 * ri1 > ri2 * ri2 * ri2 * ri2 * ri2) + || (ratio_dt >= 0.75_X && ri1 * ri1 * ri1 > ri2 * ri2 * ri2 * ri2) + || (ratio_dt >= 0.67_X && ri1 * ri1 > ri2 * ri2 * ri2) + || (ratio_dt >= 0.6_X && ri1 * ri1 * ri1 > ri2 * ri2 * ri2 * ri2 * ri2) + || (ratio_dt >= 0.5_X && ri1 > ri2 * ri2) + || (ratio_dt >= 0.4_X && ri1 * ri1 > ri2 * ri2 * ri2 * ri2 * ri2) + || (ratio_dt >= 0.33_X && ri1 > ri2 * ri2 * ri2) + || (ratio_dt >= 0.25_X && ri1 > ri2 * ri2 * ri2 * ri2) + || (ratio_dt >= 0.2_X && ri1 > ri2 * ri2 * ri2 * ri2 * ri2); + static_assert( + !intensity_too_big, + "The intensities of the ramp are very large - the extrapolation to the time of the main pulse " + "would give more than half of the pulse amplitude. This is not a Gaussian pulse at all " + "anymore - probably some of the parameters are different from what you think!?"); + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; + + /* a symmetric pulse will be initialized at position z=0 for + * a time of RAMP_INIT * PULSE_LENGTH + LASER_NOFOCUS_CONSTANT = INIT_TIME. 
+ * we shift the complete pulse for the half of this time to start with + * the front of the laser pulse. + */ + static constexpr float_X time_start_init = TIME_1 - (0.5 * Params::RAMP_INIT * PULSE_LENGTH); + static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; + static constexpr float_64 w = 2.0 * PI * f; + }; + } // namespace expRampWithPrepulse + + namespace acc { - float_X const ramp_when_peakpulse = extrapolate_expo( - Unitless::TIME_2, - AMP_2, - Unitless::TIME_3, - AMP_3, - Unitless::endUpramp - ) / Unitless::AMPLITUDE; - - if( ramp_when_peakpulse > 0.5 ) + template + struct ExpRampWithPrepulse : public T_Unitless { - log< picLog::PHYSICS >( - "Attention, the intensities of the laser upramp are very large! " - "The extrapolation of the last exponential to the time of " - "the peakpulse gives more than half of the amplitude of " - "the peak Gaussian. This is not a Gaussian at all anymore, " - "and physically very unplausible, check the params for misunderstandings!" - ); - } - - env += Unitless::AMPLITUDE * ( 1._X - ramp_when_peakpulse ) * - gauss( runTime - Unitless::endUpramp ); - env += AMP_PREPULSE * gauss( runTime - Unitless::TIME_PREPULSE ); - if( during_first_exp ) - env += extrapolate_expo( - Unitless::TIME_1, - AMP_1, - Unitless::TIME_2, - AMP_2, - runTime - ); - else - env += extrapolate_expo( - Unitless::TIME_2, - AMP_2, - Unitless::TIME_3, - AMP_3, - runTime - ); - } - else if( !after_peakpulse ) - env = Unitless::AMPLITUDE; - else // after startDownramp - env = Unitless::AMPLITUDE * gauss( runTime - Unitless::startDownramp ); - return env; - } - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE ExpRampWithPrepulse( uint32_t currentStep ) - { - // get data - DataConnector & dc = Environment< >::get( ).DataConnector( ); - dataBoxE = dc.get< FieldE >( - FieldE::getName(), - true - )->getDeviceDataBox(); - - // get meta data for offsets - SubGrid< simDim > const & subGrid = Environment< simDim 
>::get().SubGrid(); - // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); - DataSpace< simDim > const globalCellOffset( subGrid.getLocalDomain().offset ); - DataSpace< simDim > const halfSimSize( subGrid.getGlobalDomain().size / 2 ); - - // transform coordinate system to center of global simulation as origin [cells] - offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; + using Unitless = T_Unitless; + + float3_X m_elong; + float_X m_phase; + typename FieldE::DataBoxType m_dataBoxE; + DataSpace m_offsetToTotalDomain; + DataSpace m_superCellToLocalOriginCellOffset; + + /** Device-Side Constructor + * + * @param superCellToLocalOriginCellOffset local offset in cells to current supercell + * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly + * after transform to centered origin) + */ + HDINLINE ExpRampWithPrepulse( + typename FieldE::DataBoxType const& dataBoxE, + DataSpace const& superCellToLocalOriginCellOffset, + DataSpace const& offsetToTotalDomain, + float3_X const& elong) + : m_elong(elong) + , m_dataBoxE(dataBoxE) + , m_offsetToTotalDomain(offsetToTotalDomain) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param cellIndexInSuperCell ND cell index in current supercell + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const& cellIndexInSuperCell) + { + // coordinate system to global simulation as origin + DataSpace const localCell(cellIndexInSuperCell + m_superCellToLocalOriginCellOffset); + + // transform coordinate system to center of x-z plane of initialization + constexpr uint8_t planeNormalDir = 1u; + DataSpace offsetToCenterOfPlane(m_offsetToTotalDomain); + offsetToCenterOfPlane[planeNormalDir] = 0; // do not shift origin of plane normal + floatD_X const 
pos + = precisionCast(localCell + offsetToCenterOfPlane) * cellSize.shrink(); + // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() + + // transversal position only + float3_X const w0_3D(Unitless::W0_X, 0., Unitless::W0_Z); + auto const w0(w0_3D.shrink().remove()); + auto const pos_trans(pos.remove()); + auto const exp_compos(pos_trans * pos_trans / (w0 * w0)); + float_X const exp_arg(exp_compos.sumOfComponents()); + + m_elong *= math::exp(-1.0_X * exp_arg); + + if(Unitless::initPlaneY != 0) // compile time if + { + /* If the laser is not initialized in the first cell we emit a + * negatively and positively propagating wave. Therefore we need to multiply the + * amplitude with a correction factor depending of the cell size in + * propagation direction. + * The negatively propagating wave is damped by the absorber. + * + * The `correctionFactor` assume that the wave is moving in y direction. + */ + auto const correctionFactor = (SPEED_OF_LIGHT * DELTA_T) / CELL_HEIGHT * 2._X; + + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) + += correctionFactor * m_elong; + } + else + { + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) = m_elong; + } + } + }; + } // namespace acc + + template + struct ExpRampWithPrepulse : public expRampWithPrepulse::Unitless + { + using Unitless = expRampWithPrepulse::Unitless; - // @todo reset origin of direction of moving window - // offsetToTotalDomain.y() = 0 + float3_X elong; + float_X phase; + typename FieldE::DataBoxType dataBoxE; + DataSpace offsetToTotalDomain; - elong = float3_X::create( 0.0 ); + /** takes time t relative to the center of the Gaussian and returns value + * between 0 and 1, i.e. as multiple of the max value. 
+ * use as: amp_t = amp_0 * gauss( t - t_0 ) + */ + HDINLINE float_X gauss(float_X const t) + { + float_X const exponent = t / float_X(Unitless::PULSE_LENGTH); + return math::exp(-0.25_X * exponent * exponent); + } - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - const float_64 runTime = Unitless::time_start_init - Unitless::laserTimeShift + - DELTA_T * currentStep; + /** get value of exponential curve through two points at given t + * t/t1/t2 given as float_X, since the envelope doesn't need the accuracy + */ + HDINLINE float_X extrapolate_expo( + float_X const t1, + float_X const a1, + float_X const t2, + float_X const a2, + float_X const t) + { + const float_X log1 = (t2 - t) * math::log(a1); + const float_X log2 = (t - t1) * math::log(a2); + return math::exp((log1 + log2) / (t2 - t1)); + } - phase = float_X( Unitless::w * runTime ) + Unitless::LASER_PHASE; + HINLINE float_X get_envelope(float_X runTime) + { + /* workaround for clang 5 linker issues + * `undefined reference to + * `picongpu::fields::laserProfiles::ExpRampWithPrepulseParam::INT_RATIO_POINT_1'` + */ + constexpr auto int_ratio_prepule = Unitless::INT_RATIO_PREPULSE; + constexpr auto int_ratio_point_1 = Unitless::INT_RATIO_POINT_1; + constexpr auto int_ratio_point_2 = Unitless::INT_RATIO_POINT_2; + constexpr auto int_ratio_point_3 = Unitless::INT_RATIO_POINT_3; + float_X const AMP_PREPULSE = float_X(math::sqrt(int_ratio_prepule) * Unitless::AMPLITUDE); + float_X const AMP_1 = float_X(math::sqrt(int_ratio_point_1) * Unitless::AMPLITUDE); + float_X const AMP_2 = float_X(math::sqrt(int_ratio_point_2) * Unitless::AMPLITUDE); + float_X const AMP_3 = float_X(math::sqrt(int_ratio_point_3) * Unitless::AMPLITUDE); + + float_X env = 0.0; + bool const before_preupramp = runTime < Unitless::time_start_init; + bool const before_start = runTime < Unitless::TIME_1; + bool const before_peakpulse = runTime < Unitless::endUpramp; + bool const during_first_exp = 
(Unitless::TIME_1 < runTime) && (runTime < Unitless::TIME_2); + bool const after_peakpulse = Unitless::startDownramp <= runTime; + + if(before_preupramp) + env = 0.; + else if(before_start) + { + env = AMP_1 * gauss(runTime - Unitless::TIME_1); + } + else if(before_peakpulse) + { + float_X const ramp_when_peakpulse + = extrapolate_expo(Unitless::TIME_2, AMP_2, Unitless::TIME_3, AMP_3, Unitless::endUpramp) + / Unitless::AMPLITUDE; + + if(ramp_when_peakpulse > 0.5) + { + log( + "Attention, the intensities of the laser upramp are very large! " + "The extrapolation of the last exponential to the time of " + "the peakpulse gives more than half of the amplitude of " + "the peak Gaussian. This is not a Gaussian at all anymore, " + "and physically very unplausible, check the params for misunderstandings!"); + } + + env += Unitless::AMPLITUDE * (1._X - ramp_when_peakpulse) + * gauss(runTime - Unitless::endUpramp); + env += AMP_PREPULSE * gauss(runTime - Unitless::TIME_PREPULSE); + if(during_first_exp) + env += extrapolate_expo(Unitless::TIME_1, AMP_1, Unitless::TIME_2, AMP_2, runTime); + else + env += extrapolate_expo(Unitless::TIME_2, AMP_2, Unitless::TIME_3, AMP_3, runTime); + } + else if(!after_peakpulse) + env = Unitless::AMPLITUDE; + else // after startDownramp + env = Unitless::AMPLITUDE * gauss(runTime - Unitless::startDownramp); + return env; + } - float_X const envelope = get_envelope( runTime ); + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE ExpRampWithPrepulse(uint32_t currentStep) + { + // get data + DataConnector& dc = Environment<>::get().DataConnector(); + dataBoxE = dc.get(FieldE::getName(), true)->getDeviceDataBox(); + + // get meta data for offsets + SubGrid const& subGrid = Environment::get().SubGrid(); + // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); + DataSpace const globalCellOffset(subGrid.getLocalDomain().offset); + DataSpace const halfSimSize(subGrid.getGlobalDomain().size 
/ 2); + + // transform coordinate system to center of global simulation as origin [cells] + offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; + + // @todo reset origin of direction of moving window + // offsetToTotalDomain.y() = 0 + + elong = float3_X::create(0.0); + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + const float_64 runTime + = Unitless::time_start_init - Unitless::laserTimeShift + DELTA_T * currentStep; + + phase = float_X(Unitless::w * runTime) + Unitless::LASER_PHASE; + + float_X const envelope = get_envelope(runTime); + + if(Unitless::Polarisation == Unitless::LINEAR_X) + { + elong.x() = envelope * math::sin(phase); + } + else if(Unitless::Polarisation == Unitless::LINEAR_Z) + { + elong.z() = envelope * math::sin(phase); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + elong.x() = envelope / math::sqrt(2.0_X) * math::sin(phase); + elong.z() = envelope / math::sqrt(2.0_X) * math::cos(phase); + } + } - if( Unitless::Polarisation == Unitless::LINEAR_X ) - { - elong.x() = envelope * math::sin( phase ); - } - else if( Unitless::Polarisation == Unitless::LINEAR_Z ) - { - elong.z() = envelope * math::sin( phase ); - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) - { - elong.x() = envelope / math::sqrt( 2.0_X ) * math::sin( phase ); - elong.z() = envelope / math::sqrt( 2.0_X ) * math::cos( phase ); - } - } - - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset (in supercells, without guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::ExpRampWithPrepulse< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const & localSupercellOffset, - 
T_WorkerCfg const & - ) const - { - auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); - return acc::ExpRampWithPrepulse< Unitless >( dataBoxE, superCellToLocalOriginCellOffset, offsetToTotalDomain, elong ); - } - - //! get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "ExpRampWithPrepulse"; - } + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset (in supercells, without guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::ExpRampWithPrepulse operator()( + T_Acc const&, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); + return acc::ExpRampWithPrepulse( + dataBoxE, + superCellToLocalOriginCellOffset, + offsetToTotalDomain, + elong); + } - }; + //! get the name of the laser profile + static HINLINE std::string getName() + { + return "ExpRampWithPrepulse"; + } + }; -} // namespace laserProfiles -} // namespace fields + } // namespace laserProfiles + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/laserProfiles/GaussianBeam.def b/include/picongpu/fields/laserProfiles/GaussianBeam.def index 0c274ba2c6..6aba5d3bbf 100644 --- a/include/picongpu/fields/laserProfiles/GaussianBeam.def +++ b/include/picongpu/fields/laserProfiles/GaussianBeam.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, * Richard Pausch, Alexander Debus * * This file is part of PIConGPU. 
@@ -25,114 +25,118 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace gaussianBeam -{ -namespace defaults -{ - //! Use only the 0th Laguerremode for a standard Gaussian - static constexpr uint32_t MODENUMBER = 0; - PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); - // This is just an example for a more complicated set of Laguerre modes - //constexpr uint32_t MODENUMBER = 12; - //PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, -0.0160788); - - struct GaussianBeamParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; - /** the distance to the laser focus in y-direction - * unit: meter */ - static constexpr float_64 FOCUS_POS_SI = 4.62e-5; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - using LAGUERREMODES_t = defaults::LAGUERREMODES_t; - static constexpr uint32_t MODENUMBER = defaults::MODENUMBER; - - /** Available polarisation types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = CIRCULAR; - }; -} // namespace defaults -} // namespace gaussianBeam - - /** Gaussian Beam laser profile with finite pulse length - * - * @tparam T_Params 
class parameter to configure the Gaussian Beam profile, - * see members of gaussianBeam::default::GaussianBeamParam - * for required members - */ - template< typename T_Params = gaussianBeam::defaults::GaussianBeamParam > - struct GaussianBeam; - -} // namespace laserProfiles -} // namespace fields + namespace gaussianBeam + { + namespace defaults + { + //! Use only the 0th Laguerremode for a standard Gaussian + static constexpr uint32_t MODENUMBER = 0; + PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); + // This is just an example for a more complicated set of Laguerre modes + // constexpr uint32_t MODENUMBER = 12; + // PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, + // 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, + // -0.0160788); + + struct GaussianBeamParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; + /** the distance to the laser focus in y-direction + * unit: meter */ + static constexpr float_64 FOCUS_POS_SI = 4.62e-5; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + using LAGUERREMODES_t = defaults::LAGUERREMODES_t; + static constexpr uint32_t MODENUMBER = defaults::MODENUMBER; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = CIRCULAR; + }; + } // namespace defaults + } // namespace gaussianBeam + + /** Gaussian Beam laser profile with finite pulse length + * + * @tparam T_Params class parameter to 
configure the Gaussian Beam profile, + * see members of gaussianBeam::default::GaussianBeamParam + * for required members + */ + template + struct GaussianBeam; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/GaussianBeam.hpp b/include/picongpu/fields/laserProfiles/GaussianBeam.hpp index 100684bb63..ab595c8d2a 100644 --- a/include/picongpu/fields/laserProfiles/GaussianBeam.hpp +++ b/include/picongpu/fields/laserProfiles/GaussianBeam.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, * Richard Pausch, Alexander Debus * * This file is part of PIConGPU. @@ -28,338 +28,358 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace gaussianBeam -{ - template< typename T_Params > - struct Unitless : public T_Params - { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X W0 = float_X( Params::W0_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X FOCUS_POS = float_X( Params::FOCUS_POS_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X INIT_TIME = float_X( ( Params::PULSE_INIT * Params::PULSE_LENGTH_SI ) / UNIT_TIME ); // unit: seconds (full initialization length) - - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; - - }; -} // namespace gaussianBeam - -namespace acc -{ - template< typename T_Unitless > - 
struct GaussianBeam : public T_Unitless + namespace fields { - using Unitless = T_Unitless; - - float3_X m_elong; - float_X m_phase; - typename FieldE::DataBoxType m_dataBoxE; - DataSpace< simDim > m_offsetToTotalDomain; - DataSpace< simDim > m_superCellToLocalOriginCellOffset; - - /** Simple iteration algorithm to implement Laguerre polynomials for GPUs. - * - * @param n order of the Laguerre polynomial - * @param x coordinate at which the polynomial is evaluated - * @return ... - */ - HDINLINE float_X simpleLaguerre( const uint32_t n, const float_X x ) + namespace laserProfiles { - //Result for special case n == 0 - if (n == 0) return 1.0_X; - uint32_t currentN = 1; - float_X laguerreNMinus1 = 1.0_X; - float_X laguerreN = 1.0_X - x; - float_X laguerreNPlus1( 0.0_X ); - while (currentN < n) + namespace gaussianBeam { - //Core statement of the algorithm - laguerreNPlus1 = ( ( 2.0_X * float_X(currentN) + 1.0_X - x) * laguerreN - float_X(currentN) * laguerreNMinus1 ) / float_X(currentN + 1u); - //Advance by one order - laguerreNMinus1 = laguerreN; - laguerreN = laguerreNPlus1; - currentN++; - } - return laguerreN; - } - - /** Device-Side Constructor - * - * @param superCellToLocalOriginCellOffset local offset in cells to current supercell - * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly after transform to centered origin) - */ - HDINLINE GaussianBeam( - typename FieldE::DataBoxType const & dataBoxE, - DataSpace< simDim > const & superCellToLocalOriginCellOffset, - DataSpace< simDim > const & offsetToTotalDomain, - float3_X const & elong, - float_X const phase - ) : - m_elong( elong ), - m_phase( phase ), - m_dataBoxE( dataBoxE ), - m_offsetToTotalDomain( offsetToTotalDomain ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) - { - } - - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param 
cellIndexInSuperCell ND cell index in current supercell - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & cellIndexInSuperCell - ) - { - // coordinate system to global simulation as origin - DataSpace< simDim > const localCell( - cellIndexInSuperCell + - m_superCellToLocalOriginCellOffset - ); - - // transform coordinate system to center of x-z plane of initialization - constexpr uint8_t planeNormalDir = 1u; - DataSpace< simDim > offsetToCenterOfPlane( m_offsetToTotalDomain ); - offsetToCenterOfPlane[ planeNormalDir ] = 0; // do not shift origin of plane normal - floatD_X const pos = precisionCast< float_X >( localCell + offsetToCenterOfPlane ) * cellSize.shrink< simDim >(); - // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() - - // transversal position only - floatD_X planeNoNormal = floatD_X::create( 1.0_X ); - planeNoNormal[ planeNormalDir ] = 0.0_X; - float_X const r2 = math::abs2( pos * planeNoNormal ); - - // calculate focus position relative to the laser initialization plane - float_X const focusPos = Unitless::FOCUS_POS - pos.y(); - - // rayleigh length (in y-direction) - float_X const y_R = float_X( PI ) * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; - - // inverse radius of curvature of the beam's wavefronts - float_X const R_y_inv = -focusPos / ( y_R * y_R + focusPos * focusPos ); - - // initialize temporary variables - float_X etrans( 0.0_X ); - float_X etrans_norm( 0.0_X ); - PMACC_CASSERT_MSG( - MODENUMBER_must_be_smaller_than_number_of_entries_in_LAGUERREMODES_vector, - Unitless::MODENUMBER < Unitless::LAGUERREMODES_t::dim - ); - for( uint32_t m = 0 ; m <= Unitless::MODENUMBER ; ++m ) - etrans_norm += typename Unitless::LAGUERREMODES_t{}[m]; - - // beam waist in the near field: w_y(y=0) == W0 - float_X const w_y = Unitless::W0 * algorithms::math::sqrt( 1.0_X + ( focusPos / y_R )*( focusPos / y_R ) ); - //! 
the Gouy phase shift - float_X const xi_y = algorithms::math::atan( -focusPos / y_R ); - - if( Unitless::Polarisation == Unitless::LINEAR_X || Unitless::Polarisation == Unitless::LINEAR_Z ) - { - for( uint32_t m = 0 ; m <= Unitless::MODENUMBER ; ++m ) + template + struct Unitless : public T_Params { - etrans += typename Unitless::LAGUERREMODES_t{}[m] * simpleLaguerre( m, 2.0_X * r2 / w_y / w_y ) - * math::exp( -r2 / w_y / w_y ) * math::cos( 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * focusPos - 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + ( 2._X * float_X( m ) + 1._X ) * xi_y + m_phase ) - * math::exp( -( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - *( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0_X * Unitless::PULSE_LENGTH ) / ( 2.0_X * Unitless::PULSE_LENGTH ) ); - } - m_elong *= etrans / etrans_norm; - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) + using Params = T_Params; + + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X W0 = float_X(Params::W0_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X FOCUS_POS = float_X(Params::FOCUS_POS_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X INIT_TIME = float_X( + (Params::PULSE_INIT * Params::PULSE_LENGTH_SI) + / UNIT_TIME); // unit: seconds (full initialization length) + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; + + static constexpr float_64 f = SPEED_OF_LIGHT / 
WAVE_LENGTH; + }; + } // namespace gaussianBeam + + namespace acc { - for( uint32_t m = 0 ; m <= Unitless::MODENUMBER ; ++m ) + template + struct GaussianBeam : public T_Unitless { - etrans += typename Unitless::LAGUERREMODES_t{}[m] * simpleLaguerre( m, 2.0_X * r2 / w_y / w_y ) - * math::exp( -r2 / w_y / w_y ) * math::cos( 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * focusPos - 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + ( 2._X * float_X( m ) + 1._X ) * xi_y + m_phase ) - * math::exp( -( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - *( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0_X * Unitless::PULSE_LENGTH ) / ( 2.0_X * Unitless::PULSE_LENGTH ) ); - } - m_elong.x() *= etrans / etrans_norm; - m_phase += float_X( PI / 2.0 ); - etrans = 0.0_X; - for( uint32_t m = 0 ; m <= Unitless::MODENUMBER ; ++m ) + using Unitless = T_Unitless; + + float3_X m_elong; + float_X m_phase; + typename FieldE::DataBoxType m_dataBoxE; + DataSpace m_offsetToTotalDomain; + DataSpace m_superCellToLocalOriginCellOffset; + + /** Simple iteration algorithm to implement Laguerre polynomials for GPUs. + * + * @param n order of the Laguerre polynomial + * @param x coordinate at which the polynomial is evaluated + * @return ... 
+ */ + HDINLINE float_X simpleLaguerre(const uint32_t n, const float_X x) + { + // Result for special case n == 0 + if(n == 0) + return 1.0_X; + uint32_t currentN = 1; + float_X laguerreNMinus1 = 1.0_X; + float_X laguerreN = 1.0_X - x; + float_X laguerreNPlus1(0.0_X); + while(currentN < n) + { + // Core statement of the algorithm + laguerreNPlus1 = ((2.0_X * float_X(currentN) + 1.0_X - x) * laguerreN + - float_X(currentN) * laguerreNMinus1) + / float_X(currentN + 1u); + // Advance by one order + laguerreNMinus1 = laguerreN; + laguerreN = laguerreNPlus1; + currentN++; + } + return laguerreN; + } + + /** Device-Side Constructor + * + * @param superCellToLocalOriginCellOffset local offset in cells to current supercell + * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly + * after transform to centered origin) + */ + HDINLINE GaussianBeam( + typename FieldE::DataBoxType const& dataBoxE, + DataSpace const& superCellToLocalOriginCellOffset, + DataSpace const& offsetToTotalDomain, + float3_X const& elong, + float_X const phase) + : m_elong(elong) + , m_phase(phase) + , m_dataBoxE(dataBoxE) + , m_offsetToTotalDomain(offsetToTotalDomain) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param cellIndexInSuperCell ND cell index in current supercell + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const& cellIndexInSuperCell) + { + // coordinate system to global simulation as origin + DataSpace const localCell(cellIndexInSuperCell + m_superCellToLocalOriginCellOffset); + + // transform coordinate system to center of x-z plane of initialization + constexpr uint8_t planeNormalDir = 1u; + DataSpace offsetToCenterOfPlane(m_offsetToTotalDomain); + offsetToCenterOfPlane[planeNormalDir] = 0; // do not shift origin of plane normal + 
floatD_X const pos + = precisionCast(localCell + offsetToCenterOfPlane) * cellSize.shrink(); + // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() + + // transversal position only + floatD_X planeNoNormal = floatD_X::create(1.0_X); + planeNoNormal[planeNormalDir] = 0.0_X; + float_X const r2 = pmacc::math::abs2(pos * planeNoNormal); + + // calculate focus position relative to the laser initialization plane + float_X const focusPos = Unitless::FOCUS_POS - pos.y(); + + // rayleigh length (in y-direction) + float_X const y_R = float_X(PI) * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; + + // inverse radius of curvature of the beam's wavefronts + float_X const R_y_inv = -focusPos / (y_R * y_R + focusPos * focusPos); + + // initialize temporary variables + float_X etrans(0.0_X); + float_X etrans_norm(0.0_X); + PMACC_CASSERT_MSG( + MODENUMBER_must_be_smaller_than_number_of_entries_in_LAGUERREMODES_vector, + Unitless::MODENUMBER < Unitless::LAGUERREMODES_t::dim); + for(uint32_t m = 0; m <= Unitless::MODENUMBER; ++m) + etrans_norm += typename Unitless::LAGUERREMODES_t{}[m]; + + // beam waist in the near field: w_y(y=0) == W0 + float_X const w_y = Unitless::W0 * math::sqrt(1.0_X + (focusPos / y_R) * (focusPos / y_R)); + //! 
the Gouy phase shift + float_X const xi_y = math::atan(-focusPos / y_R); + + if(Unitless::Polarisation == Unitless::LINEAR_X + || Unitless::Polarisation == Unitless::LINEAR_Z) + { + for(uint32_t m = 0; m <= Unitless::MODENUMBER; ++m) + { + etrans += typename Unitless::LAGUERREMODES_t{}[m] + * simpleLaguerre(m, 2.0_X * r2 / w_y / w_y) * math::exp(-r2 / w_y / w_y) + * math::cos( + 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * focusPos + - 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + + (2._X * float_X(m) + 1._X) * xi_y + m_phase) + * math::exp( + -(r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + * (r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + / SPEED_OF_LIGHT / SPEED_OF_LIGHT / (2.0_X * Unitless::PULSE_LENGTH) + / (2.0_X * Unitless::PULSE_LENGTH)); + } + m_elong *= etrans / etrans_norm; + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + for(uint32_t m = 0; m <= Unitless::MODENUMBER; ++m) + { + etrans += typename Unitless::LAGUERREMODES_t{}[m] + * simpleLaguerre(m, 2.0_X * r2 / w_y / w_y) * math::exp(-r2 / w_y / w_y) + * math::cos( + 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * focusPos + - 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + + (2._X * float_X(m) + 1._X) * xi_y + m_phase) + * math::exp( + -(r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + * (r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + / SPEED_OF_LIGHT / SPEED_OF_LIGHT / (2.0_X * Unitless::PULSE_LENGTH) + / (2.0_X * Unitless::PULSE_LENGTH)); + } + m_elong.x() *= etrans / etrans_norm; + m_phase += float_X(PI / 2.0); + etrans = 0.0_X; + for(uint32_t m = 0; m <= Unitless::MODENUMBER; ++m) + { + etrans += typename Unitless::LAGUERREMODES_t{}[m] + * simpleLaguerre(m, 2.0_X * r2 / w_y / w_y) * math::exp(-r2 / w_y / w_y) + * math::cos( + 2.0_X * float_X(PI) / 
Unitless::WAVE_LENGTH * focusPos + - 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + + (2._X * float_X(m) + 1._X) * xi_y + m_phase) + * math::exp( + -(r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + * (r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + / SPEED_OF_LIGHT / SPEED_OF_LIGHT / (2.0_X * Unitless::PULSE_LENGTH) + / (2.0_X * Unitless::PULSE_LENGTH)); + } + m_elong.z() *= etrans / etrans_norm; + // reminder: if you want to use phase below, substract pi/2 + // m_phase -= float_X( PI / 2.0 ); + } + + if(Unitless::initPlaneY != 0) // compile time if + { + /* If the laser is not initialized in the first cell we emit a + * negatively and positively propagating wave. Therefore we need to multiply the + * amplitude with a correction factor depending of the cell size in + * propagation direction. + * The negatively propagating wave is damped by the absorber. + * + * The `correctionFactor` assume that the wave is moving in y direction. 
+ */ + auto const correctionFactor = (SPEED_OF_LIGHT * DELTA_T) / CELL_HEIGHT * 2._X; + + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) + += correctionFactor * m_elong; + } + else + { + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) = m_elong; + } + } + }; + } // namespace acc + + template + struct GaussianBeam : public gaussianBeam::Unitless + { + using Unitless = gaussianBeam::Unitless; + + float3_X elong; + float_X phase; + typename FieldE::DataBoxType dataBoxE; + DataSpace offsetToTotalDomain; + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE GaussianBeam(uint32_t currentStep) { - etrans += typename Unitless::LAGUERREMODES_t{}[m] * simpleLaguerre( m, 2.0_X * r2 / w_y / w_y ) - * math::exp( -r2 / w_y / w_y ) * math::cos( 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * focusPos - 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + ( 2._X * float_X( m ) + 1._X ) * xi_y + m_phase ) - * math::exp( -( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - *( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0_X * Unitless::PULSE_LENGTH ) / ( 2.0_X * Unitless::PULSE_LENGTH ) ); + // get data + DataConnector& dc = Environment<>::get().DataConnector(); + dataBoxE = dc.get(FieldE::getName(), true)->getDeviceDataBox(); + + // get meta data for offsets + SubGrid const& subGrid = Environment::get().SubGrid(); + // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); + DataSpace const globalCellOffset(subGrid.getLocalDomain().offset); + DataSpace const halfSimSize(subGrid.getGlobalDomain().size / 2); + + // transform coordinate system to center of global simulation as origin [cells] + offsetToTotalDomain = /* totalCellOffset + */ 
globalCellOffset - halfSimSize; + + // @todo reset origin of direction of moving window + // offsetToTotalDomain.y() = 0 + + float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; + + // calculate focus position relative to the laser initialization plane + float_X const focusPos = Unitless::FOCUS_POS - Unitless::initPlaneY * CELL_HEIGHT; + + elong = float3_X::create(0.0_X); + + // This check is done here on HOST, since std::numeric_limits::epsilon() does not compile + // on laserTransversal(), which is on DEVICE. + float_X etrans_norm(0.0_X); + + PMACC_CASSERT_MSG( + MODENUMBER_must_be_smaller_than_number_of_entries_in_LAGUERREMODES_vector, + Unitless::MODENUMBER < Unitless::LAGUERREMODES_t::dim); + for(uint32_t m = 0; m <= Unitless::MODENUMBER; ++m) + etrans_norm += typename Unitless::LAGUERREMODES_t{}[m]; + PMACC_VERIFY_MSG( + math::abs(etrans_norm) > std::numeric_limits::epsilon(), + "Sum of LAGUERREMODES can not be 0."); + + + // a symmetric pulse will be initialized at position z=0 for + // a time of PULSE_INIT * PULSE_LENGTH = INIT_TIME. + // we shift the complete pulse for the half of this time to start with + // the front of the laser pulse. 
+ constexpr float_64 mue = 0.5 * Unitless::INIT_TIME; + + // rayleigh length (in y-direction) + constexpr float_64 y_R = PI * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; + // gaussian beam waist in the nearfield: w_y(y=0) == W0 + float_64 const w_y = Unitless::W0 * math::sqrt(1.0 + (focusPos / y_R) * (focusPos / y_R)); + + float_64 envelope = float_64(Unitless::AMPLITUDE); + if(simDim == DIM2) + envelope *= math::sqrt(float_64(Unitless::W0) / w_y); + else if(simDim == DIM3) + envelope *= float_64(Unitless::W0) / w_y; + /* no 1D representation/implementation */ + + if(Unitless::Polarisation == Unitless::LINEAR_X) + { + elong.x() = float_X(envelope); + } + else if(Unitless::Polarisation == Unitless::LINEAR_Z) + { + elong.z() = float_X(envelope); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + elong.x() = float_X(envelope) / math::sqrt(2.0_X); + elong.z() = float_X(envelope) / math::sqrt(2.0_X); + } + + phase = 2.0_X * float_X(PI) * float_X(Unitless::f) + * (runTime - float_X(mue) - focusPos / SPEED_OF_LIGHT) + + Unitless::LASER_PHASE; } - m_elong.z() *= etrans / etrans_norm; - // reminder: if you want to use phase below, substract pi/2 - // m_phase -= float_X( PI / 2.0 ); - } - if( Unitless::initPlaneY != 0 ) // compile time if - { - /* If the laser is not initialized in the first cell we emit a - * negatively and positively propagating wave. Therefore we need to multiply the - * amplitude with a correction factor depending of the cell size in - * propagation direction. - * The negatively propagating wave is damped by the absorber. + /** create device manipulator functor * - * The `correctionFactor` assume that the wave is moving in y direction. 
+ * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset (in supercells, without guards) to the + * origin of the local domain + * @param configuration of the worker */ - auto const correctionFactor = ( SPEED_OF_LIGHT * DELTA_T ) / CELL_HEIGHT * 2._X; - - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) += correctionFactor * m_elong; - } - else - { - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) = m_elong; - } - } - }; -} // namespace acc - - template< typename T_Params > - struct GaussianBeam : public gaussianBeam::Unitless< T_Params > - { - using Unitless = gaussianBeam::Unitless< T_Params >; - - float3_X elong; - float_X phase; - typename FieldE::DataBoxType dataBoxE; - DataSpace< simDim > offsetToTotalDomain; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE GaussianBeam( uint32_t currentStep ) - { - // get data - DataConnector & dc = Environment< >::get( ).DataConnector( ); - dataBoxE = dc.get< FieldE >( - FieldE::getName(), - true - )->getDeviceDataBox(); - - // get meta data for offsets - SubGrid< simDim > const & subGrid = Environment< simDim >::get().SubGrid(); - // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); - DataSpace< simDim > const globalCellOffset( subGrid.getLocalDomain().offset ); - DataSpace< simDim > const halfSimSize( subGrid.getGlobalDomain().size / 2 ); - - // transform coordinate system to center of global simulation as origin [cells] - offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; - - // @todo reset origin of direction of moving window - // offsetToTotalDomain.y() = 0 - - float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; - - // calculate 
focus position relative to the laser initialization plane - float_X const focusPos = Unitless::FOCUS_POS - Unitless::initPlaneY * CELL_HEIGHT; - - elong = float3_X::create( 0.0_X ); - - // This check is done here on HOST, since std::numeric_limits::epsilon() does not compile on laserTransversal(), which is on DEVICE. - float_X etrans_norm( 0.0_X ); - - PMACC_CASSERT_MSG( - MODENUMBER_must_be_smaller_than_number_of_entries_in_LAGUERREMODES_vector, - Unitless::MODENUMBER < Unitless::LAGUERREMODES_t::dim - ); - for( uint32_t m = 0 ; m <= Unitless::MODENUMBER ; ++m ) - etrans_norm += typename Unitless::LAGUERREMODES_t{}[m]; - PMACC_VERIFY_MSG( - algorithms::math::abs( etrans_norm ) > std::numeric_limits< float_X >::epsilon(), - "Sum of LAGUERREMODES can not be 0." - ); - - - // a symmetric pulse will be initialized at position z=0 for - // a time of PULSE_INIT * PULSE_LENGTH = INIT_TIME. - // we shift the complete pulse for the half of this time to start with - // the front of the laser pulse. 
- constexpr float_64 mue = 0.5 * Unitless::INIT_TIME; - - // rayleigh length (in y-direction) - constexpr float_64 y_R = PI * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; - // gaussian beam waist in the nearfield: w_y(y=0) == W0 - float_64 const w_y = Unitless::W0 * math::sqrt( 1.0 + ( focusPos / y_R )*( focusPos / y_R ) ); - - float_64 envelope = float_64( Unitless::AMPLITUDE ); - if( simDim == DIM2 ) - envelope *= math::sqrt( float_64( Unitless::W0 ) / w_y ); - else if( simDim == DIM3 ) - envelope *= float_64( Unitless::W0 ) / w_y; - /* no 1D representation/implementation */ - - if( Unitless::Polarisation == Unitless::LINEAR_X ) - { - elong.x() = float_X( envelope ); - } - else if( Unitless::Polarisation == Unitless::LINEAR_Z ) - { - elong.z() = float_X( envelope ); - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) - { - elong.x() = float_X( envelope ) / math::sqrt( 2.0_X ); - elong.z() = float_X( envelope ) / math::sqrt( 2.0_X ); - } - - phase = 2.0_X * float_X( PI ) * float_X( Unitless::f ) * ( runTime - float_X( mue ) - focusPos / SPEED_OF_LIGHT ) + Unitless::LASER_PHASE; - } - - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset (in supercells, without guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::GaussianBeam< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & - ) const - { - auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); - return acc::GaussianBeam< Unitless >( dataBoxE, superCellToLocalOriginCellOffset, offsetToTotalDomain, elong, phase ); - } - - //! 
get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "GaussianBeam"; - } + template + HDINLINE acc::GaussianBeam operator()( + T_Acc const&, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); + return acc::GaussianBeam( + dataBoxE, + superCellToLocalOriginCellOffset, + offsetToTotalDomain, + elong, + phase); + } - }; + //! get the name of the laser profile + static HINLINE std::string getName() + { + return "GaussianBeam"; + } + }; -} // namespace laserProfiles -} // namespace fields + } // namespace laserProfiles + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/laserProfiles/None.def b/include/picongpu/fields/laserProfiles/None.def index 6674f57e03..42fcc3fd46 100644 --- a/include/picongpu/fields/laserProfiles/None.def +++ b/include/picongpu/fields/laserProfiles/None.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -24,43 +24,43 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace none -{ -namespace defaults -{ - struct NoneParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.0; + namespace laserProfiles + { + namespace none + { + namespace defaults + { + struct NoneParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.0; - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 0.0; + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 0.0; - /** unit: s */ - static constexpr float_64 PULSE_LENGTH_SI = 0.0; + /** unit: s */ + static constexpr float_64 PULSE_LENGTH_SI = 0.0; - /** unit: cells */ - static constexpr uint32_t initPlaneY = 0u; - }; -} // namespace defaults -} // namespace none + /** unit: cells */ + static constexpr uint32_t initPlaneY = 0u; + }; + } // namespace defaults + } // namespace none - /** Empty laser profile - * - * Does not define a laser profile but provides some hard-coded constants - * that are accessed directly in some places. - * - * @tparam T_Params class parameter to configure the "no laser" profile, - * see members of none::defaults::NoneParam for required - * members - */ - template< typename T_Params = none::defaults::NoneParam > - struct None; + /** Empty laser profile + * + * Does not define a laser profile but provides some hard-coded constants + * that are accessed directly in some places. 
+ * + * @tparam T_Params class parameter to configure the "no laser" profile, + * see members of none::defaults::NoneParam for required + * members + */ + template + struct None; -} // namespace laserProfiles -} // namespace fields + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/None.hpp b/include/picongpu/fields/laserProfiles/None.hpp index 46e3ffa1e7..e3f66bb953 100644 --- a/include/picongpu/fields/laserProfiles/None.hpp +++ b/include/picongpu/fields/laserProfiles/None.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -27,92 +27,80 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace none -{ - template< typename T_Params > - struct Unitless : public T_Params + namespace fields { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X INIT_TIME = 0.0_X; // unit: seconds (no initialization time) - }; -} // namespace none -namespace acc -{ - template< typename T_Unitless > - struct None : public T_Unitless - { - using Unitless = T_Unitless; - - /** Device-Side Constructor - */ - HDINLINE None() + namespace laserProfiles { - } + namespace none + { + template + struct Unitless : public T_Params + { + using Params = T_Params; - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & - ) - { - } - }; -} // 
namespace acc + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X INIT_TIME = 0.0_X; // unit: seconds (no initialization time) + }; + } // namespace none + namespace acc + { + template + struct None : public T_Unitless + { + using Unitless = T_Unitless; - template< typename T_Params > - struct None : public none::Unitless< T_Params > - { - using Unitless = none::Unitless< T_Params >; + /** Device-Side Constructor + */ + HDINLINE None() + { + } - /** constructor - */ - HINLINE None( uint32_t ) - { - } + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const&) + { + } + }; + } // namespace acc - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::None< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const &, - T_WorkerCfg const & - ) const - { - return acc::None< Unitless >( ); - } + template + struct None : public none::Unitless + { + using Unitless = none::Unitless; - //! 
get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "None"; - } + /** constructor + */ + HINLINE None(uint32_t) + { + } - }; + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + */ + template + HDINLINE acc::None operator()(T_Acc const&, DataSpace const&, T_WorkerCfg const&) + const + { + return acc::None(); + } -} // namespace laserProfiles -} // namespace fields -} // namespace picongpu + //! get the name of the laser profile + static HINLINE std::string getName() + { + return "None"; + } + }; + } // namespace laserProfiles + } // namespace fields +} // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/PlaneWave.def b/include/picongpu/fields/laserProfiles/PlaneWave.def index 38c6511beb..a73b137842 100644 --- a/include/picongpu/fields/laserProfiles/PlaneWave.def +++ b/include/picongpu/fields/laserProfiles/PlaneWave.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -24,96 +24,98 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace planeWave -{ -namespace defaults -{ - struct PlaneWaveParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Stretch temporal profile by a constant plateau between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 13.34e-15; - - /** Pulse length: sigma of std. gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. 
- * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 20.6146; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarization types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; -} // namespace defaults -} // namespace planeWave - - /** Plane wave laser profile - * - * Defines a plane wave with temporally Gaussian envelope. 
- * - * @tparam T_Params class parameter to configure the plane wave profile, - * see members of planeWave::defaults::PlaneWaveParam for - * required members - */ - template< typename T_Params = planeWave::defaults::PlaneWaveParam > - struct PlaneWave; - -} // namespace laserProfiles -} // namespace fields + namespace planeWave + { + namespace defaults + { + struct PlaneWaveParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Stretch temporal profile by a constant plateau between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 13.34e-15; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. 
+ * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and + * after the plateau unit: none */ + static constexpr float_64 RAMP_INIT = 20.6146; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarization types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + } // namespace defaults + } // namespace planeWave + + /** Plane wave laser profile + * + * Defines a plane wave with temporally Gaussian envelope. + * + * @tparam T_Params class parameter to configure the plane wave profile, + * see members of planeWave::defaults::PlaneWaveParam for + * required members + */ + template + struct PlaneWave; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/PlaneWave.hpp b/include/picongpu/fields/laserProfiles/PlaneWave.hpp index 426ac8f4bc..8cabf92f5a 100644 --- a/include/picongpu/fields/laserProfiles/PlaneWave.hpp +++ b/include/picongpu/fields/laserProfiles/PlaneWave.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -27,231 +27,225 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace planeWave -{ - template< typename T_Params > - struct Unitless : public T_Params - { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X LASER_NOFOCUS_CONSTANT = float_X( Params::LASER_NOFOCUS_CONSTANT_SI / UNIT_TIME ); // unit: seconds - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X INIT_TIME = float_X( ( Params::RAMP_INIT * Params::PULSE_LENGTH_SI + Params::LASER_NOFOCUS_CONSTANT_SI ) / UNIT_TIME ); // unit: seconds (full inizialisation length) - - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; - - }; -} // namespace planeWave - -namespace acc -{ - template< typename T_Unitless > - struct PlaneWave : public T_Unitless + namespace fields { - using Unitless = T_Unitless; - - float3_X m_elong; - typename FieldE::DataBoxType m_dataBoxE; - DataSpace< simDim > m_offsetToTotalDomain; - DataSpace< simDim > m_superCellToLocalOriginCellOffset; - - /** Device-Side Constructor - * - * @param superCellToLocalOriginCellOffset local offset in cells to current supercell - * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly after transform to centered origin) - */ - HDINLINE PlaneWave( - typename FieldE::DataBoxType const & dataBoxE, - DataSpace< simDim > const & superCellToLocalOriginCellOffset, - DataSpace< simDim > const & offsetToTotalDomain, - float3_X const & elong - ) : - m_elong( elong ), - m_dataBoxE( dataBoxE ), - 
m_offsetToTotalDomain( offsetToTotalDomain ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) + namespace laserProfiles { - } - - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param cellIndexInSuperCell ND cell index in current supercell - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & cellIndexInSuperCell - ) - { - // coordinate system to global simulation as origin - DataSpace< simDim > const localCell( - cellIndexInSuperCell + - m_superCellToLocalOriginCellOffset - ); - - if( Unitless::initPlaneY != 0 ) // compile time if - { - /* If the laser is not initialized in the first cell we emit a - * negatively and positively propagating wave. Therefore we need to multiply the - * amplitude with a correction factor depending of the cell size in - * propagation direction. - * The negatively propagating wave is damped by the absorber. - * - * The `correctionFactor` assume that the wave is moving in y direction. 
- */ - auto const correctionFactor = ( SPEED_OF_LIGHT * DELTA_T ) / CELL_HEIGHT * 2._X; - - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) += correctionFactor * m_elong; - } - else - { - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) = m_elong; - } - } - }; -} // namespace acc - - template< typename T_Params > - struct PlaneWave : public planeWave::Unitless< T_Params > - { - using Unitless = planeWave::Unitless< T_Params >; - - float3_X elong; - float_X phase; - typename FieldE::DataBoxType dataBoxE; - DataSpace< simDim > offsetToTotalDomain; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE PlaneWave( uint32_t currentStep ) : - phase( 0.0_X ) - { - // get data - DataConnector & dc = Environment< >::get( ).DataConnector( ); - dataBoxE = dc.get< FieldE >( - FieldE::getName(), - true - )->getDeviceDataBox(); - - // get meta data for offsets - SubGrid< simDim > const & subGrid = Environment< simDim >::get().SubGrid(); - // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); - DataSpace< simDim > const globalCellOffset( subGrid.getLocalDomain().offset ); - DataSpace< simDim > const halfSimSize( subGrid.getGlobalDomain().size / 2 ); - - // transform coordinate system to center of global simulation as origin [cells] - offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; - - // @todo reset origin of direction of moving window - // offsetToTotalDomain.y() = 0 - - float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; - - elong = float3_X::create( 0.0 ); - - float_64 envelope = float_64( Unitless::AMPLITUDE ); - - float_64 const mue = 0.5 * Unitless::RAMP_INIT * Unitless::PULSE_LENGTH; - - float_64 const w = 2.0 * PI * Unitless::f; - float_64 const tau = Unitless::PULSE_LENGTH * math::sqrt( 2.0 ); - - float_64 const endUpramp = 
mue; - float_64 const startDownramp = mue + Unitless::LASER_NOFOCUS_CONSTANT; - - float_64 integrationCorrectionFactor = 0.0; - - if( runTime > startDownramp ) + namespace planeWave { - // downramp = end - float_64 const exponent = ( runTime - startDownramp ) / tau; - envelope *= exp( -0.5 * exponent * exponent ); - integrationCorrectionFactor = ( runTime - startDownramp )/ ( w * tau * tau ); - } - else if( runTime < endUpramp ) + template + struct Unitless : public T_Params + { + using Params = T_Params; + + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X LASER_NOFOCUS_CONSTANT + = float_X(Params::LASER_NOFOCUS_CONSTANT_SI / UNIT_TIME); // unit: seconds + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X INIT_TIME = float_X( + (Params::RAMP_INIT * Params::PULSE_LENGTH_SI + Params::LASER_NOFOCUS_CONSTANT_SI) + / UNIT_TIME); // unit: seconds (full inizialisation length) + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; + + static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; + }; + } // namespace planeWave + + namespace acc { - // upramp = start - float_64 const exponent = ( runTime - endUpramp ) / tau; - envelope *= exp( -0.5 * exponent * exponent ); - integrationCorrectionFactor = ( runTime - endUpramp )/ ( w * tau * tau ); - } - - float_64 const timeOszi = runTime - endUpramp; - float_64 const t_and_phase = w * timeOszi + Unitless::LASER_PHASE; - // to understand both components [sin(...) 
+ t/tau^2 * cos(...)] see description above - if( Unitless::Polarisation == Unitless::LINEAR_X ) + template + struct PlaneWave : public T_Unitless + { + using Unitless = T_Unitless; + + float3_X m_elong; + typename FieldE::DataBoxType m_dataBoxE; + DataSpace m_offsetToTotalDomain; + DataSpace m_superCellToLocalOriginCellOffset; + + /** Device-Side Constructor + * + * @param superCellToLocalOriginCellOffset local offset in cells to current supercell + * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly + * after transform to centered origin) + */ + HDINLINE PlaneWave( + typename FieldE::DataBoxType const& dataBoxE, + DataSpace const& superCellToLocalOriginCellOffset, + DataSpace const& offsetToTotalDomain, + float3_X const& elong) + : m_elong(elong) + , m_dataBoxE(dataBoxE) + , m_offsetToTotalDomain(offsetToTotalDomain) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param cellIndexInSuperCell ND cell index in current supercell + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const& cellIndexInSuperCell) + { + // coordinate system to global simulation as origin + DataSpace const localCell(cellIndexInSuperCell + m_superCellToLocalOriginCellOffset); + + if(Unitless::initPlaneY != 0) // compile time if + { + /* If the laser is not initialized in the first cell we emit a + * negatively and positively propagating wave. Therefore we need to multiply the + * amplitude with a correction factor depending of the cell size in + * propagation direction. + * The negatively propagating wave is damped by the absorber. + * + * The `correctionFactor` assume that the wave is moving in y direction. 
+ */ + auto const correctionFactor = (SPEED_OF_LIGHT * DELTA_T) / CELL_HEIGHT * 2._X; + + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) + += correctionFactor * m_elong; + } + else + { + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) = m_elong; + } + } + }; + } // namespace acc + + template + struct PlaneWave : public planeWave::Unitless { - elong.x() = float_X( envelope * ( math::sin( t_and_phase ) - + math::cos( t_and_phase ) * integrationCorrectionFactor ) ); - } - else if( Unitless::Polarisation == Unitless::LINEAR_Z) - { - elong.z() = float_X( envelope * ( math::sin( t_and_phase ) - + math::cos( t_and_phase ) * integrationCorrectionFactor ) ); - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) - { - elong.x() = float_X( envelope / math::sqrt(2.0) * ( math::sin( t_and_phase ) - + math::cos( t_and_phase ) * integrationCorrectionFactor)); - elong.z() = float_X( envelope / math::sqrt(2.0) * ( math::cos( t_and_phase ) - - math::sin( t_and_phase ) * integrationCorrectionFactor ) ); - } - } - - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset (in supercells, without guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::PlaneWave< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & - ) const - { - auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); - return acc::PlaneWave< Unitless >( dataBoxE, superCellToLocalOriginCellOffset, offsetToTotalDomain, elong ); - } - - //! 
get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "PlaneWave"; - } + using Unitless = planeWave::Unitless; - }; + float3_X elong; + float_X phase; + typename FieldE::DataBoxType dataBoxE; + DataSpace offsetToTotalDomain; -} // namespace laserProfiles -} // namespace fields + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE PlaneWave(uint32_t currentStep) : phase(0.0_X) + { + // get data + DataConnector& dc = Environment<>::get().DataConnector(); + dataBoxE = dc.get(FieldE::getName(), true)->getDeviceDataBox(); + + // get meta data for offsets + SubGrid const& subGrid = Environment::get().SubGrid(); + // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); + DataSpace const globalCellOffset(subGrid.getLocalDomain().offset); + DataSpace const halfSimSize(subGrid.getGlobalDomain().size / 2); + + // transform coordinate system to center of global simulation as origin [cells] + offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; + + // @todo reset origin of direction of moving window + // offsetToTotalDomain.y() = 0 + + float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; + + elong = float3_X::create(0.0); + + float_64 envelope = float_64(Unitless::AMPLITUDE); + + float_64 const mue = 0.5 * Unitless::RAMP_INIT * Unitless::PULSE_LENGTH; + + float_64 const w = 2.0 * PI * Unitless::f; + float_64 const tau = Unitless::PULSE_LENGTH * math::sqrt(2.0); + + float_64 const endUpramp = mue; + float_64 const startDownramp = mue + Unitless::LASER_NOFOCUS_CONSTANT; + + float_64 integrationCorrectionFactor = 0.0; + + if(runTime > startDownramp) + { + // downramp = end + float_64 const exponent = (runTime - startDownramp) / tau; + envelope *= exp(-0.5 * exponent * exponent); + integrationCorrectionFactor = (runTime - startDownramp) / (w * tau * tau); + } + else if(runTime < endUpramp) + { + // upramp = start + float_64 const exponent 
= (runTime - endUpramp) / tau; + envelope *= exp(-0.5 * exponent * exponent); + integrationCorrectionFactor = (runTime - endUpramp) / (w * tau * tau); + } + + float_64 const timeOszi = runTime - endUpramp; + float_64 const t_and_phase = w * timeOszi + Unitless::LASER_PHASE; + // to understand both components [sin(...) + t/tau^2 * cos(...)] see description above + if(Unitless::Polarisation == Unitless::LINEAR_X) + { + elong.x() = float_X( + envelope + * (math::sin(t_and_phase) + math::cos(t_and_phase) * integrationCorrectionFactor)); + } + else if(Unitless::Polarisation == Unitless::LINEAR_Z) + { + elong.z() = float_X( + envelope + * (math::sin(t_and_phase) + math::cos(t_and_phase) * integrationCorrectionFactor)); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + elong.x() = float_X( + envelope / math::sqrt(2.0) + * (math::sin(t_and_phase) + math::cos(t_and_phase) * integrationCorrectionFactor)); + elong.z() = float_X( + envelope / math::sqrt(2.0) + * (math::cos(t_and_phase) - math::sin(t_and_phase) * integrationCorrectionFactor)); + } + } + + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset (in supercells, without guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::PlaneWave operator()( + T_Acc const&, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); + return acc::PlaneWave( + dataBoxE, + superCellToLocalOriginCellOffset, + offsetToTotalDomain, + elong); + } + + //! 
get the name of the laser profile + static HINLINE std::string getName() + { + return "PlaneWave"; + } + }; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/laserProfiles/Polynom.def b/include/picongpu/fields/laserProfiles/Polynom.def index 205a57f697..f45c72e14c 100644 --- a/include/picongpu/fields/laserProfiles/Polynom.def +++ b/include/picongpu/fields/laserProfiles/Polynom.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl * * This file is part of PIConGPU. * @@ -24,96 +24,98 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace polynom -{ -namespace defaults -{ - struct PolynomParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 4.0e-15; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * unit: meter - */ - static constexpr float_64 W0_X_SI = 4.246e-6; // waist in x-direction - static constexpr float_64 W0_Z_SI = W0_X_SI; // waist in z-direction - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarization types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; -} // namespace defaults -} // namespace gaussianBeam - - /** Wavepacket with a polynomial temporal intensity shape. - * - * Based on a wavepacket with Gaussian spatial envelope. 
- * - * @tparam T_Params class parameter to configure the polynomial laser profile, - * see members of polynom::defaults::PolynomParam for - * required members - */ - template< typename T_Params = polynom::defaults::PolynomParam > - struct Polynom; - -} // namespace laserProfiles -} // namespace fields + namespace polynom + { + namespace defaults + { + struct PolynomParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 4.0e-15; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * unit: meter + */ + static constexpr float_64 W0_X_SI = 4.246e-6; // waist in x-direction + static constexpr float_64 W0_Z_SI = W0_X_SI; // waist in z-direction + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. 
+ * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarization types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + } // namespace defaults + } // namespace polynom + + /** Wavepacket with a polynomial temporal intensity shape. + * + * Based on a wavepacket with Gaussian spatial envelope. + * + * @tparam T_Params class parameter to configure the polynomial laser profile, + * see members of polynom::defaults::PolynomParam for + * required members + */ + template + struct Polynom; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/Polynom.hpp b/include/picongpu/fields/laserProfiles/Polynom.hpp index f3a8b2af1b..6c3c370fb9 100644 --- a/include/picongpu/fields/laserProfiles/Polynom.hpp +++ b/include/picongpu/fields/laserProfiles/Polynom.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl * * This file is part of PIConGPU. 
* @@ -27,244 +27,234 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace polynom -{ - template< typename T_Params > - struct Unitless : public T_Params - { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X W0_X = float_X( Params::W0_X_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X W0_Z = float_X( Params::W0_Z_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X INIT_TIME = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (full initialization length) - - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; - - }; -} // namespace polynom - -namespace acc -{ - template< typename T_Unitless > - struct Polynom : public T_Unitless + namespace fields { - using Unitless = T_Unitless; - - float3_X m_elong; - float_X m_phase; - typename FieldE::DataBoxType m_dataBoxE; - DataSpace< simDim > m_offsetToTotalDomain; - DataSpace< simDim > m_superCellToLocalOriginCellOffset; - - /** Device-Side Constructor - * - * @param superCellToLocalOriginCellOffset local offset in cells to current supercell - * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly after transform to centered origin) - */ - HDINLINE Polynom( - typename FieldE::DataBoxType const & dataBoxE, - DataSpace< simDim > const & superCellToLocalOriginCellOffset, - DataSpace< simDim > const & offsetToTotalDomain, - float3_X const & elong, - float_X const phase - ) : - m_elong( elong ), 
- m_phase( phase ), - m_dataBoxE( dataBoxE ), - m_offsetToTotalDomain( offsetToTotalDomain ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) - { - } - - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param cellIndexInSuperCell ND cell index in current supercell - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & cellIndexInSuperCell - ) - { - // coordinate system to global simulation as origin - DataSpace< simDim > const localCell( - cellIndexInSuperCell + - m_superCellToLocalOriginCellOffset - ); - - // transform coordinate system to center of x-z plane of initialization - constexpr uint8_t planeNormalDir = 1u; - DataSpace< simDim > offsetToCenterOfPlane( m_offsetToTotalDomain ); - offsetToCenterOfPlane[ planeNormalDir ] = 0; // do not shift origin of plane normal - floatD_X const pos = precisionCast< float_X >( localCell + offsetToCenterOfPlane ) * cellSize.shrink< simDim >(); - // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() - - // transversal position only - float3_X const w0_3D( Unitless::W0_X, 0., Unitless::W0_Z ); - auto const w0( w0_3D.shrink< simDim >().remove< planeNormalDir >() ); - auto const pos_trans( pos.remove< planeNormalDir >() ); - auto const exp_compos( pos_trans * pos_trans / ( w0 * w0 ) ); - float_X const exp_arg( exp_compos.sumOfComponents() ); - - m_elong *= math::exp( -1.0_X * exp_arg ); - - if( Unitless::initPlaneY != 0 ) // compile time if - { - /* If the laser is not initialized in the first cell we emit a - * negatively and positively propagating wave. Therefore we need to multiply the - * amplitude with a correction factor depending of the cell size in - * propagation direction. - * The negatively propagating wave is damped by the absorber. 
- * - * The `correctionFactor` assume that the wave is moving in y direction. - */ - auto const correctionFactor = ( SPEED_OF_LIGHT * DELTA_T ) / CELL_HEIGHT * 2._X; - - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) += correctionFactor * m_elong; - } - else - { - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) = m_elong; - } - } - }; -} // namespace acc - - template< typename T_Params > - struct Polynom : public polynom::Unitless< T_Params > - { - using Unitless = polynom::Unitless< T_Params >; - - float3_X elong; - float_X phase; - typename FieldE::DataBoxType dataBoxE; - DataSpace< simDim > offsetToTotalDomain; - - HDINLINE float_X - Tpolynomial( float_X const tau ) + namespace laserProfiles { - float_X result( 0.0_X ); - if( tau >= 0.0_X && tau <= 1.0_X ) - result = tau * tau * tau * ( 10.0_X - 15.0_X * tau + 6.0_X * tau * tau ); - else if( tau > 1.0_X && tau <= 2.0_X ) - result = ( 2.0_X - tau ) * ( 2.0_X - tau ) * ( 2.0_X - tau ) * ( 4.0_X - 9.0_X * tau + 6.0_X * tau * tau ); - - return result; - } - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE Polynom( uint32_t currentStep ) : - phase( 0.0_X ) - { - // get data - DataConnector & dc = Environment< >::get( ).DataConnector( ); - dataBoxE = dc.get< FieldE >( - FieldE::getName(), - true - )->getDeviceDataBox(); - - // get meta data for offsets - SubGrid< simDim > const & subGrid = Environment< simDim >::get().SubGrid(); - // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); - DataSpace< simDim > const globalCellOffset( subGrid.getLocalDomain().offset ); - DataSpace< simDim > const halfSimSize( subGrid.getGlobalDomain().size / 2 ); - - // transform coordinate system to center of global simulation as origin [cells] - offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; - - // @todo 
reset origin of direction of moving window - // offsetToTotalDomain.y() = 0 - - float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; - - elong = float3_X::create( 0.0_X ); - - /* a symmetric pulse will be initialized at position z=0 - * the laser amplitude rises for t_rise - * and falls for t_rise - * making the laser pulse 2*t_rise long - */ - - const float_X t_rise = 0.5_X * Unitless::PULSE_LENGTH; - const float_X tau = runTime / t_rise; - - const float_X omegaLaser = 2.0_X * PI * Unitless::f; - - if( Unitless::Polarisation == Unitless::LINEAR_X ) + namespace polynom { - elong.x() = Unitless::AMPLITUDE * Tpolynomial( tau ) * - math::sin( omegaLaser * ( runTime - t_rise ) + Unitless::LASER_PHASE ); - } - else if( Unitless::Polarisation == Unitless::LINEAR_Z ) + template + struct Unitless : public T_Params + { + using Params = T_Params; + + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X W0_X = float_X(Params::W0_X_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X W0_Z = float_X(Params::W0_Z_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X INIT_TIME + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (full initialization length) + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; + + static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; + }; + } // namespace polynom + + namespace acc { - elong.z() = Unitless::AMPLITUDE * Tpolynomial( tau ) * - math::sin( omegaLaser * ( runTime - t_rise ) + Unitless::LASER_PHASE ); - } - else if( Unitless::Polarisation == 
Unitless::CIRCULAR ) + template + struct Polynom : public T_Unitless + { + using Unitless = T_Unitless; + + float3_X m_elong; + float_X m_phase; + typename FieldE::DataBoxType m_dataBoxE; + DataSpace m_offsetToTotalDomain; + DataSpace m_superCellToLocalOriginCellOffset; + + /** Device-Side Constructor + * + * @param superCellToLocalOriginCellOffset local offset in cells to current supercell + * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly + * after transform to centered origin) + */ + HDINLINE Polynom( + typename FieldE::DataBoxType const& dataBoxE, + DataSpace const& superCellToLocalOriginCellOffset, + DataSpace const& offsetToTotalDomain, + float3_X const& elong, + float_X const phase) + : m_elong(elong) + , m_phase(phase) + , m_dataBoxE(dataBoxE) + , m_offsetToTotalDomain(offsetToTotalDomain) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param cellIndexInSuperCell ND cell index in current supercell + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const& cellIndexInSuperCell) + { + // coordinate system to global simulation as origin + DataSpace const localCell(cellIndexInSuperCell + m_superCellToLocalOriginCellOffset); + + // transform coordinate system to center of x-z plane of initialization + constexpr uint8_t planeNormalDir = 1u; + DataSpace offsetToCenterOfPlane(m_offsetToTotalDomain); + offsetToCenterOfPlane[planeNormalDir] = 0; // do not shift origin of plane normal + floatD_X const pos + = precisionCast(localCell + offsetToCenterOfPlane) * cellSize.shrink(); + // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() + + // transversal position only + float3_X const w0_3D(Unitless::W0_X, 0., Unitless::W0_Z); + auto const w0(w0_3D.shrink().remove()); + auto const 
pos_trans(pos.remove()); + auto const exp_compos(pos_trans * pos_trans / (w0 * w0)); + float_X const exp_arg(exp_compos.sumOfComponents()); + + m_elong *= math::exp(-1.0_X * exp_arg); + + if(Unitless::initPlaneY != 0) // compile time if + { + /* If the laser is not initialized in the first cell we emit a + * negatively and positively propagating wave. Therefore we need to multiply the + * amplitude with a correction factor depending of the cell size in + * propagation direction. + * The negatively propagating wave is damped by the absorber. + * + * The `correctionFactor` assume that the wave is moving in y direction. + */ + auto const correctionFactor = (SPEED_OF_LIGHT * DELTA_T) / CELL_HEIGHT * 2._X; + + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) + += correctionFactor * m_elong; + } + else + { + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) = m_elong; + } + } + }; + } // namespace acc + + template + struct Polynom : public polynom::Unitless { - elong.x() = Unitless::AMPLITUDE * Tpolynomial( tau ) / math::sqrt( 2.0_X ) * - math::sin( omegaLaser * ( runTime - t_rise ) + Unitless::LASER_PHASE ); - elong.z() = Unitless::AMPLITUDE * Tpolynomial( tau ) / math::sqrt( 2.0_X ) * - math::cos( omegaLaser * ( runTime - t_rise ) + Unitless::LASER_PHASE ); - } - } - - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset (in supercells, without guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::Polynom< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & - ) const - { - auto const 
superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); - return acc::Polynom< Unitless >( dataBoxE, superCellToLocalOriginCellOffset, offsetToTotalDomain, elong, phase ); - } - - //! get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "Polynom"; - } - - }; - -} // namespace laserProfiles -} // namespace fields + using Unitless = polynom::Unitless; + + float3_X elong; + float_X phase; + typename FieldE::DataBoxType dataBoxE; + DataSpace offsetToTotalDomain; + + HDINLINE float_X Tpolynomial(float_X const tau) + { + float_X result(0.0_X); + if(tau >= 0.0_X && tau <= 1.0_X) + result = tau * tau * tau * (10.0_X - 15.0_X * tau + 6.0_X * tau * tau); + else if(tau > 1.0_X && tau <= 2.0_X) + result = (2.0_X - tau) * (2.0_X - tau) * (2.0_X - tau) + * (4.0_X - 9.0_X * tau + 6.0_X * tau * tau); + + return result; + } + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE Polynom(uint32_t currentStep) : phase(0.0_X) + { + // get data + DataConnector& dc = Environment<>::get().DataConnector(); + dataBoxE = dc.get(FieldE::getName(), true)->getDeviceDataBox(); + + // get meta data for offsets + SubGrid const& subGrid = Environment::get().SubGrid(); + // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); + DataSpace const globalCellOffset(subGrid.getLocalDomain().offset); + DataSpace const halfSimSize(subGrid.getGlobalDomain().size / 2); + + // transform coordinate system to center of global simulation as origin [cells] + offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; + + // @todo reset origin of direction of moving window + // offsetToTotalDomain.y() = 0 + + float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; + + elong = float3_X::create(0.0_X); + + /* a symmetric pulse will be initialized at position z=0 + * the laser amplitude rises for t_rise + * and falls for t_rise + * making the laser pulse 
2*t_rise long + */ + + const float_X t_rise = 0.5_X * Unitless::PULSE_LENGTH; + const float_X tau = runTime / t_rise; + + const float_X omegaLaser = 2.0_X * PI * Unitless::f; + + if(Unitless::Polarisation == Unitless::LINEAR_X) + { + elong.x() = Unitless::AMPLITUDE * Tpolynomial(tau) + * math::sin(omegaLaser * (runTime - t_rise) + Unitless::LASER_PHASE); + } + else if(Unitless::Polarisation == Unitless::LINEAR_Z) + { + elong.z() = Unitless::AMPLITUDE * Tpolynomial(tau) + * math::sin(omegaLaser * (runTime - t_rise) + Unitless::LASER_PHASE); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + elong.x() = Unitless::AMPLITUDE * Tpolynomial(tau) / math::sqrt(2.0_X) + * math::sin(omegaLaser * (runTime - t_rise) + Unitless::LASER_PHASE); + elong.z() = Unitless::AMPLITUDE * Tpolynomial(tau) / math::sqrt(2.0_X) + * math::cos(omegaLaser * (runTime - t_rise) + Unitless::LASER_PHASE); + } + } + + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset (in supercells, without guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::Polynom operator()( + T_Acc const&, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); + return acc::Polynom( + dataBoxE, + superCellToLocalOriginCellOffset, + offsetToTotalDomain, + elong, + phase); + } + + //! 
get the name of the laser profile + static HINLINE std::string getName() + { + return "Polynom"; + } + }; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/laserProfiles/PulseFrontTilt.def b/include/picongpu/fields/laserProfiles/PulseFrontTilt.def index 45c1dec928..3c6229b21a 100644 --- a/include/picongpu/fields/laserProfiles/PulseFrontTilt.def +++ b/include/picongpu/fields/laserProfiles/PulseFrontTilt.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Anton Helm, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Anton Helm, Heiko Burau, Rene Widera, Richard Pausch, * Axel Huebl, Alexander Debus * * This file is part of PIConGPU. @@ -25,111 +25,113 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace pulseFrontTilt -{ -namespace defaults -{ - struct PulseFrontTiltParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; - - /** the distance to the laser focus in y-direction - * unit: meter */ - static constexpr float_64 FOCUS_POS_SI = 4.62e-5; - - /** the tilt angle between laser propagation in y-direction and laser axis in - * x-direction (0 degree == no tilt) - * unit: degree */ - static constexpr float_64 TILT_X_SI = 0.0; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - //! 
Available polarisation types - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; -} // namespace defaults -} // namespace pulseFrontTilt - - /** Gaussian Beam laser profile with titled pulse front - * - * @tparam T_Params class parameter to configure the Gaussian Beam with - * pulse front titlt, see members of - * pulseFrontTilt::defaults::PulseFrontTiltParam for - * required members - */ - template< typename T_Params = pulseFrontTilt::defaults::PulseFrontTiltParam > - struct PulseFrontTilt; - -} // namespace laserProfiles -} // namespace fields + namespace pulseFrontTilt + { + namespace defaults + { + struct PulseFrontTiltParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; + + /** the distance to the laser focus in y-direction + * unit: meter */ + static constexpr float_64 FOCUS_POS_SI = 4.62e-5; + + /** the tilt angle between laser propagation in y-direction and laser axis in + * x-direction (0 degree == no tilt) + * unit: degree */ + static constexpr float_64 TILT_X_SI = 0.0; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + //! 
Available polarisation types + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + } // namespace defaults + } // namespace pulseFrontTilt + + /** Gaussian Beam laser profile with titled pulse front + * + * @tparam T_Params class parameter to configure the Gaussian Beam with + * pulse front titlt, see members of + * pulseFrontTilt::defaults::PulseFrontTiltParam for + * required members + */ + template + struct PulseFrontTilt; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/PulseFrontTilt.hpp b/include/picongpu/fields/laserProfiles/PulseFrontTilt.hpp index 5e33222657..f2214f11ca 100644 --- a/include/picongpu/fields/laserProfiles/PulseFrontTilt.hpp +++ b/include/picongpu/fields/laserProfiles/PulseFrontTilt.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Anton Helm, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Anton Helm, Heiko Burau, Rene Widera, Richard Pausch, * Axel Huebl, Alexander Debus * * This file is part of PIConGPU. 
@@ -28,281 +28,300 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace pulseFrontTilt -{ - template< typename T_Params > - struct Unitless : public T_Params - { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X W0 = float_X( Params::W0_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X FOCUS_POS = float_X( Params::FOCUS_POS_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X INIT_TIME = float_X( Params::PULSE_INIT * Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (full initialization length) - static constexpr float_X TILT_X = float_X( Params::TILT_X_SI * PI / 180. ); // unit: radiant (in dimensions of pi) - - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; - - }; -} // namespace pulseFrontTilt - -namespace acc -{ - template< typename T_Unitless > - struct PulseFrontTilt : public T_Unitless - { - using Unitless = T_Unitless; - - float3_X m_elong; - float_X m_phase; - typename FieldE::DataBoxType m_dataBoxE; - DataSpace< simDim > m_offsetToTotalDomain; - DataSpace< simDim > m_superCellToLocalOriginCellOffset; - - /** Device-Side Constructor - * - * @param superCellToLocalOriginCellOffset local offset in cells to current supercell - * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly after transform to centered origin) - */ - HDINLINE PulseFrontTilt( - typename FieldE::DataBoxType const & dataBoxE, - DataSpace< simDim > const & 
superCellToLocalOriginCellOffset, - DataSpace< simDim > const & offsetToTotalDomain, - float3_X const & elong, - float_X const phase - ) : - m_elong( elong ), - m_phase( phase ), - m_dataBoxE( dataBoxE ), - m_offsetToTotalDomain( offsetToTotalDomain ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) - { - } - - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param cellIndexInSuperCell ND cell index in current supercell - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & cellIndexInSuperCell - ) - { - // coordinate system to global simulation as origin - DataSpace< simDim > const localCell( - cellIndexInSuperCell + - m_superCellToLocalOriginCellOffset - ); - - // transform coordinate system to center of x-z plane of initialization - constexpr uint8_t planeNormalDir = 1u; - DataSpace< simDim > offsetToCenterOfPlane( m_offsetToTotalDomain ); - offsetToCenterOfPlane[ planeNormalDir ] = 0; // do not shift origin of plane normal - floatD_X const pos = precisionCast< float_X >( localCell + offsetToCenterOfPlane ) * cellSize.shrink< simDim >(); - // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() - - // calculate focus position relative to the laser initialization plane - float_X const focusPos = Unitless::FOCUS_POS - pos.y(); - - float_X const timeShift = m_phase / ( 2.0_X * float_X( PI ) * float_X( Unitless::f ) ) + focusPos / SPEED_OF_LIGHT; - float_X const local_tilt_x = Unitless::TILT_X; - float_X const spaceShift_x = SPEED_OF_LIGHT * algorithms::math::tan( local_tilt_x ) * timeShift / cellSize.y(); - - // transversal position only - // floatD_X planeNoNormal = floatD_X::create( 1.0 ); - // planeNoNormal[ planeNormalDir ] = 0.0; - // Gaussian Beam with zero tilt: - // r2 = math::abs2( pos * planeNoNormal ); - auto const spaceShift = float3_X( 
spaceShift_x, 0., 0. ).shrink< simDim >().remove< planeNormalDir >(); - auto const pos_trans( pos.remove< planeNormalDir >() ); - - float_X const r2 = math::abs2( pos_trans + spaceShift ); - - // rayleigh length (in y-direction) - float_X const y_R = float_X( PI ) * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; - - // inverse radius of curvature of the beam's wavefronts - float_X const R_y_inv = -focusPos / ( y_R * y_R + focusPos * focusPos); - - // beam waist in the near field: w_y(y=0) == W0 - float_X const w_y = Unitless::W0 * algorithms::math::sqrt( 1.0_X + ( focusPos / y_R )*( focusPos / y_R ) ); - //! the Gouy phase shift - float_X const xi_y = algorithms::math::atan( -focusPos / y_R ); - - if( Unitless::Polarisation == Unitless::LINEAR_X || Unitless::Polarisation == Unitless::LINEAR_Z ) - { - m_elong *= math::exp( -r2 / w_y / w_y ) * math::cos( 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * focusPos - 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + xi_y + m_phase ) - * math::exp( -( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - *( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0_X * Unitless::PULSE_LENGTH ) / ( 2.0_X * Unitless::PULSE_LENGTH ) ); - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) - { - m_elong.x() *= math::exp( -r2 / w_y / w_y ) * math::cos( 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * focusPos - 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + xi_y + m_phase ) - * math::exp( -( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - *( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0_X * Unitless::PULSE_LENGTH ) / ( 2.0_X * Unitless::PULSE_LENGTH ) ); - m_phase += float_X( PI ) / 2.0_X; - m_elong.z() *= math::exp( -r2 / 
w_y / w_y ) * math::cos( 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * focusPos - 2.0_X * float_X( PI ) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + xi_y + m_phase ) - * math::exp( -( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::WAVE_LENGTH ) - *( r2 / 2.0_X * R_y_inv - focusPos - m_phase / 2.0_X / float_X( PI ) * Unitless::Unitless::WAVE_LENGTH ) - / SPEED_OF_LIGHT / SPEED_OF_LIGHT / ( 2.0_X * Unitless::PULSE_LENGTH ) / ( 2.0_X * Unitless::PULSE_LENGTH ) ); - // reminder: if you want to use phase below, substract pi/2 - // m_phase -= float_X( PI ) / 2.0_X; - } - - if( Unitless::initPlaneY != 0 ) // compile time if - { - /* If the laser is not initialized in the first cell we emit a - * negatively and positively propagating wave. Therefore we need to multiply the - * amplitude with a correction factor depending of the cell size in - * propagation direction. - * The negatively propagating wave is damped by the absorber. - * - * The `correctionFactor` assume that the wave is moving in y direction. 
- */ - auto const correctionFactor = ( SPEED_OF_LIGHT * DELTA_T ) / CELL_HEIGHT * 2._X; - - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) += correctionFactor * m_elong; - } - else - { - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) = m_elong; - } - } - }; -} // namespace acc - - template< typename T_Params > - struct PulseFrontTilt : public pulseFrontTilt::Unitless< T_Params > + namespace fields { - using Unitless = pulseFrontTilt::Unitless< T_Params >; - - float3_X elong; - float_X phase; - typename FieldE::DataBoxType dataBoxE; - DataSpace< simDim > offsetToTotalDomain; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE PulseFrontTilt( uint32_t currentStep ) + namespace laserProfiles { - // get data - DataConnector & dc = Environment< >::get( ).DataConnector( ); - dataBoxE = dc.get< FieldE >( - FieldE::getName(), - true - )->getDeviceDataBox(); - - // get meta data for offsets - SubGrid< simDim > const & subGrid = Environment< simDim >::get().SubGrid(); - // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); - DataSpace< simDim > const globalCellOffset( subGrid.getLocalDomain().offset ); - DataSpace< simDim > const halfSimSize( subGrid.getGlobalDomain().size / 2 ); - - // transform coordinate system to center of global simulation as origin [cells] - offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; - - // @todo reset origin of direction of moving window - // offsetToTotalDomain.y() = 0 - - float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; - - // calculate focus position relative to the laser initialization plane - float_X const focusPos = Unitless::FOCUS_POS - Unitless::initPlaneY * CELL_HEIGHT; - - elong = float3_X::create( 0.0 ); - - // a symmetric pulse will be initialized at position z=0 for - // a time 
of PULSE_INIT * PULSE_LENGTH = INIT_TIME. - // we shift the complete pulse for the half of this time to start with - // the front of the laser pulse. - constexpr float_64 mue = 0.5 * Unitless::INIT_TIME; - - // rayleigh length (in y-direction) - constexpr float_64 y_R = PI * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; - // gaussian beam waist in the nearfield: w_y(y=0) == W0 - float_64 const w_y = Unitless::W0 * math::sqrt( 1.0 + ( focusPos / y_R )*( focusPos / y_R ) ); - - float_64 envelope = float_64( Unitless::AMPLITUDE ); - if( simDim == DIM2 ) - envelope *= math::sqrt( float_64( Unitless::W0 ) / w_y ); - else if( simDim == DIM3 ) - envelope *= float_64( Unitless::W0 ) / w_y; - /* no 1D representation/implementation */ - - if( Unitless::Polarisation == Unitless::LINEAR_X ) + namespace pulseFrontTilt { - elong.x() = float_X( envelope ); - } - else if( Unitless::Polarisation == Unitless::LINEAR_Z ) + template + struct Unitless : public T_Params + { + using Params = T_Params; + + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X W0 = float_X(Params::W0_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X FOCUS_POS = float_X(Params::FOCUS_POS_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X INIT_TIME = float_X( + Params::PULSE_INIT * Params::PULSE_LENGTH_SI + / UNIT_TIME); // unit: seconds (full initialization length) + static constexpr float_X TILT_X + = float_X(Params::TILT_X_SI * PI / 180.); // unit: radiant (in dimensions of pi) + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; + + static 
constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; + }; + } // namespace pulseFrontTilt + + namespace acc { - elong.z() = float_X( envelope ); - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) + template + struct PulseFrontTilt : public T_Unitless + { + using Unitless = T_Unitless; + + float3_X m_elong; + float_X m_phase; + typename FieldE::DataBoxType m_dataBoxE; + DataSpace m_offsetToTotalDomain; + DataSpace m_superCellToLocalOriginCellOffset; + + /** Device-Side Constructor + * + * @param superCellToLocalOriginCellOffset local offset in cells to current supercell + * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly + * after transform to centered origin) + */ + HDINLINE PulseFrontTilt( + typename FieldE::DataBoxType const& dataBoxE, + DataSpace const& superCellToLocalOriginCellOffset, + DataSpace const& offsetToTotalDomain, + float3_X const& elong, + float_X const phase) + : m_elong(elong) + , m_phase(phase) + , m_dataBoxE(dataBoxE) + , m_offsetToTotalDomain(offsetToTotalDomain) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param cellIndexInSuperCell ND cell index in current supercell + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const& cellIndexInSuperCell) + { + // coordinate system to global simulation as origin + DataSpace const localCell(cellIndexInSuperCell + m_superCellToLocalOriginCellOffset); + + // transform coordinate system to center of x-z plane of initialization + constexpr uint8_t planeNormalDir = 1u; + DataSpace offsetToCenterOfPlane(m_offsetToTotalDomain); + offsetToCenterOfPlane[planeNormalDir] = 0; // do not shift origin of plane normal + floatD_X const pos + = precisionCast(localCell + offsetToCenterOfPlane) * cellSize.shrink(); + // @todo add half-cells via 
traits::FieldPosition< Solver::NumicalCellType, FieldE >() + + // calculate focus position relative to the laser initialization plane + float_X const focusPos = Unitless::FOCUS_POS - pos.y(); + + float_X const timeShift + = m_phase / (2.0_X * float_X(PI) * float_X(Unitless::f)) + focusPos / SPEED_OF_LIGHT; + float_X const local_tilt_x = Unitless::TILT_X; + float_X const spaceShift_x + = SPEED_OF_LIGHT * math::tan(local_tilt_x) * timeShift / cellSize.y(); + + // transversal position only + // floatD_X planeNoNormal = floatD_X::create( 1.0 ); + // planeNoNormal[ planeNormalDir ] = 0.0; + // Gaussian Beam with zero tilt: + // r2 = pmacc::math::abs2( pos * planeNoNormal ); + auto const spaceShift + = float3_X(spaceShift_x, 0., 0.).shrink().remove(); + auto const pos_trans(pos.remove()); + + float_X const r2 = pmacc::math::abs2(pos_trans + spaceShift); + + // rayleigh length (in y-direction) + float_X const y_R = float_X(PI) * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; + + // inverse radius of curvature of the beam's wavefronts + float_X const R_y_inv = -focusPos / (y_R * y_R + focusPos * focusPos); + + // beam waist in the near field: w_y(y=0) == W0 + float_X const w_y = Unitless::W0 * math::sqrt(1.0_X + (focusPos / y_R) * (focusPos / y_R)); + //! 
the Gouy phase shift + float_X const xi_y = math::atan(-focusPos / y_R); + + if(Unitless::Polarisation == Unitless::LINEAR_X + || Unitless::Polarisation == Unitless::LINEAR_Z) + { + m_elong *= math::exp(-r2 / w_y / w_y) + * math::cos( + 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * focusPos + - 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + xi_y + + m_phase) + * math::exp( + -(r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + * (r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + / SPEED_OF_LIGHT / SPEED_OF_LIGHT / (2.0_X * Unitless::PULSE_LENGTH) + / (2.0_X * Unitless::PULSE_LENGTH)); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + m_elong.x() *= math::exp(-r2 / w_y / w_y) + * math::cos(2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * focusPos + - 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + xi_y + + m_phase) + * math::exp(-(r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + * (r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + / SPEED_OF_LIGHT / SPEED_OF_LIGHT / (2.0_X * Unitless::PULSE_LENGTH) + / (2.0_X * Unitless::PULSE_LENGTH)); + m_phase += float_X(PI) / 2.0_X; + m_elong.z() *= math::exp(-r2 / w_y / w_y) + * math::cos(2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * focusPos + - 2.0_X * float_X(PI) / Unitless::WAVE_LENGTH * r2 / 2.0_X * R_y_inv + xi_y + + m_phase) + * math::exp(-(r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::WAVE_LENGTH) + * (r2 / 2.0_X * R_y_inv - focusPos + - m_phase / 2.0_X / float_X(PI) * Unitless::Unitless::WAVE_LENGTH) + / SPEED_OF_LIGHT / SPEED_OF_LIGHT / (2.0_X * Unitless::PULSE_LENGTH) + / (2.0_X * Unitless::PULSE_LENGTH)); + // reminder: if you want to use phase below, substract pi/2 + // m_phase -= float_X( PI ) / 2.0_X; + } + + if(Unitless::initPlaneY != 0) // compile time if 
+ { + /* If the laser is not initialized in the first cell we emit a + * negatively and positively propagating wave. Therefore we need to multiply the + * amplitude with a correction factor depending of the cell size in + * propagation direction. + * The negatively propagating wave is damped by the absorber. + * + * The `correctionFactor` assume that the wave is moving in y direction. + */ + auto const correctionFactor = (SPEED_OF_LIGHT * DELTA_T) / CELL_HEIGHT * 2._X; + + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) + += correctionFactor * m_elong; + } + else + { + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) = m_elong; + } + } + }; + } // namespace acc + + template + struct PulseFrontTilt : public pulseFrontTilt::Unitless { - elong.x() = float_X( envelope ) / math::sqrt( 2.0_X ); - elong.z() = float_X( envelope ) / math::sqrt( 2.0_X ); - } - - phase = 2.0_X * float_X( PI ) * float_X( Unitless::f ) * ( runTime - float_X( mue ) - focusPos / SPEED_OF_LIGHT ) + Unitless::LASER_PHASE; - } - - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset (in supercells, without guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::PulseFrontTilt< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & - ) const - { - auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); - return acc::PulseFrontTilt< Unitless >( dataBoxE, superCellToLocalOriginCellOffset, offsetToTotalDomain, elong, phase ); - } - - //! 
get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "PulseFrontTilt"; - } + using Unitless = pulseFrontTilt::Unitless; - }; + float3_X elong; + float_X phase; + typename FieldE::DataBoxType dataBoxE; + DataSpace offsetToTotalDomain; -} // namespace laserProfiles -} // namespace fields + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE PulseFrontTilt(uint32_t currentStep) + { + // get data + DataConnector& dc = Environment<>::get().DataConnector(); + dataBoxE = dc.get(FieldE::getName(), true)->getDeviceDataBox(); + + // get meta data for offsets + SubGrid const& subGrid = Environment::get().SubGrid(); + // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); + DataSpace const globalCellOffset(subGrid.getLocalDomain().offset); + DataSpace const halfSimSize(subGrid.getGlobalDomain().size / 2); + + // transform coordinate system to center of global simulation as origin [cells] + offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; + + // @todo reset origin of direction of moving window + // offsetToTotalDomain.y() = 0 + + float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift; + + // calculate focus position relative to the laser initialization plane + float_X const focusPos = Unitless::FOCUS_POS - Unitless::initPlaneY * CELL_HEIGHT; + + elong = float3_X::create(0.0); + + // a symmetric pulse will be initialized at position z=0 for + // a time of PULSE_INIT * PULSE_LENGTH = INIT_TIME. + // we shift the complete pulse for the half of this time to start with + // the front of the laser pulse. 
+ constexpr float_64 mue = 0.5 * Unitless::INIT_TIME; + + // rayleigh length (in y-direction) + constexpr float_64 y_R = PI * Unitless::W0 * Unitless::W0 / Unitless::WAVE_LENGTH; + // gaussian beam waist in the nearfield: w_y(y=0) == W0 + float_64 const w_y = Unitless::W0 * math::sqrt(1.0 + (focusPos / y_R) * (focusPos / y_R)); + + float_64 envelope = float_64(Unitless::AMPLITUDE); + if(simDim == DIM2) + envelope *= math::sqrt(float_64(Unitless::W0) / w_y); + else if(simDim == DIM3) + envelope *= float_64(Unitless::W0) / w_y; + /* no 1D representation/implementation */ + + if(Unitless::Polarisation == Unitless::LINEAR_X) + { + elong.x() = float_X(envelope); + } + else if(Unitless::Polarisation == Unitless::LINEAR_Z) + { + elong.z() = float_X(envelope); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + elong.x() = float_X(envelope) / math::sqrt(2.0_X); + elong.z() = float_X(envelope) / math::sqrt(2.0_X); + } + + phase = 2.0_X * float_X(PI) * float_X(Unitless::f) + * (runTime - float_X(mue) - focusPos / SPEED_OF_LIGHT) + + Unitless::LASER_PHASE; + } + + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset (in supercells, without guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::PulseFrontTilt operator()( + T_Acc const&, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); + return acc::PulseFrontTilt( + dataBoxE, + superCellToLocalOriginCellOffset, + offsetToTotalDomain, + elong, + phase); + } + + //! 
get the name of the laser profile + static HINLINE std::string getName() + { + return "PulseFrontTilt"; + } + }; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/laserProfiles/Wavepacket.def b/include/picongpu/fields/laserProfiles/Wavepacket.def index 192b69fce6..ac13a4d878 100644 --- a/include/picongpu/fields/laserProfiles/Wavepacket.def +++ b/include/picongpu/fields/laserProfiles/Wavepacket.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Stefan Tietze * * This file is part of PIConGPU. @@ -25,106 +25,109 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace wavepacket -{ -namespace defaults -{ - struct WavepacketParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Stretch temporal profile by a constant plateau between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 7.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_X_SI = 4.246e-6; - static constexpr float_64 W0_Z_SI = W0_X_SI; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarisation types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; -} // namespace defaults -} // namespace wavepacket - - /** Wavepacket with Gaussian spatial and temporal envelope - * - * @tparam T_Params class parameter to configure the Wavepacket profile, - * see members of wavepacket::defaults::WavepacketParam for - * required members - */ - template< typename T_Params = 
wavepacket::defaults::WavepacketParam > - struct Wavepacket; - -} // namespace laserProfiles -} // namespace fields + namespace wavepacket + { + namespace defaults + { + struct WavepacketParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Stretch temporal profile by a constant plateau between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI + = 7.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_X_SI = 4.246e-6; + static constexpr float_64 W0_Z_SI = W0_X_SI; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + } // namespace defaults + } // namespace wavepacket + + /** Wavepacket with Gaussian spatial and temporal envelope + * + * @tparam T_Params class parameter to configure the Wavepacket profile, + * see members of wavepacket::defaults::WavepacketParam for + * required members + */ + template + struct Wavepacket; + + } // namespace laserProfiles + } // namespace 
fields } // namespace picongpu diff --git a/include/picongpu/fields/laserProfiles/Wavepacket.hpp b/include/picongpu/fields/laserProfiles/Wavepacket.hpp index 0cfd747652..9f922216a1 100644 --- a/include/picongpu/fields/laserProfiles/Wavepacket.hpp +++ b/include/picongpu/fields/laserProfiles/Wavepacket.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Stefan Tietze * * This file is part of PIConGPU. @@ -28,249 +28,245 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace wavepacket -{ - template< typename T_Params > - struct Unitless : public T_Params - { - using Params = T_Params; - - static constexpr float_X WAVE_LENGTH = float_X( Params::WAVE_LENGTH_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X PULSE_LENGTH = float_X( Params::PULSE_LENGTH_SI / UNIT_TIME ); // unit: seconds (1 sigma) - static constexpr float_X LASER_NOFOCUS_CONSTANT = float_X( Params::LASER_NOFOCUS_CONSTANT_SI / UNIT_TIME ); //unit: seconds - static constexpr float_X AMPLITUDE = float_X( Params::AMPLITUDE_SI / UNIT_EFIELD ); // unit: Volt /meter - static constexpr float_X W0_X = float_X( Params::W0_X_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X W0_Z = float_X( Params::W0_Z_SI / UNIT_LENGTH ); // unit: meter - static constexpr float_X INIT_TIME = float_X( Params::PULSE_INIT * PULSE_LENGTH + LASER_NOFOCUS_CONSTANT ); // unit: seconds (full initialization length) - static constexpr float_X endUpramp = -0.5_X * LASER_NOFOCUS_CONSTANT; // unit: seconds - static constexpr float_X startDownramp = 0.5_X * LASER_NOFOCUS_CONSTANT; // unit: seconds - - /* initialize the laser not in the first cell is equal to a negative shift - * in time - */ - static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; - - static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; - static constexpr 
float_64 w = 2.0 * PI * f; - }; -} // namespace wavepacket - -namespace acc -{ - template< typename T_Unitless > - struct Wavepacket : public T_Unitless + namespace fields { - using Unitless = T_Unitless; - - float3_X m_elong; - float_X m_phase; - typename FieldE::DataBoxType m_dataBoxE; - DataSpace< simDim > m_offsetToTotalDomain; - DataSpace< simDim > m_superCellToLocalOriginCellOffset; - - /** Device-Side Constructor - * - * @param superCellToLocalOriginCellOffset local offset in cells to current supercell - * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly after transform to centered origin) - */ - HDINLINE Wavepacket( - typename FieldE::DataBoxType const & dataBoxE, - DataSpace< simDim > const & superCellToLocalOriginCellOffset, - DataSpace< simDim > const & offsetToTotalDomain, - float3_X const & elong, - float_X const phase - ) : - m_elong( elong ), - m_phase( phase ), - m_dataBoxE( dataBoxE ), - m_offsetToTotalDomain( offsetToTotalDomain ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) + namespace laserProfiles { - } - - /** device side manipulation for init plane (transversal) - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param cellIndexInSuperCell ND cell index in current supercell - */ - template< typename T_Acc > - HDINLINE - void operator( )( - T_Acc const &, - DataSpace< simDim > const & cellIndexInSuperCell - ) - { - // coordinate system to global simulation as origin - DataSpace< simDim > const localCell( - cellIndexInSuperCell + - m_superCellToLocalOriginCellOffset - ); - - // transform coordinate system to center of x-z plane of initialization - constexpr uint8_t planeNormalDir = 1u; - DataSpace< simDim > offsetToCenterOfPlane( m_offsetToTotalDomain ); - offsetToCenterOfPlane[ planeNormalDir ] = 0; // do not shift origin of plane normal - floatD_X const pos = precisionCast< float_X >( localCell + offsetToCenterOfPlane ) * 
cellSize.shrink< simDim >(); - // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() - - // transversal position only - float3_X const w0_3D( Unitless::W0_X, 0._X, Unitless::W0_Z ); - auto const w0( w0_3D.shrink< simDim >().remove< planeNormalDir >() ); - auto const pos_trans( pos.remove< planeNormalDir >() ); - auto const exp_compos( pos_trans * pos_trans / ( w0 * w0 ) ); - float_X const exp_arg( exp_compos.sumOfComponents() ); - - m_elong *= math::exp( -1.0_X * exp_arg ); - - if( Unitless::initPlaneY != 0 ) // compile time if - { - /* If the laser is not initialized in the first cell we emit a - * negatively and positively propagating wave. Therefore we need to multiply the - * amplitude with a correction factor depending of the cell size in - * propagation direction. - * The negatively propagating wave is damped by the absorber. - * - * The `correctionFactor` assume that the wave is moving in y direction. - */ - auto const correctionFactor = ( SPEED_OF_LIGHT * DELTA_T ) / CELL_HEIGHT * 2._X; - - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) += correctionFactor * m_elong; - } - else - { - // jump over the guard of the electric field - m_dataBoxE( localCell + SuperCellSize::toRT() * GuardSize::toRT() ) = m_elong; - } - } - }; -} // namespace acc - - template< typename T_Params > - struct Wavepacket : public wavepacket::Unitless< T_Params > - { - using Unitless = wavepacket::Unitless< T_Params >; - - float3_X elong; - float_X phase; - typename FieldE::DataBoxType dataBoxE; - DataSpace< simDim > offsetToTotalDomain; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE Wavepacket( uint32_t currentStep ) - { - // get data - DataConnector & dc = Environment< >::get( ).DataConnector( ); - dataBoxE = dc.get< FieldE >( - FieldE::getName(), - true - )->getDeviceDataBox(); - - // get meta data for offsets - SubGrid< simDim > const & 
subGrid = Environment< simDim >::get().SubGrid(); - // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); - DataSpace< simDim > const globalCellOffset( subGrid.getLocalDomain().offset ); - DataSpace< simDim > const halfSimSize( subGrid.getGlobalDomain().size / 2 ); - - // transform coordinate system to center of global simulation as origin [cells] - offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; - - // @todo reset origin of direction of moving window - // offsetToTotalDomain.y() = 0 - - // a symmetric pulse will be initialized at position z=0 for - // a time of RAMP_INIT * PULSE_LENGTH + LASER_NOFOCUS_CONSTANT = INIT_TIME. - // we shift the complete pulse for the half of this time to start with - // the front of the laser pulse. - const float_64 mue = 0.5 * Unitless::INIT_TIME; - - float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift - mue; - - elong = float3_X::create( 0.0_X ); - float_X envelope = float_X( Unitless::AMPLITUDE ); - - const float_64 tau = Unitless::PULSE_LENGTH * math::sqrt( 2.0_X ); - - float_64 correctionFactor = 0.0; - - if( runTime > Unitless::startDownramp ) - { - // downramp = end - const float_64 exponent = - ( ( runTime - Unitless::startDownramp ) - / Unitless::PULSE_LENGTH / math::sqrt( 2.0 ) ); - envelope *= math::exp( -0.5 * exponent * exponent ); - correctionFactor = ( runTime - Unitless::startDownramp ) / ( tau * tau * Unitless::w ); - } - else if( runTime < Unitless::endUpramp ) - { - // upramp = start - const float_X exponent = ( ( runTime - Unitless::endUpramp ) / Unitless::PULSE_LENGTH / math::sqrt( 2.0_X ) ); - envelope *= math::exp( -0.5_X * exponent * exponent ); - correctionFactor = ( runTime - Unitless::endUpramp ) / ( tau * tau * Unitless::w ); - } - - phase += float_X( Unitless::w * runTime ) + Unitless::LASER_PHASE; - - if( Unitless::Polarisation == Unitless::LINEAR_X ) + namespace wavepacket { - elong.x() = envelope * ( math::sin( phase ) + 
correctionFactor * math::cos( phase ) ); - } - else if( Unitless::Polarisation == Unitless::LINEAR_Z ) + template + struct Unitless : public T_Params + { + using Params = T_Params; + + static constexpr float_X WAVE_LENGTH + = float_X(Params::WAVE_LENGTH_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X PULSE_LENGTH + = float_X(Params::PULSE_LENGTH_SI / UNIT_TIME); // unit: seconds (1 sigma) + static constexpr float_X LASER_NOFOCUS_CONSTANT + = float_X(Params::LASER_NOFOCUS_CONSTANT_SI / UNIT_TIME); // unit: seconds + static constexpr float_X AMPLITUDE + = float_X(Params::AMPLITUDE_SI / UNIT_EFIELD); // unit: Volt /meter + static constexpr float_X W0_X = float_X(Params::W0_X_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X W0_Z = float_X(Params::W0_Z_SI / UNIT_LENGTH); // unit: meter + static constexpr float_X INIT_TIME = float_X( + Params::PULSE_INIT * PULSE_LENGTH + + LASER_NOFOCUS_CONSTANT); // unit: seconds (full initialization length) + static constexpr float_X endUpramp = -0.5_X * LASER_NOFOCUS_CONSTANT; // unit: seconds + static constexpr float_X startDownramp = 0.5_X * LASER_NOFOCUS_CONSTANT; // unit: seconds + + /* initialize the laser not in the first cell is equal to a negative shift + * in time + */ + static constexpr float_X laserTimeShift = Params::initPlaneY * CELL_HEIGHT / SPEED_OF_LIGHT; + + static constexpr float_64 f = SPEED_OF_LIGHT / WAVE_LENGTH; + static constexpr float_64 w = 2.0 * PI * f; + }; + } // namespace wavepacket + + namespace acc { - elong.z() = envelope * ( math::sin( phase ) + correctionFactor * math::cos( phase ) ); - } - else if( Unitless::Polarisation == Unitless::CIRCULAR ) + template + struct Wavepacket : public T_Unitless + { + using Unitless = T_Unitless; + + float3_X m_elong; + float_X m_phase; + typename FieldE::DataBoxType m_dataBoxE; + DataSpace m_offsetToTotalDomain; + DataSpace m_superCellToLocalOriginCellOffset; + + /** Device-Side Constructor + * + * @param superCellToLocalOriginCellOffset 
local offset in cells to current supercell + * @param offsetToTotalDomain offset to origin of global (@todo: total) coordinate system (possibly + * after transform to centered origin) + */ + HDINLINE Wavepacket( + typename FieldE::DataBoxType const& dataBoxE, + DataSpace const& superCellToLocalOriginCellOffset, + DataSpace const& offsetToTotalDomain, + float3_X const& elong, + float_X const phase) + : m_elong(elong) + , m_phase(phase) + , m_dataBoxE(dataBoxE) + , m_offsetToTotalDomain(offsetToTotalDomain) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** device side manipulation for init plane (transversal) + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param cellIndexInSuperCell ND cell index in current supercell + */ + template + HDINLINE void operator()(T_Acc const&, DataSpace const& cellIndexInSuperCell) + { + // coordinate system to global simulation as origin + DataSpace const localCell(cellIndexInSuperCell + m_superCellToLocalOriginCellOffset); + + // transform coordinate system to center of x-z plane of initialization + constexpr uint8_t planeNormalDir = 1u; + DataSpace offsetToCenterOfPlane(m_offsetToTotalDomain); + offsetToCenterOfPlane[planeNormalDir] = 0; // do not shift origin of plane normal + floatD_X const pos + = precisionCast(localCell + offsetToCenterOfPlane) * cellSize.shrink(); + // @todo add half-cells via traits::FieldPosition< Solver::NumicalCellType, FieldE >() + + // transversal position only + float3_X const w0_3D(Unitless::W0_X, 0._X, Unitless::W0_Z); + auto const w0(w0_3D.shrink().remove()); + auto const pos_trans(pos.remove()); + auto const exp_compos(pos_trans * pos_trans / (w0 * w0)); + float_X const exp_arg(exp_compos.sumOfComponents()); + + m_elong *= math::exp(-1.0_X * exp_arg); + + if(Unitless::initPlaneY != 0) // compile time if + { + /* If the laser is not initialized in the first cell we emit a + * negatively and positively propagating wave. 
Therefore we need to multiply the + * amplitude with a correction factor depending of the cell size in + * propagation direction. + * The negatively propagating wave is damped by the absorber. + * + * The `correctionFactor` assume that the wave is moving in y direction. + */ + auto const correctionFactor = (SPEED_OF_LIGHT * DELTA_T) / CELL_HEIGHT * 2._X; + + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) + += correctionFactor * m_elong; + } + else + { + // jump over the guard of the electric field + m_dataBoxE(localCell + SuperCellSize::toRT() * GuardSize::toRT()) = m_elong; + } + } + }; + } // namespace acc + + template + struct Wavepacket : public wavepacket::Unitless { - elong.x() = envelope / math::sqrt( 2.0_X ) * ( math::sin( phase ) + correctionFactor * math::cos( phase ) ); - elong.z() = envelope / math::sqrt( 2.0_X ) * ( math::cos( phase ) + correctionFactor * math::sin( phase ) ); - } - } - - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset (in supercells, without guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::Wavepacket< Unitless > - operator()( - T_Acc const &, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & - ) const - { - auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); - return acc::Wavepacket< Unitless >( dataBoxE, superCellToLocalOriginCellOffset, offsetToTotalDomain, elong, phase ); - } - - //! 
get the name of the laser profile - static - HINLINE std::string - getName( ) - { - return "Wavepacket"; - } + using Unitless = wavepacket::Unitless; - }; + float3_X elong; + float_X phase; + typename FieldE::DataBoxType dataBoxE; + DataSpace offsetToTotalDomain; -} // namespace laserProfiles -} // namespace fields + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE Wavepacket(uint32_t currentStep) + { + // get data + DataConnector& dc = Environment<>::get().DataConnector(); + dataBoxE = dc.get(FieldE::getName(), true)->getDeviceDataBox(); + + // get meta data for offsets + SubGrid const& subGrid = Environment::get().SubGrid(); + // const DataSpace< simDim > totalCellOffset( subGrid.getGlobalDomain().offset ); + DataSpace const globalCellOffset(subGrid.getLocalDomain().offset); + DataSpace const halfSimSize(subGrid.getGlobalDomain().size / 2); + + // transform coordinate system to center of global simulation as origin [cells] + offsetToTotalDomain = /* totalCellOffset + */ globalCellOffset - halfSimSize; + + // @todo reset origin of direction of moving window + // offsetToTotalDomain.y() = 0 + + // a symmetric pulse will be initialized at position z=0 for + // a time of RAMP_INIT * PULSE_LENGTH + LASER_NOFOCUS_CONSTANT = INIT_TIME. + // we shift the complete pulse for the half of this time to start with + // the front of the laser pulse. 
+ const float_64 mue = 0.5 * Unitless::INIT_TIME; + + float_64 const runTime = DELTA_T * currentStep - Unitless::laserTimeShift - mue; + + elong = float3_X::create(0.0_X); + float_X envelope = float_X(Unitless::AMPLITUDE); + + const float_64 tau = Unitless::PULSE_LENGTH * math::sqrt(2.0_X); + + float_64 correctionFactor = 0.0; + + if(runTime > Unitless::startDownramp) + { + // downramp = end + const float_64 exponent + = ((runTime - Unitless::startDownramp) / Unitless::PULSE_LENGTH / math::sqrt(2.0)); + envelope *= math::exp(-0.5 * exponent * exponent); + correctionFactor = (runTime - Unitless::startDownramp) / (tau * tau * Unitless::w); + } + else if(runTime < Unitless::endUpramp) + { + // upramp = start + const float_X exponent + = ((runTime - Unitless::endUpramp) / Unitless::PULSE_LENGTH / math::sqrt(2.0_X)); + envelope *= math::exp(-0.5_X * exponent * exponent); + correctionFactor = (runTime - Unitless::endUpramp) / (tau * tau * Unitless::w); + } + + phase += float_X(Unitless::w * runTime) + Unitless::LASER_PHASE; + + if(Unitless::Polarisation == Unitless::LINEAR_X) + { + elong.x() = envelope * (math::sin(phase) + correctionFactor * math::cos(phase)); + } + else if(Unitless::Polarisation == Unitless::LINEAR_Z) + { + elong.z() = envelope * (math::sin(phase) + correctionFactor * math::cos(phase)); + } + else if(Unitless::Polarisation == Unitless::CIRCULAR) + { + elong.x() + = envelope / math::sqrt(2.0_X) * (math::sin(phase) + correctionFactor * math::cos(phase)); + elong.z() + = envelope / math::sqrt(2.0_X) * (math::cos(phase) + correctionFactor * math::sin(phase)); + } + } + + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset (in supercells, without guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::Wavepacket 
operator()( + T_Acc const&, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + auto const superCellToLocalOriginCellOffset = localSupercellOffset * SuperCellSize::toRT(); + return acc::Wavepacket( + dataBoxE, + superCellToLocalOriginCellOffset, + offsetToTotalDomain, + elong, + phase); + } + + //! get the name of the laser profile + static HINLINE std::string getName() + { + return "Wavepacket"; + } + }; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu - diff --git a/include/picongpu/fields/laserProfiles/profiles.def b/include/picongpu/fields/laserProfiles/profiles.def index f189d5aea1..5bff13daab 100644 --- a/include/picongpu/fields/laserProfiles/profiles.def +++ b/include/picongpu/fields/laserProfiles/profiles.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, * Richard Pausch, Alexander Debus, Ilja Goethel * * This file is part of PIConGPU. diff --git a/include/picongpu/fields/laserProfiles/profiles.hpp b/include/picongpu/fields/laserProfiles/profiles.hpp index 1c07befdc4..17a532811a 100644 --- a/include/picongpu/fields/laserProfiles/profiles.hpp +++ b/include/picongpu/fields/laserProfiles/profiles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, * Richard Pausch, Alexander Debus, Ilja Goethel * * This file is part of PIConGPU. diff --git a/include/picongpu/initialization/IInitPlugin.hpp b/include/picongpu/initialization/IInitPlugin.hpp index 371cd408f0..885834a0f6 100644 --- a/include/picongpu/initialization/IInitPlugin.hpp +++ b/include/picongpu/initialization/IInitPlugin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Rene Widera, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -18,7 +18,6 @@ */ - #pragma once #include @@ -30,7 +29,7 @@ namespace picongpu { using namespace pmacc; - class IInitPlugin : public ILightweightPlugin + class IInitPlugin : public ILightweightPlugin { public: virtual void slide(uint32_t currentStep) = 0; @@ -40,7 +39,5 @@ namespace picongpu virtual ~IInitPlugin() { } - }; -} - +} // namespace picongpu diff --git a/include/picongpu/initialization/InitPluginNone.hpp b/include/picongpu/initialization/InitPluginNone.hpp index 636d33a4cc..bd31db0913 100644 --- a/include/picongpu/initialization/InitPluginNone.hpp +++ b/include/picongpu/initialization/InitPluginNone.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -18,13 +18,11 @@ */ - #pragma once #include "picongpu/initialization/IInitPlugin.hpp" - namespace picongpu { using namespace pmacc; @@ -32,7 +30,6 @@ namespace picongpu class InitPluginNone : public IInitPlugin { public: - virtual void slide(uint32_t currentStep) { } @@ -62,12 +59,11 @@ namespace picongpu return "InitPluginNone"; } - virtual void setMappingDescription(MappingDesc *cellDescription) + virtual void setMappingDescription(MappingDesc* cellDescription) { } protected: - virtual void pluginLoad() { } @@ -77,5 +73,4 @@ namespace picongpu } }; -} - +} // namespace picongpu diff --git a/include/picongpu/initialization/InitialiserController.hpp b/include/picongpu/initialization/InitialiserController.hpp index b2f2a6303f..872ea786b1 100644 --- a/include/picongpu/initialization/InitialiserController.hpp +++ b/include/picongpu/initialization/InitialiserController.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -39,173 +39,167 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; -class InitialiserController : public IInitPlugin -{ -public: - - InitialiserController() : - cellDescription(nullptr) + class InitialiserController : public IInitPlugin { - } + public: + InitialiserController() : cellDescription(nullptr) + { + } - virtual ~InitialiserController() - { - } + virtual ~InitialiserController() + { + } - /** - * Initialize simulation state at timestep 0 - */ - virtual void init() - { - // start simulation using default values - log ("Starting simulation from timestep 0"); + /** + * Initialize simulation state at timestep 0 + */ + virtual void init() + { + // start simulation using default values + log("Starting simulation from timestep 0"); - SimStartInitialiser simStartInitialiser; - Environment<>::get().DataConnector().initialise(simStartInitialiser, 0); - __getTransactionEvent().waitForFinished(); + SimStartInitialiser simStartInitialiser; + Environment<>::get().DataConnector().initialise(simStartInitialiser, 0); + __getTransactionEvent().waitForFinished(); - log ("Loading from default values finished"); - } + log("Loading from default values finished"); + } - /** - * Load persistent simulation state from \p restartStep - */ - virtual void restart(uint32_t restartStep, const std::string restartDirectory) - { - // restart simulation by loading from persistent data - // the simulation will start after restartStep - log ("Restarting simulation from timestep %1% in directory '%2%'") % - restartStep % restartDirectory; - - Environment<>::get().PluginConnector().restartPlugins(restartStep, restartDirectory); - __getTransactionEvent().waitForFinished(); - - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaGetLastError()); - - GridController &gc = Environment::get().GridController(); - - // avoid deadlock between not finished pmacc tasks and MPI_Barrier - 
__getTransactionEvent().waitForFinished(); - /* can be spared for better scalings, but guarantees the user - * that the restart was successful */ - MPI_CHECK(MPI_Barrier(gc.getCommunicator().getMPIComm())); - - log ("Loading from persistent data finished"); - } - - /** Log omega_p for each species - * - * Calculate omega_p for each given species and create a `picLog::PHYSICS` - * log message - */ - template - struct LogOmegaP - { - void operator()() + /** + * Load persistent simulation state from \p restartStep + */ + virtual void restart(uint32_t restartStep, const std::string restartDirectory) { - /* The omega_p calculation is based on species' densityRatio - * relative to the BASE_DENSITY. Thus, it is only accurate - * for species with macroparticles sampled by density, - * but not necessarily for derived ones. - */ - using FrameType = typename T_Species::FrameType; - const float_32 charge = frame::getCharge(); - const float_32 mass = frame::getMass(); - const auto densityRatio = traits::GetDensityRatio< T_Species >::type::getValue( ); - const auto density = BASE_DENSITY * densityRatio; - log("species %2%: omega_p * dt <= 0.1 ? 
%1%") % - (sqrt(density * charge / mass * charge / EPS0) * DELTA_T) % - FrameType::getName(); + // restart simulation by loading from persistent data + // the simulation will start after restartStep + log("Restarting simulation from timestep %1% in directory '%2%'") % restartStep + % restartDirectory; + + Environment<>::get().PluginConnector().restartPlugins(restartStep, restartDirectory); + __getTransactionEvent().waitForFinished(); + + CUDA_CHECK(cuplaDeviceSynchronize()); + CUDA_CHECK(cuplaGetLastError()); + + GridController& gc = Environment::get().GridController(); + + // avoid deadlock between not finished pmacc tasks and MPI_Barrier + __getTransactionEvent().waitForFinished(); + /* can be spared for better scalings, but guarantees the user + * that the restart was successful */ + MPI_CHECK(MPI_Barrier(gc.getCommunicator().getMPIComm())); + + log("Loading from persistent data finished"); } - }; - /** - * Print interesting initialization information - */ - virtual void printInformation() - { - if (Environment::get().GridController().getGlobalRank() == 0) + /** Log omega_p for each species + * + * Calculate omega_p for each given species and create a `picLog::PHYSICS` + * log message + */ + template + struct LogOmegaP { - log("Courant c*dt <= %1% ? 
%2%") % - (1./math::sqrt(INV_CELL2_SUM)) % - (SPEED_OF_LIGHT * DELTA_T); - - using SpeciesWithMass = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - massRatio<> - >::type; - using SpeciesWithMassCharge = typename pmacc::particles::traits::FilterByFlag< - SpeciesWithMass, - chargeRatio<> - >::type; - meta::ForEach< SpeciesWithMassCharge, LogOmegaP<> > logOmegaP; - log("Resolving plasma oscillations?\n" - " Estimates are based on DensityRatio to BASE_DENSITY of each species\n" - " (see: density.param, speciesDefinition.param).\n" - " It and does not cover other forms of initialization"); - logOmegaP(); - - if (fields::laserProfiles::Selected::INIT_TIME > float_X(0.0)) - log("y-cells per wavelength: %1%") % - (fields::laserProfiles::Selected::WAVE_LENGTH / CELL_HEIGHT); - const int localNrOfCells = cellDescription->getGridLayout().getDataSpaceWithoutGuarding().productOfComponents(); - log("macro particles per device: %1%") % - (localNrOfCells * particles::TYPICAL_PARTICLES_PER_CELL * (bmpl::size::type::value)); - log("typical macro particle weighting: %1%") % (particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); - - - log("UNIT_SPEED %1%") % UNIT_SPEED; - log("UNIT_TIME %1%") % UNIT_TIME; - log("UNIT_LENGTH %1%") % UNIT_LENGTH; - log("UNIT_MASS %1%") % UNIT_MASS; - log("UNIT_CHARGE %1%") % UNIT_CHARGE; - log("UNIT_EFIELD %1%") % UNIT_EFIELD; - log("UNIT_BFIELD %1%") % UNIT_BFIELD; - log("UNIT_ENERGY %1%") % UNIT_ENERGY; + void operator()() + { + /* The omega_p calculation is based on species' densityRatio + * relative to the BASE_DENSITY. Thus, it is only accurate + * for species with macroparticles sampled by density, + * but not necessarily for derived ones. 
+ */ + using FrameType = typename T_Species::FrameType; + const float_32 charge = frame::getCharge(); + const float_32 mass = frame::getMass(); + const auto densityRatio = traits::GetDensityRatio::type::getValue(); + const auto density = BASE_DENSITY * densityRatio; + log("species %2%: omega_p * dt <= 0.1 ? %1%") + % (sqrt(density * charge / mass * charge / EPS0) * DELTA_T) % FrameType::getName(); + } + }; + + /** + * Print interesting initialization information + */ + virtual void printInformation() + { + if(Environment::get().GridController().getGlobalRank() == 0) + { + log("Courant c*dt <= %1% ? %2%") % (1. / math::sqrt(INV_CELL2_SUM)) + % (SPEED_OF_LIGHT * DELTA_T); + + using SpeciesWithMass = + typename pmacc::particles::traits::FilterByFlag>::type; + using SpeciesWithMassCharge = + typename pmacc::particles::traits::FilterByFlag>::type; + meta::ForEach> logOmegaP; + log("Resolving plasma oscillations?\n" + " Estimates are based on DensityRatio to BASE_DENSITY of each species\n" + " (see: density.param, speciesDefinition.param).\n" + " It and does not cover other forms of initialization"); + logOmegaP(); + + if(fields::laserProfiles::Selected::INIT_TIME > float_X(0.0)) + log("y-cells per wavelength: %1%") + % (fields::laserProfiles::Selected::WAVE_LENGTH / CELL_HEIGHT); + const int localNrOfCells + = cellDescription->getGridLayout().getDataSpaceWithoutGuarding().productOfComponents(); + log("macro particles per device: %1%") + % (localNrOfCells * particles::TYPICAL_PARTICLES_PER_CELL + * (bmpl::size::type::value)); + log("typical macro particle weighting: %1%") + % (particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); + + + log("UNIT_SPEED %1%") % UNIT_SPEED; + log("UNIT_TIME %1%") % UNIT_TIME; + log("UNIT_LENGTH %1%") % UNIT_LENGTH; + log("UNIT_MASS %1%") % UNIT_MASS; + log("UNIT_CHARGE %1%") % UNIT_CHARGE; + log("UNIT_EFIELD %1%") % UNIT_EFIELD; + log("UNIT_BFIELD %1%") % UNIT_BFIELD; + log("UNIT_ENERGY %1%") % UNIT_ENERGY; + } } - } - - void 
notify(uint32_t) - { - // nothing to do here - } - void pluginRegisterHelp(po::options_description& desc) - { - // nothing to do here - } + void notify(uint32_t) + { + // nothing to do here + } - std::string pluginGetName() const - { - return "Initializers"; - } + void pluginRegisterHelp(po::options_description& desc) + { + // nothing to do here + } - virtual void setMappingDescription(MappingDesc *cellDescription) - { - PMACC_ASSERT(cellDescription != nullptr); - this->cellDescription = cellDescription; - } + std::string pluginGetName() const + { + return "Initializers"; + } - virtual void slide(uint32_t currentStep) - { - SimStartInitialiser simStartInitialiser; - Environment<>::get().DataConnector().initialise(simStartInitialiser, currentStep); - __getTransactionEvent().waitForFinished(); - } + virtual void setMappingDescription(MappingDesc* cellDescription) + { + PMACC_ASSERT(cellDescription != nullptr); + this->cellDescription = cellDescription; + } -private: - /*Descripe simulation area*/ - MappingDesc *cellDescription; + virtual void slide(uint32_t currentStep) + { + SimStartInitialiser simStartInitialiser; + Environment<>::get().DataConnector().initialise(simStartInitialiser, currentStep); + __getTransactionEvent().waitForFinished(); + } - bool restartSim; - std::string restartFile; + private: + /*Descripe simulation area*/ + MappingDesc* cellDescription; -}; + bool restartSim; + std::string restartFile; + }; -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/initialization/ParserGridDistribution.cpp b/include/picongpu/initialization/ParserGridDistribution.cpp index 20250a187f..01ea99e1c0 100644 --- a/include/picongpu/initialization/ParserGridDistribution.cpp +++ b/include/picongpu/initialization/ParserGridDistribution.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. 
* @@ -21,8 +21,8 @@ #include #include -#include // std::vector -#include // std::string +#include // std::vector +#include // std::string #include // std::distance #include @@ -30,21 +30,19 @@ namespace picongpu { - - ParserGridDistribution::ParserGridDistribution( std::string const s ) + ParserGridDistribution::ParserGridDistribution(std::string const s) { - parsedInput = parse( s ); + parsedInput = parse(s); } - uint32_t - ParserGridDistribution::getOffset( uint32_t const devicePos, uint32_t const maxCells ) const + uint32_t ParserGridDistribution::getOffset(uint32_t const devicePos, uint32_t const maxCells) const { value_type::const_iterator iter = parsedInput.begin(); // go to last device of these n subdomains extent{n} uint32_t i = iter->count - 1u; uint32_t sum = 0u; - while( i < devicePos ) + while(i < devicePos) { // add last subdomain sum += iter->extent * iter->count; @@ -55,26 +53,25 @@ namespace picongpu } // add part of this subdomain that is before me - sum += iter->extent * ( devicePos + iter->count - i - 1u ); + sum += iter->extent * (devicePos + iter->count - i - 1u); // check total number of cells uint32_t sumTotal = 0u; - for( iter = parsedInput.begin(); iter != parsedInput.end(); ++iter ) + for(iter = parsedInput.begin(); iter != parsedInput.end(); ++iter) sumTotal += iter->extent * iter->count; - PMACC_VERIFY( sumTotal == maxCells ); + PMACC_VERIFY(sumTotal == maxCells); return sum; } - uint32_t - ParserGridDistribution::getLocalSize( uint32_t const devicePos ) const + uint32_t ParserGridDistribution::getLocalSize(uint32_t const devicePos) const { value_type::const_iterator iter = parsedInput.begin(); // go to last device of these n subdomains extent{n} uint32_t i = iter->count - 1u; - while( i < devicePos ) + while(i < devicePos) { ++iter; // go to last device of these n subdomains extent{n} @@ -84,57 +81,43 @@ namespace picongpu return iter->extent; } - void - ParserGridDistribution::verifyDevices( uint32_t const numDevices ) const + void 
ParserGridDistribution::verifyDevices(uint32_t const numDevices) const { uint32_t numSubdomains = 0u; - for( SubdomainPair const & p : parsedInput ) + for(SubdomainPair const& p : parsedInput) numSubdomains += p.count; - PMACC_VERIFY( numSubdomains == numDevices ); + PMACC_VERIFY(numSubdomains == numDevices); } - ParserGridDistribution::value_type - ParserGridDistribution::parse( std::string const s ) const + ParserGridDistribution::value_type ParserGridDistribution::parse(std::string const s) const { - std::regex regFind( - R"([0-9]+(\{[0-9]+})*)", - std::regex::egrep - ); + std::regex regFind(R"([0-9]+(\{[0-9]+})*)", std::regex::egrep); - std::sregex_token_iterator iter( s.begin( ), s.end( ), - regFind, 0 ); + std::sregex_token_iterator iter(s.begin(), s.end(), regFind, 0); std::sregex_token_iterator end; value_type newInput; - newInput.reserve( std::distance( iter, end ) ); + newInput.reserve(std::distance(iter, end)); - for(; iter != end; ++iter ) + for(; iter != end; ++iter) { std::string pM = *iter; // find count n and extent b of b{n} - std::regex regCount( - R"((.*\{)|(}))", - std::regex::egrep - ); - std::string count = std::regex_replace( pM, regCount, "" ); - - std::regex regExtent( - R"(\{.*})", - std::regex::egrep - ); - std::string extent = std::regex_replace( pM, regExtent, "" ); + std::regex regCount(R"((.*\{)|(}))", std::regex::egrep); + std::string count = std::regex_replace(pM, regCount, ""); + + std::regex regExtent(R"(\{.*})", std::regex::egrep); + std::string extent = std::regex_replace(pM, regExtent, ""); // no count {n} given (implies one) - if( count == *iter ) + if(count == *iter) count = "1"; - const SubdomainPair g = { - static_cast< uint32_t > ( std::stoul(extent) ), - static_cast< uint32_t > ( std::stoul(count) ) - }; - newInput.emplace_back( g ); + const SubdomainPair g + = {static_cast(std::stoul(extent)), static_cast(std::stoul(count))}; + newInput.emplace_back(g); } return newInput; diff --git 
a/include/picongpu/initialization/ParserGridDistribution.hpp b/include/picongpu/initialization/ParserGridDistribution.hpp index 7366ff5e7a..d4058ea5f9 100644 --- a/include/picongpu/initialization/ParserGridDistribution.hpp +++ b/include/picongpu/initialization/ParserGridDistribution.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -19,73 +19,68 @@ #pragma once -#include // std::vector -#include // std::string +#include // std::vector +#include // std::string #include namespace picongpu { + class ParserGridDistribution + { + private: + /** 1D sudomain extents + * + * Pair of extent and count entry in our grid distribution. + * + * For example, a single entry of the grid distribution a,b,c{n},d{m},e,f + * is stored as entry (a,1) in SubdomainPair. Another as (b,1), another + * n equally spaced subdomains as (c,n), another m subdomains of extent d + * as (d,m), and so on. + */ + struct SubdomainPair + { + // extent of the current subdomain + uint32_t extent; + // count of how often the subdomain shall be repeated + uint32_t count; + }; + using value_type = std::vector; -class ParserGridDistribution -{ -private: - /** 1D sudomain extents - * - * Pair of extent and count entry in our grid distribution. - * - * For example, a single entry of the grid distribution a,b,c{n},d{m},e,f - * is stored as entry (a,1) in SubdomainPair. Another as (b,1), another - * n equally spaced subdomains as (c,n), another m subdomains of extent d - * as (d,m), and so on. 
- */ - struct SubdomainPair { - // extent of the current subdomain - uint32_t extent; - // count of how often the subdomain shall be repeated - uint32_t count; - }; - using value_type = std::vector< SubdomainPair >; - -public: - ParserGridDistribution( std::string const s ); + public: + ParserGridDistribution(std::string const s); - uint32_t - getOffset( uint32_t const devicePos, uint32_t const maxCells ) const; + uint32_t getOffset(uint32_t const devicePos, uint32_t const maxCells) const; - /** Get local Size of this dimension - * - * \param[in] devicePos as unsigned integer in the range [0, n-1] for this dimension - * \return uint32_t with local number of cells - */ - uint32_t - getLocalSize( uint32_t const devicePos ) const; + /** Get local Size of this dimension + * + * \param[in] devicePos as unsigned integer in the range [0, n-1] for this dimension + * \return uint32_t with local number of cells + */ + uint32_t getLocalSize(uint32_t const devicePos) const; - /** Verify the number of subdomains matches the devices - * - * Check that the number of subdomains in a dimension, after we - * expanded all regexes, matches the number of devices for it. - * - * \param[in] numDevices number of devices for this dimension - */ - void - verifyDevices( uint32_t const numDevices ) const; + /** Verify the number of subdomains matches the devices + * + * Check that the number of subdomains in a dimension, after we + * expanded all regexes, matches the number of devices for it. 
+ * + * \param[in] numDevices number of devices for this dimension + */ + void verifyDevices(uint32_t const numDevices) const; -private: - value_type parsedInput; + private: + value_type parsedInput; - /** Parses the input string to a vector of SubdomainPair(s) - * - * Parses the input string in the form a,b,c{n},d{m},e,f - * to a vector of SubdomainPair with extent number (a,b,c,d,e,f) and - * counts (1,1,n,m,e,f) - * - * \param[in] s as string in the form a,b{n} - * \return std::vector with 2x uint32_t (extent, count) - */ - value_type - parse( std::string const s ) const; - -}; + /** Parses the input string to a vector of SubdomainPair(s) + * + * Parses the input string in the form a,b,c{n},d{m},e,f + * to a vector of SubdomainPair with extent number (a,b,c,d,e,f) and + * counts (1,1,n,m,e,f) + * + * \param[in] s as string in the form a,b{n} + * \return std::vector with 2x uint32_t (extent, count) + */ + value_type parse(std::string const s) const; + }; } // namespace picongpu diff --git a/include/picongpu/initialization/SimStartInitialiser.hpp b/include/picongpu/initialization/SimStartInitialiser.hpp index c21cc5da79..1a831feb28 100644 --- a/include/picongpu/initialization/SimStartInitialiser.hpp +++ b/include/picongpu/initialization/SimStartInitialiser.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -27,26 +27,21 @@ namespace picongpu { - -/** - * Simulation startup initialiser. - * - * Initialises a new simulation from default values. - * - */ -class SimStartInitialiser : public AbstractInitialiser -{ -public: - - void init(ISimulationData& data, uint32_t currentStep) + /** + * Simulation startup initialiser. + * + * Initialises a new simulation from default values. 
+ * + */ + class SimStartInitialiser : public AbstractInitialiser { - - } - - virtual ~SimStartInitialiser() - { - - } -}; -} - + public: + void init(ISimulationData& data, uint32_t currentStep) + { + } + + virtual ~SimStartInitialiser() + { + } + }; +} // namespace picongpu diff --git a/include/picongpu/main.cpp b/include/picongpu/main.cpp index 5b0bfb5a37..a4dbd92cab 100644 --- a/include/picongpu/main.cpp +++ b/include/picongpu/main.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Sergei Bastrakov * * This file is part of PIConGPU. @@ -18,6 +18,8 @@ * If not, see . */ +#include + #include "picongpu/ArgsParser.hpp" #include #include @@ -32,37 +34,36 @@ namespace { - /** Run a PIConGPU simulation * * @param argc count of arguments in argv (same as for main() ) * @param argv arguments of program start (same as for main() ) */ - int runSimulation( int argc, char **argv ) + int runSimulation(int argc, char** argv) { using namespace picongpu; simulation_starter::SimStarter sim; - auto const parserStatus = sim.parseConfigs( argc, argv ); + auto const parserStatus = sim.parseConfigs(argc, argv); int errorCode = EXIT_FAILURE; - switch( parserStatus ) + switch(parserStatus) { - case ArgsParser::Status::error: - errorCode = EXIT_FAILURE; - break; - case ArgsParser::Status::success: - sim.load( ); - sim.start( ); - sim.unload( ); - PMACC_FALLTHROUGH; - case ArgsParser::Status::successExit: - errorCode = 0; - break; + case ArgsParser::Status::error: + errorCode = EXIT_FAILURE; + break; + case ArgsParser::Status::success: + sim.load(); + sim.start(); + sim.unload(); + PMACC_FALLTHROUGH; + case ArgsParser::Status::successExit: + errorCode = 0; + break; }; // finalize the pmacc context */ - pmacc::Environment<>::get( ).finalize( ); + pmacc::Environment<>::get().finalize(); return errorCode; } @@ -74,21 +75,20 @@ namespace * @param argc count of arguments 
in argv * @param argv arguments of program start */ -int main( int argc, char **argv ) +int main(int argc, char** argv) { try { - return runSimulation( argc, argv ); + return runSimulation(argc, argv); } // A last-ditch effort to report exceptions to a user - catch ( const std::exception & ex ) + catch(const std::exception& ex) { - auto const typeName = std::string( typeid( ex ).name( ) ); - std::cerr << "Unhandled exception of type '" + typeName + - "' with message '" + ex.what() + "', terminating\n"; + auto const typeName = std::string(typeid(ex).name()); + std::cerr << "Unhandled exception of type '" + typeName + "' with message '" + ex.what() + "', terminating\n"; return EXIT_FAILURE; } - catch ( ... ) + catch(...) { std::cerr << "Unhandled exception of unknown type, terminating\n"; return EXIT_FAILURE; diff --git a/include/picongpu/param/bremsstrahlung.param b/include/picongpu/param/bremsstrahlung.param index 6cadb06d21..14f7c0c279 100644 --- a/include/picongpu/param/bremsstrahlung.param +++ b/include/picongpu/param/bremsstrahlung.param @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -21,98 +21,98 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -/** params related to the energy loss and deflection of the incident electron - */ -namespace electron -{ - /** Minimal kinetic electron energy in MeV for the lookup table. - * For electrons below this value Bremsstrahlung is not taken into account. - */ - constexpr float_64 MIN_ENERGY_MeV = 0.5; - - /** Maximal kinetic electron energy in MeV for the lookup table. - * Electrons above this value cause a out-of-bounds access at the - * lookup table. Bounds checking is enabled for "CRITICAL" log level. - */ - constexpr float_64 MAX_ENERGY_MeV = 200.0; - - /** Minimal polar deflection angle due to screening. See Jackson 13.5 for a rule of thumb to this value. 
*/ - constexpr float_64 MIN_THETA = 0.01; - - /** number of lookup table divisions for the kappa axis. - * Kappa is the energy loss normalized to the initial kinetic energy. - * The axis is scaled linearly. - */ - constexpr uint32_t NUM_SAMPLES_KAPPA = 32; - - /** number of lookup table divisions for the initial kinetic energy axis. - * The axis is scaled logarithmically. - */ - constexpr uint32_t NUM_SAMPLES_EKIN = 32; - - /** Kappa is the energy loss normalized to the initial kinetic energy. - * This minimal value is needed by the numerics to avoid a division by zero. - */ - constexpr float_64 MIN_KAPPA = 1.0e-10; - -} // namespace electron - -/** params related to the creation and the emission angle of the photon - */ -namespace photon -{ - /** Low-energy threshold in keV of the incident electron for the creation of photons. - * Below this value photon emission is neglected. - */ - constexpr float_64 SOFT_PHOTONS_CUTOFF_keV = 5000.0; - - /** number of lookup table divisions for the delta axis. - * Delta is the angular emission probability (normalized to one) integrated from zero to theta, - * where theta is the angle between the photon momentum and the final electron momentum. - * - * The axis is scaled linearly. - */ - constexpr uint32_t NUM_SAMPLES_DELTA = 256; - - /** number of lookup table divisions for the gamma axis. - * Gamma is the relativistic factor of the incident electron. - * - * The axis is scaled logarithmically. - */ - constexpr uint32_t NUM_SAMPLES_GAMMA = 64; - - /** Maximal value of delta for the lookup table. - * Delta is the angular emission probability (normalized to one) integrated from zero to theta, - * where theta is the angle between the photon momentum and the final electron momentum. - * - * A value close to one is reasonable. Though exactly one was actually correct, - * because it would map to theta = pi (maximum polar angle), the sampling then would be bad - * in the ultrarelativistic case. 
In this regime the emission primarily takes place at small thetas. - * So a maximum delta close to one maps to a reasonable maximum theta. - */ - constexpr float_64 MAX_DELTA = 0.95; - - /** minimal gamma for the lookup table. */ - constexpr float_64 MIN_GAMMA = 1.0; - - /** maximal gamma for the lookup table. - * Bounds checking is enabled for "CRITICAL" log level. - */ - constexpr float_64 MAX_GAMMA = 250; - - /** if the emission probability per timestep is higher than this value and the log level is set to - * "CRITICAL" a warning will be raised. - */ - constexpr float_64 SINGLE_EMISSION_PROB_LIMIT = 0.4; - - constexpr float_64 WEIGHTING_RATIO = 10; -} // namespace photon - -} // namespace bremsstrahlung -} // namespace particles + namespace particles + { + namespace bremsstrahlung + { + /** params related to the energy loss and deflection of the incident electron + */ + namespace electron + { + /** Minimal kinetic electron energy in MeV for the lookup table. + * For electrons below this value Bremsstrahlung is not taken into account. + */ + constexpr float_64 MIN_ENERGY_MeV = 0.5; + + /** Maximal kinetic electron energy in MeV for the lookup table. + * Electrons above this value cause a out-of-bounds access at the + * lookup table. Bounds checking is enabled for "CRITICAL" log level. + */ + constexpr float_64 MAX_ENERGY_MeV = 200.0; + + /** Minimal polar deflection angle due to screening. See Jackson 13.5 for a rule of thumb to this + * value. */ + constexpr float_64 MIN_THETA = 0.01; + + /** number of lookup table divisions for the kappa axis. + * Kappa is the energy loss normalized to the initial kinetic energy. + * The axis is scaled linearly. + */ + constexpr uint32_t NUM_SAMPLES_KAPPA = 32; + + /** number of lookup table divisions for the initial kinetic energy axis. + * The axis is scaled logarithmically. + */ + constexpr uint32_t NUM_SAMPLES_EKIN = 32; + + /** Kappa is the energy loss normalized to the initial kinetic energy. 
+ * This minimal value is needed by the numerics to avoid a division by zero. + */ + constexpr float_64 MIN_KAPPA = 1.0e-10; + + } // namespace electron + + /** params related to the creation and the emission angle of the photon + */ + namespace photon + { + /** Low-energy threshold in keV of the incident electron for the creation of photons. + * Below this value photon emission is neglected. + */ + constexpr float_64 SOFT_PHOTONS_CUTOFF_keV = 5000.0; + + /** number of lookup table divisions for the delta axis. + * Delta is the angular emission probability (normalized to one) integrated from zero to theta, + * where theta is the angle between the photon momentum and the final electron momentum. + * + * The axis is scaled linearly. + */ + constexpr uint32_t NUM_SAMPLES_DELTA = 256; + + /** number of lookup table divisions for the gamma axis. + * Gamma is the relativistic factor of the incident electron. + * + * The axis is scaled logarithmically. + */ + constexpr uint32_t NUM_SAMPLES_GAMMA = 64; + + /** Maximal value of delta for the lookup table. + * Delta is the angular emission probability (normalized to one) integrated from zero to theta, + * where theta is the angle between the photon momentum and the final electron momentum. + * + * A value close to one is reasonable. Though exactly one was actually correct, + * because it would map to theta = pi (maximum polar angle), the sampling then would be bad + * in the ultrarelativistic case. In this regime the emission primarily takes place at small thetas. + * So a maximum delta close to one maps to a reasonable maximum theta. + */ + constexpr float_64 MAX_DELTA = 0.95; + + /** minimal gamma for the lookup table. */ + constexpr float_64 MIN_GAMMA = 1.0; + + /** maximal gamma for the lookup table. + * Bounds checking is enabled for "CRITICAL" log level. 
+ */ + constexpr float_64 MAX_GAMMA = 250; + + /** if the emission probability per timestep is higher than this value and the log level is set to + * "CRITICAL" a warning will be raised. + */ + constexpr float_64 SINGLE_EMISSION_PROB_LIMIT = 0.4; + + constexpr float_64 WEIGHTING_RATIO = 10; + } // namespace photon + + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/param/components.param b/include/picongpu/param/components.param index bd261d1820..a960304deb 100644 --- a/include/picongpu/param/components.param +++ b/include/picongpu/param/components.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Anton Helm, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Anton Helm, * Rene Widera, Richard Pausch * * This file is part of PIConGPU. @@ -30,12 +30,12 @@ namespace picongpu { -/** @namespace simulation_starter - * - * Simulation Starter Selection: - * This value does usually not need to be changed. Change only if you want to - * implement your own `SimulationHelper` (e.g. `MySimulation`) class. - * - defaultPIConGPU : default PIConGPU configuration - */ -namespace simulation_starter = defaultPIConGPU; + /** @namespace simulation_starter + * + * Simulation Starter Selection: + * This value does usually not need to be changed. Change only if you want to + * implement your own `SimulationHelper` (e.g. `Simulation`) class. + * - defaultPIConGPU : default PIConGPU configuration + */ + namespace simulation_starter = defaultPIConGPU; } // namespace picongpu diff --git a/include/picongpu/param/density.param b/include/picongpu/param/density.param index 087085a75e..6712a5b70a 100644 --- a/include/picongpu/param/density.param +++ b/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. 
@@ -34,250 +34,244 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - */ - constexpr float_64 BASE_DENSITY_SI = 1.e25; -} // namespace SI - -namespace densityProfiles -{ - /** Profile Formula: - * `const float_X exponent = abs((y - gasCenter_SI) / gasSigma_SI);` - * `const float_X density = exp(gasFactor * pow(exponent, gasPower));` - * - * takes `gasCenterLeft_SI for y < gasCenterLeft_SI`, - * `gasCenterRight_SI for y > gasCenterRight_SI`, - * and `exponent = 0.0 for gasCenterLeft_SI < y < gasCenterRight_SI` - */ - PMACC_STRUCT(GaussianParam, - /** ... - */ - (PMACC_C_VALUE(float_X, gasFactor, -1.0)) - (PMACC_C_VALUE(float_X, gasPower, 4.0)) - - /** height of vacuum area on top border + namespace SI + { + /** Base density in particles per m^3 in the density profiles. 
* - * this vacuum is important because of the laser initialization, - * which is done in the first cells of the simulation and - * assumes a charge-free volume - * unit: cells - */ - (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) - - /** The central position of the distribution - * unit: meter - */ - (PMACC_C_VALUE(float_64, gasCenterLeft_SI, 4.62e-5)) - (PMACC_C_VALUE(float_64, gasCenterRight_SI, 4.62e-5)) - - /** the distance from gasCenter_SI until the gas density decreases to its 1/e-th part - * unit: meter - */ - (PMACC_C_VALUE(float_64, gasSigmaLeft_SI, 4.62e-5)) - (PMACC_C_VALUE(float_64, gasSigmaRight_SI, 4.62e-5)) - ); /* struct GaussianParam */ - - /* definition of density profile with gaussian profile */ - using Gaussian = GaussianImpl< GaussianParam >; - - - /* definition of homogenous profile */ - using Homogenous = HomogenousImpl; - - - /** parameter for `LinearExponential` profile - * - * @verbatim - * Density Profile: /\ - * / -,_ - * linear / -,_ exponential - * slope / | -,_ slope - * MAX - * @endverbatim - */ - PMACC_STRUCT(LinearExponentialParam, - /** height of vacuum area on top border + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
* - * this vacuum is important because of the laser initialization, - * which is done in the first cells of the simulation and - * assumes a charge-free volume - * unit: cells - */ - (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) - - /** Y-Position where the linear slope ends and the exponential slope - * begins - * unit: meter - */ - (PMACC_C_VALUE(float_64, gasYMax_SI, 1.0e-3)) - - /** Parameters for the linear slope: - * For Y <= gasYMax_SI: - * \rho / BASE_DENSITY = A * Y + B - * = element [0.0; 1.0] - * unit for A: 1/m - * unit for B: none + * unit: ELEMENTS/m^3 */ - (PMACC_C_VALUE(float_64, gasA_SI, 1.0e-3)) - - /** Parameters for the exponential slope - * For Y > gasYMax_SI: - * let Y' = Y - gasYMax_SI - * \rho = exp[ - Y' * D ] - * = element [0.0; 1.0] - * unit: 1/m - */ - (PMACC_C_VALUE(float_64, gasD_SI, 1.0e-3)) - - (PMACC_C_VALUE(float_64, gasB, 0.0)) - ); /* struct LinearExponentialParam */ - - /* definition of gas with linear start slop and exponential end slope */ - using LinearExponential = LinearExponentialImpl< LinearExponentialParam >; + constexpr float_64 BASE_DENSITY_SI = 1.e25; + } // namespace SI - - PMACC_STRUCT(GaussianCloudParam, + namespace densityProfiles + { /** Profile Formula: - * exponent = |globalCellPos - center| / sigma - * density = e^[ gasFactor * exponent^gasPower ] - */ - (PMACC_C_VALUE(float_X, gasFactor, -0.5)) - (PMACC_C_VALUE(float_X, gasPower, 2.0)) - - /** height of vacuum area on top border - * - * this vacuum is important because of the laser initialization, - * which is done in the first cells of the simulation and - * assumes a charge-free volume - * unit: cells - */ - (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) - - /** The central position of the gas distribution - * unit: meter - */ - (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 1.134e-5, 1.134e-5, 1.134e-5)) - - /** the distance from gasCenter_SI until the gas density decreases to its 1/e-th part - * unit: meter */ - (PMACC_C_VECTOR_DIM(float_64, simDim, 
sigma_SI, 7.0e-6, 7.0e-6, 7.0e-6)) - ); /* struct GaussianCloudParam */ - - /* definition of cloud profile */ - using GaussianCloud = GaussianCloudImpl< GaussianCloudParam >; - - - /** The profile consists out of the composition of 3 1D profiles - * with the scheme: exponential increasing flank, constant sphere, - * exponential decreasing flank - * @verbatim - * ___ - * 1D: _,./ \.,_ rho(r) - * - * 2D: ..,x,.. density: . low - * .,xxx,. , middle - * ..,x,.. x high (constant) - * @endverbatim - */ - PMACC_STRUCT(SphereFlanksParam, - /** height of vacuum area on top border + * `const float_X exponent = abs((y - gasCenter_SI) / gasSigma_SI);` + * `const float_X density = exp(gasFactor * pow(exponent, gasPower));` * - * this vacuum is important because of the laser initialization, - * which is done in the first cells of the simulation and - * assumes a charge-free volume - * unit: cells - */ - (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) - - /** Radius of the constant sphere - * unit: meter - */ - (PMACC_C_VALUE(float_64, r_SI, 1.0e-3)) - - /** Inner radius if you want to build a shell/ring - * unit: meter - */ - (PMACC_C_VALUE(float_64, ri_SI, 0.0)) - - /** Middle of the constant sphere - * unit: meter - */ - (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 8.0e-3, 8.0e-3, 8.0e-3)) - - /** Parameters for the exponential slope - * For distance > r_SI: - * let distance' = distance - r - * \rho = exp[ - distance' * exponent ] - * unit: 1/m + * takes `gasCenterLeft_SI for y < gasCenterLeft_SI`, + * `gasCenterRight_SI for y > gasCenterRight_SI`, + * and `exponent = 0.0 for gasCenterLeft_SI < y < gasCenterRight_SI` */ - (PMACC_C_VALUE(float_64, exponent_SI, 1.0e3)) - - ); /* struct SphereFlanksParam */ + PMACC_STRUCT( + GaussianParam, + /** ... 
+ */ + (PMACC_C_VALUE(float_X, gasFactor, -1.0))(PMACC_C_VALUE(float_X, gasPower, 4.0)) - /* definition of sphere profile with flanks */ - using SphereFlanks = SphereFlanksImpl<SphereFlanksParam>; + /** height of vacuum area on top border + * + * this vacuum is important because of the laser initialization, + * which is done in the first cells of the simulation and + * assumes a charge-free volume + * unit: cells + */ + (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) + /** The central position of the distribution + * unit: meter + */ + (PMACC_C_VALUE(float_64, gasCenterLeft_SI, 4.62e-5))(PMACC_C_VALUE(float_64, gasCenterRight_SI, 4.62e-5)) - PMACC_STRUCT(FromHDF5Param, - /* prefix of filename - * full file name: gas_0.h5 - * filename = "gas" - * iteration = 0 - */ - (PMACC_C_STRING(filename,"gas")) + /** the distance from gasCenter_SI until the gas density decreases to its 1/e-th part + * unit: meter + */ + (PMACC_C_VALUE(float_64, gasSigmaLeft_SI, 4.62e-5))( + PMACC_C_VALUE(float_64, gasSigmaRight_SI, 4.62e-5))); /* struct GaussianParam */ - (PMACC_C_STRING(datasetName,"fields/e_chargeDensity")) + /* definition of density profile with gaussian profile */ + using Gaussian = GaussianImpl<GaussianParam>; - /* simulation step*/ - (PMACC_C_VALUE(uint32_t, iteration, 0)) - (PMACC_C_VALUE(float_X, defaultDensity, 0.0)) - ); /* struct FromHDF5Param */ - /* definition of cloud profile */ - using FromHDF5 = FromHDF5Impl< FromHDF5Param >; + /* definition of homogenous profile */ + using Homogenous = HomogenousImpl; - struct FreeFormulaFunctor - { - /** This formula uses SI quantities only. - * The profile will be multiplied by BASE_DENSITY_SI. 
+ /** parameter for `LinearExponential` profile * - * @param position_SI total offset including all slides [meter] - * @param cellSize_SI cell sizes [meter] + * @verbatim + * Density Profile: /\ + * / -,_ + * linear / -,_ exponential + * slope / | -,_ slope + * MAX + * @endverbatim + */ + PMACC_STRUCT( + LinearExponentialParam, + /** height of vacuum area on top border + * + * this vacuum is important because of the laser initialization, + * which is done in the first cells of the simulation and + * assumes a charge-free volume + * unit: cells + */ + (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) + + /** Y-Position where the linear slope ends and the exponential slope + * begins + * unit: meter + */ + (PMACC_C_VALUE(float_64, gasYMax_SI, 1.0e-3)) + + /** Parameters for the linear slope: + * For Y <= gasYMax_SI: + * \rho / BASE_DENSITY = A * Y + B + * = element [0.0; 1.0] + * unit for A: 1/m + * unit for B: none + */ + (PMACC_C_VALUE(float_64, gasA_SI, 1.0e-3)) + + /** Parameters for the exponential slope + * For Y > gasYMax_SI: + * let Y' = Y - gasYMax_SI + * \rho = exp[ - Y' * D ] + * = element [0.0; 1.0] + * unit: 1/m + */ + (PMACC_C_VALUE(float_64, gasD_SI, 1.0e-3)) + + (PMACC_C_VALUE(float_64, gasB, 0.0))); /* struct LinearExponentialParam */ + + /* definition of gas with linear start slop and exponential end slope */ + using LinearExponential = LinearExponentialImpl<LinearExponentialParam>; + + + PMACC_STRUCT( + GaussianCloudParam, + /** Profile Formula: + * exponent = |globalCellPos - center| / sigma + * density = e^[ gasFactor * exponent^gasPower ] + */ + (PMACC_C_VALUE(float_X, gasFactor, -0.5))(PMACC_C_VALUE(float_X, gasPower, 2.0)) + + /** height of vacuum area on top border + * + * this vacuum is important because of the laser initialization, + * which is done in the first cells of the simulation and + * assumes a charge-free volume + * unit: cells + */ + (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) + + /** The central position of the gas distribution + * unit: meter + */ + 
(PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 1.134e-5, 1.134e-5, 1.134e-5)) + + /** the distance from gasCenter_SI until the gas density decreases to its 1/e-th part + * unit: meter */ + (PMACC_C_VECTOR_DIM(float_64, simDim, sigma_SI, 7.0e-6, 7.0e-6, 7.0e-6))); /* struct GaussianCloudParam */ + + /* definition of cloud profile */ + using GaussianCloud = GaussianCloudImpl<GaussianCloudParam>; + + + /** The profile consists out of the composition of 3 1D profiles + * with the scheme: exponential increasing flank, constant sphere, + * exponential decreasing flank + * @verbatim + * ___ + * 1D: _,./ \.,_ rho(r) * - * @return float_X density [normalized to 1.0] + * 2D: ..,x,.. density: . low + * .,xxx,. , middle + * ..,x,.. x high (constant) + * @endverbatim */ - HDINLINE float_X - operator()( - const floatD_64& position_SI, - const float3_64& cellSize_SI - ) + PMACC_STRUCT( + SphereFlanksParam, + /** height of vacuum area on top border + * + * this vacuum is important because of the laser initialization, + * which is done in the first cells of the simulation and + * assumes a charge-free volume + * unit: cells + */ + (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) + + /** Radius of the constant sphere + * unit: meter + */ + (PMACC_C_VALUE(float_64, r_SI, 1.0e-3)) + + /** Inner radius if you want to build a shell/ring + * unit: meter + */ + (PMACC_C_VALUE(float_64, ri_SI, 0.0)) + + /** Middle of the constant sphere + * unit: meter + */ + (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 8.0e-3, 8.0e-3, 8.0e-3)) + + /** Parameters for the exponential slope + * For distance > r_SI: + * let distance' = distance - r + * \rho = exp[ - distance' * exponent ] + * unit: 1/m + */ + (PMACC_C_VALUE(float_64, exponent_SI, 1.0e3)) + + ); /* struct SphereFlanksParam */ + + /* definition of sphere profile with flanks */ + using SphereFlanks = SphereFlanksImpl<SphereFlanksParam>; + + + PMACC_STRUCT( + FromHDF5Param, + /* prefix of filename + * full file name: gas_0.h5 + * filename = "gas" + * iteration = 0 + */ + 
(PMACC_C_STRING(filename, "gas")) + + (PMACC_C_STRING(datasetName, "fields/e_chargeDensity")) + + /* simulation step*/ + (PMACC_C_VALUE(uint32_t, iteration, 0))( + PMACC_C_VALUE(float_X, defaultDensity, 0.0))); /* struct FromHDF5Param */ + + /* definition of cloud profile */ + using FromHDF5 = FromHDF5Impl<FromHDF5Param>; + + + struct FreeFormulaFunctor { - const float_64 y( position_SI.y() * 1000.0 ); // m -> mm - //const uint64_t y_cell_id( uint64_t(position_SI.y() / cellSize_SI[1]) ); - - /* triangle function example - * for a density profile from 0 to 400 microns */ - float_X s = 1.0_X - 5.0_X * math::abs( y - 0.2_X ); - - /* give it an empty/filled striping for every second cell */ - //s *= float_X( (y_cell_id % 2) == 0 ); - - /* all parts of the function MUST be > 0 */ - s *= float_X( s >= 0.0 ); - return s; - } - }; - - /* definition of free formula profile */ - using FreeFormula = FreeFormulaImpl< FreeFormulaFunctor >; -} // namespace densityProfiles + /** This formula uses SI quantities only. + * The profile will be multiplied by BASE_DENSITY_SI. 
+ * + * @param position_SI total offset including all slides [meter] + * @param cellSize_SI cell sizes [meter] + * + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + const float_64 y(position_SI.y() * 1000.0); // m -> mm + // const uint64_t y_cell_id( uint64_t(position_SI.y() / cellSize_SI[1]) ); + + /* triangle function example + * for a density profile from 0 to 400 microns */ + float_X s = 1.0_X - 5.0_X * math::abs(y - 0.2_X); + + /* give it an empty/filled striping for every second cell */ + // s *= float_X( (y_cell_id % 2) == 0 ); + + /* all parts of the function MUST be > 0 */ + s *= float_X(s >= 0.0); + return s; + } + }; + + /* definition of free formula profile */ + using FreeFormula = FreeFormulaImpl<FreeFormulaFunctor>; + } // namespace densityProfiles } // namespace picongpu diff --git a/include/picongpu/param/dimension.param b/include/picongpu/param/dimension.param index 081417d0bf..a05b7ade06 100644 --- a/include/picongpu/param/dimension.param +++ b/include/picongpu/param/dimension.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PIConGPU. * diff --git a/include/picongpu/param/fieldBackground.param b/include/picongpu/param/fieldBackground.param index 357f97801a..7cfdaff60f 100644 --- a/include/picongpu/param/fieldBackground.param +++ b/include/picongpu/param/fieldBackground.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Alexander Debus, Richard Pausch +/* Copyright 2014-2021 Axel Huebl, Alexander Debus, Richard Pausch * * This file is part of PIConGPU. 
* @@ -36,16 +36,15 @@ namespace picongpu /* We use this to calculate your SI input back to our unit system */ PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundE( const float3_64 unitField ) : m_unitField(unitField) - {} + HDINLINE FieldBackgroundE(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field E(r,t) here * * \param cellIdx The total cell id counted from the start at t = 0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { /* example: periodicity of 20 microns ( = 2.0e-5 m) */ constexpr float_64 period_SI(20.0e-6); @@ -55,8 +54,8 @@ namespace picongpu * multiplying with DELTA_T_SI */ /* specify your E-Field in V/m and convert to PIConGPU units */ - const float_X sinArg = precisionCast( y_SI / period_SI * 2.0 * PI ); - return float3_X(0.0, math::sin( sinArg ) / m_unitField[1], 0.0); + const float_X sinArg = precisionCast(y_SI / period_SI * 2.0 * PI); + return float3_X(0.0, math::sin(sinArg) / m_unitField[1], 0.0); } }; @@ -69,16 +68,15 @@ namespace picongpu /* We use this to calculate your SI input back to our unit system */ PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundB( const float3_64 unitField ) : m_unitField(unitField) - {} + HDINLINE FieldBackgroundB(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field B(r,t) here * * \param cellIdx The total cell id counted from the start at t=0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { /* example: periodicity of 20 microns ( = 2.0e-5 m) */ constexpr float_64 period_SI(20.0e-6); @@ -88,8 +86,8 @@ namespace picongpu * 
multiplying with DELTA_T_SI */ /* specify your B-Field in T and convert to PIConGPU units */ - const float_X sinArg = precisionCast( y_SI / period_SI * 2.0 * PI ); - return float3_X(0.0, math::cos( sinArg ) / m_unitField[1], 0.0); + const float_X sinArg = precisionCast(y_SI / period_SI * 2.0 * PI); + return float3_X(0.0, math::cos(sinArg) / m_unitField[1], 0.0); } }; @@ -102,16 +100,15 @@ namespace picongpu /* We use this to calculate your SI input back to our unit system */ PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundJ( const float3_64 unitField ) : m_unitField(unitField) - {} + HDINLINE FieldBackgroundJ(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field J(r,t) here * * \param cellIdx The total cell id counted from the start at t=0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { /* example: periodicity of 20 microns ( = 2.0e-5 m) */ constexpr float_64 period_SI(20.0e-6); @@ -121,8 +118,8 @@ namespace picongpu * multiplying with DELTA_T_SI */ /* specify your J-Field in A/m^2 and convert to PIConGPU units */ - const float_X sinArg = precisionCast( y_SI / period_SI * 2.0 * PI ); - return float3_X(0.0, math::cos( sinArg ) / m_unitField[1], 0.0); + const float_X sinArg = precisionCast(y_SI / period_SI * 2.0 * PI); + return float3_X(0.0, math::cos(sinArg) / m_unitField[1], 0.0); } }; diff --git a/include/picongpu/param/fieldSolver.param b/include/picongpu/param/fieldSolver.param index a516aacaad..9955e8919c 100644 --- a/include/picongpu/param/fieldSolver.param +++ b/include/picongpu/param/fieldSolver.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. 
* @@ -25,6 +25,11 @@ * * Also allows to configure ad hoc mitigations for high frequency * noise in some setups via current smoothing. + * + * \attention + * Currently, the laser initialization in PIConGPU is implemented to work with the standard Yee solver. + * Using a solver of higher order will result in a slightly increased laser amplitude and energy than expected. + * */ #pragma once @@ -35,38 +40,43 @@ namespace picongpu { -namespace fields -{ - - /** Current Interpolation - * - * CurrentInterpolation is used to set a method performing the - * interpolate/assign operation from the generated currents of particle - * species to the electro-magnetic fields. - * - * Allowed values are: - * - None: - * - default for staggered grids/Yee-scheme - * - updates E - * - Binomial: 2nd order Binomial filter - * - smooths the current before assignment in staggered grid - * - updates E & breaks local charge conservation slightly - * - NoneDS: - * - experimental assignment for all-centered/directional splitting - * - updates E & B at the same time - */ - using CurrentInterpolation = currentInterpolation::None; + namespace fields + { + /** Current Interpolation + * + * CurrentInterpolation is used to set a method performing the + * interpolate/assign operation from the generated currents of particle + * species to the electro-magnetic fields. + * + * Allowed values are: + * - None: + * - default for staggered grids/Yee-scheme + * - updates E + * - Binomial: 2nd order Binomial filter + * - smooths the current before assignment in staggered grid + * - updates E & breaks local charge conservation slightly + */ + using CurrentInterpolation = currentInterpolation::None; - /** FieldSolver - * - * Field Solver Selection: - * - Yee< CurrentInterpolation > : standard Yee solver - * - YeePML< CurrentInterpolation >: standard Yee solver with PML absorber - * - Lehe< CurrentInterpolation >: Num. 
Cherenkov free field solver in a chosen direction - * - DirSplitting< CurrentInterpolation >: Sentoku's Directional Splitting Method - * - None< CurrentInterpolation >: disable the vacuum update of E and B - */ - using Solver = maxwellSolver::Yee< CurrentInterpolation >; + /** FieldSolver + * + * Field Solver Selection: + * - Yee< CurrentInterpolation > : Standard Yee solver approximating derivatives with respect to time and + * space by second order finite differences. + * - YeePML< CurrentInterpolation >: Standard Yee solver using Perfectly Matched Layer Absorbing Boundary + * Conditions (PML) + * - Lehe< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * - LehePML< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * using Perfectly Matched Layer Absorbing Boundary Conditions (PML) + * - ArbitraryOrderFDTD< 4, CurrentInterpolation >: Solver using 4 neighbors to each direction to approximate + * *spatial* derivatives by finite differences. The number of neighbors can be changed from 4 to any positive, + * integer number. The order of the solver will be twice the number of neighbors in each direction. Yee's + * method is a special case of this using one neighbor to each direction. 
+ * - ArbitraryOrderFDTDPML< 4, CurrentInterpolation >: ArbitraryOrderFDTD solver using Perfectly Matched Layer + * Absorbing Boundary Conditions (PML) + * - None< CurrentInterpolation >: disable the vacuum update of E and B + */ + using Solver = maxwellSolver::Yee<CurrentInterpolation>; -} // namespace fields + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/param/fileOutput.param b/include/picongpu/param/fileOutput.param index cdc412710d..cb9a269464 100644 --- a/include/picongpu/param/fileOutput.param +++ b/include/picongpu/param/fileOutput.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. @@ -63,49 +63,33 @@ namespace picongpu namespace deriveField = particles::particleToGrid; /* ChargeDensity section */ - using ChargeDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::ChargeDensity - >; + using ChargeDensity_Seq + = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::ChargeDensity>; /* EnergyDensity section */ - using EnergyDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::EnergyDensity - >; + using EnergyDensity_Seq + = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::EnergyDensity>; /* MomentumComponentsection: define "component" as 0=X (default), 1=Y or 2=Z (results: [-1.:1.]) */ - using MomentumComponent_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::MomentumComponent< 0 > - >; + using MomentumComponent_Seq + = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::MomentumComponent<0>>; /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** * * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size */ - using FieldTmpSolvers = MakeSeq_t< - ChargeDensity_Seq, - EnergyDensity_Seq, - MomentumComponent_Seq - >; + using FieldTmpSolvers = MakeSeq_t<ChargeDensity_Seq, EnergyDensity_Seq, MomentumComponent_Seq>; /** FileOutputFields: Groups all Fields that shall be dumped *************/ /** 
Possible native fields: FieldE, FieldB, FieldJ */ - using NativeFileOutputFields = MakeSeq_t< - FieldE, - FieldB - >; + using NativeFileOutputFields = MakeSeq_t<FieldE, FieldB>; - using FileOutputFields = MakeSeq_t< - NativeFileOutputFields, - FieldTmpSolvers - >; + using FileOutputFields = MakeSeq_t<NativeFileOutputFields, FieldTmpSolvers>; /** FileOutputParticles: Groups all Species that shall be dumped ********** @@ -115,4 +99,4 @@ namespace picongpu */ using FileOutputParticles = VectorAllSpecies; -} +} // namespace picongpu diff --git a/include/picongpu/param/flylite.param b/include/picongpu/param/flylite.param index f4adb10a64..85c8182858 100644 --- a/include/picongpu/param/flylite.param +++ b/include/picongpu/param/flylite.param @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -41,54 +41,51 @@ namespace picongpu { -namespace flylite -{ - /** number of populations (numpop) - * - * this number defines how many configurations make up a superconfiguration - * - * range: [0, 255] - */ - constexpr uint8_t populations = 3u; // example Cu data set: 32u + namespace flylite + { + /** number of populations (numpop) + * + * this number defines how many configurations make up a superconfiguration + * + * range: [0, 255] + */ + constexpr uint8_t populations = 3u; // example Cu data set: 32u - using Superconfig = types::Superconfig< - float_64, - populations - >; + using Superconfig = types::Superconfig<float_64, populations>; - /** ionization states of the atom (iz) - * - * range: [0, 255] - */ - constexpr uint8_t ionizationStates = 29u; + /** ionization states of the atom (iz) + * + * range: [0, 255] + */ + constexpr uint8_t ionizationStates = 29u; - /** number of energy bins - * - * energy steps used for local energy histograms - * @note: no overflow- or underflow-bins are used, particles with energies - * outside the range (see below) are ignored - */ - constexpr uint16_t energies = 512u; + /** number of energy bins + * + * energy steps used for local energy histograms + 
* @note: no overflow- or underflow-bins are used, particles with energies + * outside the range (see below) are ignored + */ + constexpr uint16_t energies = 512u; - /** energy range for electron and photon histograms - * - * electron and photon histograms f(e) f(ph) are currently - * calculated in a linearly binned histogram while particles with - * energies outside the ranges below are ignored - * - * unit: eV - */ - constexpr float_X electronMinEnergy = 0.0; - constexpr float_X electronMaxEnergy = 100.e3; - constexpr float_X photonMinEnergy = 0.0; - constexpr float_X photonMaxEnergy = 100.e3; + /** energy range for electron and photon histograms + * + * electron and photon histograms f(e) f(ph) are currently + * calculated in a linearly binned histogram while particles with + * energies outside the ranges below are ignored + * + * unit: eV + */ + constexpr float_X electronMinEnergy = 0.0; + constexpr float_X electronMaxEnergy = 100.e3; + constexpr float_X photonMinEnergy = 0.0; + constexpr float_X photonMaxEnergy = 100.e3; - /** you better not change this line, the wooooorld depends on it! - * - * no seriously, per-supercell is the quickest way to average particle - * quantities such as density, energy histogram, etc. and I won't implement - * another size until needed - */ - using spatialAverageBox = SuperCellSize; -} // namespace flylite + /** you better not change this line, the wooooorld depends on it! + * + * no seriously, per-supercell is the quickest way to average particle + * quantities such as density, energy histogram, etc. 
and I won't implement + * another size until needed + */ + using spatialAverageBox = SuperCellSize; + } // namespace flylite } // namespace picongpu diff --git a/include/picongpu/param/grid.param b/include/picongpu/param/grid.param index 8b06404d69..8813496202 100644 --- a/include/picongpu/param/grid.param +++ b/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -68,9 +68,9 @@ namespace picongpu * unit: none */ constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ }; /** Define the strength of the absorber for any direction @@ -80,7 +80,7 @@ namespace picongpu constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ }; /** When to move the co-moving window. @@ -101,4 +101,3 @@ namespace picongpu constexpr float_64 movePoint = 0.9; } // namespace picongpu - diff --git a/include/picongpu/param/ionizationEnergies.param b/include/picongpu/param/ionizationEnergies.param index 303ba7b68b..5b12c5b534 100644 --- a/include/picongpu/param/ionizationEnergies.param +++ b/include/picongpu/param/ionizationEnergies.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten, Axel Huebl +/* Copyright 2014-2021 Marco Garten, Axel Huebl * * This file is part of PIConGPU. 
* @@ -32,246 +32,253 @@ namespace picongpu { -namespace ionization -{ -/** Ionization potentials - * - * Please follow these rules for defining ionization energies of atomic species, - * unless your chosen ionization model requires a different unit system than `AU::` - * - input of values in either atomic units or converting eV or Joule to them - * -> use either UNITCONV_eV_to_AU or SI::ATOMIC_UNIT_ENERGY for that purpose - * - use `float_X` as the preferred data type - * - * example: - * ionization energy for ground state hydrogen: 13.6 eV - * 1 Joule = 1 kg * m^2 / s^2 - * 1 eV = 1.602e-19 J - * - * 1 AU (energy) = 27.2 eV - * = 1 Hartree - * = 4.36e-18 J - * = 2 Rydberg - * = 2 x Hydrogen ground state binding energy - * - * Atomic units are useful for ionization models because they simplify the - * formulae greatly and provide intuitively understandable relations to a - * well-known system, i.e. the Hydrogen atom. - * - * for PMACC_CONST_VECTOR usage, - * @see include/pmacc/math/ConstVector.hpp - * for finding ionization energies, - * @url http://physics.nist.gov/PhysRefData/ASD/ionEnergy.html - * - * Reference: Kramida, A., Ralchenko, Yu., Reader, J., and NIST ASD Team (2014) - * NIST Atomic Spectra Database (ver. 
5.2), [Online] - * Available: http://physics.nist.gov/asd [2017, February 8] - * National Institute of Standards and Technology, Gaithersburg, MD - */ -namespace energies -{ -namespace AU -{ - /* ionization energy for ground state hydrogen in atomic units */ - PMACC_CONST_VECTOR(float_X, 1, Hydrogen, - 13.59843 * UNITCONV_eV_to_AU - ); + namespace ionization + { + /** Ionization potentials + * + * Please follow these rules for defining ionization energies of atomic species, + * unless your chosen ionization model requires a different unit system than `AU::` + * - input of values in either atomic units or converting eV or Joule to them + * -> use either UNITCONV_eV_to_AU or SI::ATOMIC_UNIT_ENERGY for that purpose + * - use `float_X` as the preferred data type + * + * example: + * ionization energy for ground state hydrogen: 13.6 eV + * 1 Joule = 1 kg * m^2 / s^2 + * 1 eV = 1.602e-19 J + * + * 1 AU (energy) = 27.2 eV + * = 1 Hartree + * = 4.36e-18 J + * = 2 Rydberg + * = 2 x Hydrogen ground state binding energy + * + * Atomic units are useful for ionization models because they simplify the + * formulae greatly and provide intuitively understandable relations to a + * well-known system, i.e. the Hydrogen atom. + * + * for PMACC_CONST_VECTOR usage, + * @see include/pmacc/math/ConstVector.hpp + * for finding ionization energies, + * @url http://physics.nist.gov/PhysRefData/ASD/ionEnergy.html + * + * Reference: Kramida, A., Ralchenko, Yu., Reader, J., and NIST ASD Team (2014) + * NIST Atomic Spectra Database (ver. 
5.2), [Online] + * Available: http://physics.nist.gov/asd [2017, February 8] + * National Institute of Standards and Technology, Gaithersburg, MD + */ + namespace energies + { + namespace AU + { + /* ionization energy for ground state hydrogen in atomic units */ + PMACC_CONST_VECTOR(float_X, 1, Hydrogen, 13.59843 * UNITCONV_eV_to_AU); - /* ionization energy for ground state deuterium in atomic units */ - PMACC_CONST_VECTOR(float_X, 1, Deuterium, - 13.60213 * UNITCONV_eV_to_AU - ); + /* ionization energy for ground state deuterium in atomic units */ + PMACC_CONST_VECTOR(float_X, 1, Deuterium, 13.60213 * UNITCONV_eV_to_AU); - /* ionization energy for ground state helium in atomic units */ - PMACC_CONST_VECTOR(float_X, 2, Helium, - 24.58739 * UNITCONV_eV_to_AU, - 54.41776 * UNITCONV_eV_to_AU - ); + /* ionization energy for ground state helium in atomic units */ + PMACC_CONST_VECTOR(float_X, 2, Helium, 24.58739 * UNITCONV_eV_to_AU, 54.41776 * UNITCONV_eV_to_AU); - /* ionization energy for carbon in atomic units */ - PMACC_CONST_VECTOR(float_X, 6, Carbon, - 11.2603 * UNITCONV_eV_to_AU, - 24.3845 * UNITCONV_eV_to_AU, - 47.88778 * UNITCONV_eV_to_AU, - 64.49351 * UNITCONV_eV_to_AU, - 392.0905 * UNITCONV_eV_to_AU, - 489.993177 * UNITCONV_eV_to_AU - ); + /* ionization energy for carbon in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 6, + Carbon, + 11.2603 * UNITCONV_eV_to_AU, + 24.3845 * UNITCONV_eV_to_AU, + 47.88778 * UNITCONV_eV_to_AU, + 64.49351 * UNITCONV_eV_to_AU, + 392.0905 * UNITCONV_eV_to_AU, + 489.993177 * UNITCONV_eV_to_AU); - /* ionization energy for nitrogen in atomic units */ - PMACC_CONST_VECTOR(float_X, 7, Nitrogen, - 14.53413 * UNITCONV_eV_to_AU, - 29.60125 * UNITCONV_eV_to_AU, - 47.4453 * UNITCONV_eV_to_AU, - 77.4735 * UNITCONV_eV_to_AU, - 97.89013 * UNITCONV_eV_to_AU, - 552.06731 * UNITCONV_eV_to_AU, - 667.04609 * UNITCONV_eV_to_AU - ); + /* ionization energy for nitrogen in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 7, + Nitrogen, + 14.53413 
* UNITCONV_eV_to_AU, + 29.60125 * UNITCONV_eV_to_AU, + 47.4453 * UNITCONV_eV_to_AU, + 77.4735 * UNITCONV_eV_to_AU, + 97.89013 * UNITCONV_eV_to_AU, + 552.06731 * UNITCONV_eV_to_AU, + 667.04609 * UNITCONV_eV_to_AU); - /* ionization energy for oxygen in atomic units */ - PMACC_CONST_VECTOR(float_X, 8, Oxygen, - 13.61805 * UNITCONV_eV_to_AU, - 35.12112 * UNITCONV_eV_to_AU, - 54.93554 * UNITCONV_eV_to_AU, - 77.41350 * UNITCONV_eV_to_AU, - 113.8989 * UNITCONV_eV_to_AU, - 138.1189 * UNITCONV_eV_to_AU, - 739.3268 * UNITCONV_eV_to_AU, - 871.4098 * UNITCONV_eV_to_AU - ); + /* ionization energy for oxygen in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 8, + Oxygen, + 13.61805 * UNITCONV_eV_to_AU, + 35.12112 * UNITCONV_eV_to_AU, + 54.93554 * UNITCONV_eV_to_AU, + 77.41350 * UNITCONV_eV_to_AU, + 113.8989 * UNITCONV_eV_to_AU, + 138.1189 * UNITCONV_eV_to_AU, + 739.3268 * UNITCONV_eV_to_AU, + 871.4098 * UNITCONV_eV_to_AU); - /* ionization energy for aluminium in atomic units */ - PMACC_CONST_VECTOR(float_X, 13, Aluminium, - 5.98577 * UNITCONV_eV_to_AU, - 18.8285 * UNITCONV_eV_to_AU, - 28.4476 * UNITCONV_eV_to_AU, - 119.992 * UNITCONV_eV_to_AU, - 153.825 * UNITCONV_eV_to_AU, - 190.495 * UNITCONV_eV_to_AU, - 241.769 * UNITCONV_eV_to_AU, - 284.647 * UNITCONV_eV_to_AU, - 330.214 * UNITCONV_eV_to_AU, - 398.656 * UNITCONV_eV_to_AU, - 442.006 * UNITCONV_eV_to_AU, - 2085.97 * UNITCONV_eV_to_AU, - 2304.14 * UNITCONV_eV_to_AU - ); + /* ionization energy for aluminium in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 13, + Aluminium, + 5.98577 * UNITCONV_eV_to_AU, + 18.8285 * UNITCONV_eV_to_AU, + 28.4476 * UNITCONV_eV_to_AU, + 119.992 * UNITCONV_eV_to_AU, + 153.825 * UNITCONV_eV_to_AU, + 190.495 * UNITCONV_eV_to_AU, + 241.769 * UNITCONV_eV_to_AU, + 284.647 * UNITCONV_eV_to_AU, + 330.214 * UNITCONV_eV_to_AU, + 398.656 * UNITCONV_eV_to_AU, + 442.006 * UNITCONV_eV_to_AU, + 2085.97 * UNITCONV_eV_to_AU, + 2304.14 * UNITCONV_eV_to_AU); - /* ionization energy for silicon in atomic units */ 
- PMACC_CONST_VECTOR(float_X, 14, Silicon, - 8.151683 * UNITCONV_eV_to_AU, - 16.345845 * UNITCONV_eV_to_AU, - 33.493 * UNITCONV_eV_to_AU, - 45.14179 * UNITCONV_eV_to_AU, - 166.767 * UNITCONV_eV_to_AU, - 205.267 * UNITCONV_eV_to_AU, - 246.32 * UNITCONV_eV_to_AU, - 303.66 * UNITCONV_eV_to_AU, - 351.1 * UNITCONV_eV_to_AU, - 401.38 * UNITCONV_eV_to_AU, - 476.18 * UNITCONV_eV_to_AU, - 523.415 * UNITCONV_eV_to_AU, - 2437.65804 * UNITCONV_eV_to_AU, - 2673.1774 * UNITCONV_eV_to_AU - ); + /* ionization energy for silicon in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 14, + Silicon, + 8.151683 * UNITCONV_eV_to_AU, + 16.345845 * UNITCONV_eV_to_AU, + 33.493 * UNITCONV_eV_to_AU, + 45.14179 * UNITCONV_eV_to_AU, + 166.767 * UNITCONV_eV_to_AU, + 205.267 * UNITCONV_eV_to_AU, + 246.32 * UNITCONV_eV_to_AU, + 303.66 * UNITCONV_eV_to_AU, + 351.1 * UNITCONV_eV_to_AU, + 401.38 * UNITCONV_eV_to_AU, + 476.18 * UNITCONV_eV_to_AU, + 523.415 * UNITCONV_eV_to_AU, + 2437.65804 * UNITCONV_eV_to_AU, + 2673.1774 * UNITCONV_eV_to_AU); - /* ionization energy for copper in atomic units */ - PMACC_CONST_VECTOR(float_X, 29, Copper, - 7.72638 * UNITCONV_eV_to_AU, - 20.2924 * UNITCONV_eV_to_AU, - 36.8411 * UNITCONV_eV_to_AU, - 57.385 * UNITCONV_eV_to_AU, - 79.87 * UNITCONV_eV_to_AU, - 103.010 * UNITCONV_eV_to_AU, - 139.012 * UNITCONV_eV_to_AU, - 166.021 * UNITCONV_eV_to_AU, - 198.022 * UNITCONV_eV_to_AU, - 232.25 * UNITCONV_eV_to_AU, - 265.332 * UNITCONV_eV_to_AU, - 367.09 * UNITCONV_eV_to_AU, - 401.03 * UNITCONV_eV_to_AU, - 436.06 * UNITCONV_eV_to_AU, - 483.19 * UNITCONV_eV_to_AU, - 518.712 * UNITCONV_eV_to_AU, - 552.821 * UNITCONV_eV_to_AU, - 632.56 * UNITCONV_eV_to_AU, - 670.608 * UNITCONV_eV_to_AU, - 1690.59 * UNITCONV_eV_to_AU, - 1800.3 * UNITCONV_eV_to_AU, - 1918.4 * UNITCONV_eV_to_AU, - 2044.6 * UNITCONV_eV_to_AU, - 2179.4 * UNITCONV_eV_to_AU, - 2307.32 * UNITCONV_eV_to_AU, - 2479.12 * UNITCONV_eV_to_AU, - 2586.95 * UNITCONV_eV_to_AU, - 11062.4 * UNITCONV_eV_to_AU, - 11567.6 * 
UNITCONV_eV_to_AU - ); + /* ionization energy for copper in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 29, + Copper, + 7.72638 * UNITCONV_eV_to_AU, + 20.2924 * UNITCONV_eV_to_AU, + 36.8411 * UNITCONV_eV_to_AU, + 57.385 * UNITCONV_eV_to_AU, + 79.87 * UNITCONV_eV_to_AU, + 103.010 * UNITCONV_eV_to_AU, + 139.012 * UNITCONV_eV_to_AU, + 166.021 * UNITCONV_eV_to_AU, + 198.022 * UNITCONV_eV_to_AU, + 232.25 * UNITCONV_eV_to_AU, + 265.332 * UNITCONV_eV_to_AU, + 367.09 * UNITCONV_eV_to_AU, + 401.03 * UNITCONV_eV_to_AU, + 436.06 * UNITCONV_eV_to_AU, + 483.19 * UNITCONV_eV_to_AU, + 518.712 * UNITCONV_eV_to_AU, + 552.821 * UNITCONV_eV_to_AU, + 632.56 * UNITCONV_eV_to_AU, + 670.608 * UNITCONV_eV_to_AU, + 1690.59 * UNITCONV_eV_to_AU, + 1800.3 * UNITCONV_eV_to_AU, + 1918.4 * UNITCONV_eV_to_AU, + 2044.6 * UNITCONV_eV_to_AU, + 2179.4 * UNITCONV_eV_to_AU, + 2307.32 * UNITCONV_eV_to_AU, + 2479.12 * UNITCONV_eV_to_AU, + 2586.95 * UNITCONV_eV_to_AU, + 11062.4 * UNITCONV_eV_to_AU, + 11567.6 * UNITCONV_eV_to_AU); - /* ionization energy for gold in atomic units */ - PMACC_CONST_VECTOR(float_X, 79, Gold, - 9.2256 * UNITCONV_eV_to_AU, - 20.203 * UNITCONV_eV_to_AU, - 30.016 * UNITCONV_eV_to_AU, - 45.017 * UNITCONV_eV_to_AU, - 60.019 * UNITCONV_eV_to_AU, - 74.020 * UNITCONV_eV_to_AU, - 94.020 * UNITCONV_eV_to_AU, - 112.02 * UNITCONV_eV_to_AU, - 130.12 * UNITCONV_eV_to_AU, - 149.02 * UNITCONV_eV_to_AU, - 168.21 * UNITCONV_eV_to_AU, - 248.01 * UNITCONV_eV_to_AU, - 275.14 * UNITCONV_eV_to_AU, - 299.15 * UNITCONV_eV_to_AU, - 324.16 * UNITCONV_eV_to_AU, - 365.19 * UNITCONV_eV_to_AU, - 392.20 * UNITCONV_eV_to_AU, - 433.21 * UNITCONV_eV_to_AU, - 487.25 * UNITCONV_eV_to_AU, - 517.30 * UNITCONV_eV_to_AU, - 546.30 * UNITCONV_eV_to_AU, - 600.30 * UNITCONV_eV_to_AU, - 650.40 * UNITCONV_eV_to_AU, - 710.40 * UNITCONV_eV_to_AU, - 760.40 * UNITCONV_eV_to_AU, - 820.40 * UNITCONV_eV_to_AU, - 870.40 * UNITCONV_eV_to_AU, - 930.50 * UNITCONV_eV_to_AU, - 990.50 * UNITCONV_eV_to_AU, - 1040.5 * 
UNITCONV_eV_to_AU, - 1100.5 * UNITCONV_eV_to_AU, - 1150.6 * UNITCONV_eV_to_AU, - 1210.6 * UNITCONV_eV_to_AU, - 1475.5 * UNITCONV_eV_to_AU, - 1527.5 * UNITCONV_eV_to_AU, - 1584.5 * UNITCONV_eV_to_AU, - 1644.5 * UNITCONV_eV_to_AU, - 1702.4 * UNITCONV_eV_to_AU, - 1758.4 * UNITCONV_eV_to_AU, - 1845.4 * UNITCONV_eV_to_AU, - 1904.4 * UNITCONV_eV_to_AU, - 1967.4 * UNITCONV_eV_to_AU, - 2026.4 * UNITCONV_eV_to_AU, - 2261.4 * UNITCONV_eV_to_AU, - 2320.4 * UNITCONV_eV_to_AU, - 2383.4 * UNITCONV_eV_to_AU, - 2443.4 * UNITCONV_eV_to_AU, - 2640.4 * UNITCONV_eV_to_AU, - 2708.4 * UNITCONV_eV_to_AU, - 2870.4 * UNITCONV_eV_to_AU, - 2941.0 * UNITCONV_eV_to_AU, - 4888.4 * UNITCONV_eV_to_AU, - 5013.4 * UNITCONV_eV_to_AU, - 5156.5 * UNITCONV_eV_to_AU, - 5307.5 * UNITCONV_eV_to_AU, - 5452.5 * UNITCONV_eV_to_AU, - 5594.5 * UNITCONV_eV_to_AU, - 5846.6 * UNITCONV_eV_to_AU, - 5994.6 * UNITCONV_eV_to_AU, - 6156.7 * UNITCONV_eV_to_AU, - 6305.1 * UNITCONV_eV_to_AU, - 6724.1 * UNITCONV_eV_to_AU, - 6854.1 * UNITCONV_eV_to_AU, - 6997.2 * UNITCONV_eV_to_AU, - 7130.2 * UNITCONV_eV_to_AU, - 7756.3 * UNITCONV_eV_to_AU, - 7910.4 * UNITCONV_eV_to_AU, - 8210.4 * UNITCONV_eV_to_AU, - 8360.5 * UNITCONV_eV_to_AU, - 18040. * UNITCONV_eV_to_AU, - 18401. * UNITCONV_eV_to_AU, - 18791. * UNITCONV_eV_to_AU, - 19151. * UNITCONV_eV_to_AU, - 21471. * UNITCONV_eV_to_AU, - 21921. * UNITCONV_eV_to_AU, - 22500. * UNITCONV_eV_to_AU, - 22868. * UNITCONV_eV_to_AU, - 91516. * UNITCONV_eV_to_AU, - 93254. 
* UNITCONV_eV_to_AU - ); + /* ionization energy for gold in atomic units */ + PMACC_CONST_VECTOR( + float_X, + 79, + Gold, + 9.2256 * UNITCONV_eV_to_AU, + 20.203 * UNITCONV_eV_to_AU, + 30.016 * UNITCONV_eV_to_AU, + 45.017 * UNITCONV_eV_to_AU, + 60.019 * UNITCONV_eV_to_AU, + 74.020 * UNITCONV_eV_to_AU, + 94.020 * UNITCONV_eV_to_AU, + 112.02 * UNITCONV_eV_to_AU, + 130.12 * UNITCONV_eV_to_AU, + 149.02 * UNITCONV_eV_to_AU, + 168.21 * UNITCONV_eV_to_AU, + 248.01 * UNITCONV_eV_to_AU, + 275.14 * UNITCONV_eV_to_AU, + 299.15 * UNITCONV_eV_to_AU, + 324.16 * UNITCONV_eV_to_AU, + 365.19 * UNITCONV_eV_to_AU, + 392.20 * UNITCONV_eV_to_AU, + 433.21 * UNITCONV_eV_to_AU, + 487.25 * UNITCONV_eV_to_AU, + 517.30 * UNITCONV_eV_to_AU, + 546.30 * UNITCONV_eV_to_AU, + 600.30 * UNITCONV_eV_to_AU, + 650.40 * UNITCONV_eV_to_AU, + 710.40 * UNITCONV_eV_to_AU, + 760.40 * UNITCONV_eV_to_AU, + 820.40 * UNITCONV_eV_to_AU, + 870.40 * UNITCONV_eV_to_AU, + 930.50 * UNITCONV_eV_to_AU, + 990.50 * UNITCONV_eV_to_AU, + 1040.5 * UNITCONV_eV_to_AU, + 1100.5 * UNITCONV_eV_to_AU, + 1150.6 * UNITCONV_eV_to_AU, + 1210.6 * UNITCONV_eV_to_AU, + 1475.5 * UNITCONV_eV_to_AU, + 1527.5 * UNITCONV_eV_to_AU, + 1584.5 * UNITCONV_eV_to_AU, + 1644.5 * UNITCONV_eV_to_AU, + 1702.4 * UNITCONV_eV_to_AU, + 1758.4 * UNITCONV_eV_to_AU, + 1845.4 * UNITCONV_eV_to_AU, + 1904.4 * UNITCONV_eV_to_AU, + 1967.4 * UNITCONV_eV_to_AU, + 2026.4 * UNITCONV_eV_to_AU, + 2261.4 * UNITCONV_eV_to_AU, + 2320.4 * UNITCONV_eV_to_AU, + 2383.4 * UNITCONV_eV_to_AU, + 2443.4 * UNITCONV_eV_to_AU, + 2640.4 * UNITCONV_eV_to_AU, + 2708.4 * UNITCONV_eV_to_AU, + 2870.4 * UNITCONV_eV_to_AU, + 2941.0 * UNITCONV_eV_to_AU, + 4888.4 * UNITCONV_eV_to_AU, + 5013.4 * UNITCONV_eV_to_AU, + 5156.5 * UNITCONV_eV_to_AU, + 5307.5 * UNITCONV_eV_to_AU, + 5452.5 * UNITCONV_eV_to_AU, + 5594.5 * UNITCONV_eV_to_AU, + 5846.6 * UNITCONV_eV_to_AU, + 5994.6 * UNITCONV_eV_to_AU, + 6156.7 * UNITCONV_eV_to_AU, + 6305.1 * UNITCONV_eV_to_AU, + 6724.1 * UNITCONV_eV_to_AU, + 6854.1 * 
UNITCONV_eV_to_AU, + 6997.2 * UNITCONV_eV_to_AU, + 7130.2 * UNITCONV_eV_to_AU, + 7756.3 * UNITCONV_eV_to_AU, + 7910.4 * UNITCONV_eV_to_AU, + 8210.4 * UNITCONV_eV_to_AU, + 8360.5 * UNITCONV_eV_to_AU, + 18040. * UNITCONV_eV_to_AU, + 18401. * UNITCONV_eV_to_AU, + 18791. * UNITCONV_eV_to_AU, + 19151. * UNITCONV_eV_to_AU, + 21471. * UNITCONV_eV_to_AU, + 21921. * UNITCONV_eV_to_AU, + 22500. * UNITCONV_eV_to_AU, + 22868. * UNITCONV_eV_to_AU, + 91516. * UNITCONV_eV_to_AU, + 93254. * UNITCONV_eV_to_AU); -} // namespace AU -} // namespace energies -} // namespace ionization + } // namespace AU + } // namespace energies + } // namespace ionization } // namespace picongpu diff --git a/include/picongpu/param/ionizer.param b/include/picongpu/param/ionizer.param index 7ecc3189a6..900854127a 100644 --- a/include/picongpu/param/ionizer.param +++ b/include/picongpu/param/ionizer.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten, Axel Huebl +/* Copyright 2014-2021 Marco Garten, Axel Huebl * * This file is part of PIConGPU. * @@ -23,7 +23,6 @@ * of the periodic table. The elements here should have a matching list of * ionization energies in @see ionizationEnergies.param. Moreover this file * contains a description of how to configure an ionization model for a species. - * Currently each species can only be assigned exactly one ionization model. * * Furthermore there are parameters for specific ionization models to be found * here. 
That includes lists of screened nuclear charges as seen by bound @@ -40,474 +39,495 @@ namespace picongpu { -/** Ionization Model Configuration - * - * - None : no particle is ionized - * - BSI : simple barrier suppression ionization - * - BSIEffectiveZ : BSI taking electron shielding into account via an effective - * atomic number Z_eff - * - ADKLinPol : Ammosov-Delone-Krainov tunneling ionization (H-like) - * -> linearly polarized lasers - * - ADKCircPol : Ammosov-Delone-Krainov tunneling ionization (H-like) - * -> circularly polarized lasers - * - Keldysh : Keldysh ionization model - * - ThomasFermi : statistical impact ionization based on Thomas-Fermi - * atomic model - * Attention: requires 2 FieldTmp slots @see memory.param - * - * Research and development: - * - BSIStarkShifted : BSI for hydrogen-like atoms and ions considering the - * Stark upshift of ionization potentials - * - * Usage: Add flags to the list of particle flags that has the following structure - * - * ionizers< MakeSeq_t< particles::ionization::IonizationModel< Species2BCreated > > >, - * atomicNumbers< ionization::atomicNumbers::Element_t >, - * effectiveNuclearCharge< ionization::effectiveNuclearCharge::Element_t >, - * ionizationEnergies< ionization::energies::AU::Element_t > - */ -namespace ionization -{ -/*! Specify (chemical) element - * - * Proton and neutron numbers define the chemical element that the ion species - * is based on. This value can be non-integer for physical models taking - * charge shielding effects into account. - * @see http://en.wikipedia.org/wiki/Effective_nuclear_charge - * - * It is wrapped into a struct because of C++ restricting floats from being - * template arguments. - * - * Do not forget to set the correct mass and charge via - * `massRatio<>` and `chargeRatio<>`! 
- */ -namespace atomicNumbers -{ - /** H-1 99.98% NA */ - struct Hydrogen_t - { - static constexpr float_X numberOfProtons = 1.0; - static constexpr float_X numberOfNeutrons = 0.0; - }; - - /** H-2 0.02% NA */ - struct Deuterium_t - { - static constexpr float_X numberOfProtons = 1.0; - static constexpr float_X numberOfNeutrons = 1.0; - }; - - /** He-4 ~100% NA */ - struct Helium_t - { - static constexpr float_X numberOfProtons = 2.0; - static constexpr float_X numberOfNeutrons = 2.0; - }; - - /** C-12 98.9% NA */ - struct Carbon_t - { - static constexpr float_X numberOfProtons = 6.0; - static constexpr float_X numberOfNeutrons = 6.0; - }; - - /** N-14 99.6% NA */ - struct Nitrogen_t - { - static constexpr float_X numberOfProtons = 7.0; - static constexpr float_X numberOfNeutrons = 7.0; - }; - - /** O-16 99.76% NA */ - struct Oxygen_t - { - static constexpr float_X numberOfProtons = 8.0; - static constexpr float_X numberOfNeutrons = 8.0; - }; - - /** Al-27 ~100% NA */ - struct Aluminium_t - { - static constexpr float_X numberOfProtons = 13.0; - static constexpr float_X numberOfNeutrons = 14.0; - }; - - /** Si-28 ~92.23% NA */ - struct Silicon_t - { - static constexpr float_X numberOfProtons = 14.0; - static constexpr float_X numberOfNeutrons = 14.0; - }; - - /** Cu-63 69.15% NA */ - struct Copper_t - { - static constexpr float_X numberOfProtons = 29.0; - static constexpr float_X numberOfNeutrons = 34.0; - }; - - /** Au-197 ~100% NA */ - struct Gold_t - { - static constexpr float_X numberOfProtons = 79.0; - static constexpr float_X numberOfNeutrons = 118.0; - }; -} // namespace atomicNumbers - -/** Effective Nuclear Charge - * - * Due to the shielding effect of inner electron shells in an atom / ion - * which makes the core charge seem smaller to valence electrons - * new, effective, atomic core charge numbers can be defined to make the - * crude barrier suppression ionization (BSI) model less inaccurate. 
- * - * @see https://en.wikipedia.org/wiki/Effective_nuclear_charge - * or refer directly to the calculations by Slater or Clementi and Raimondi - * - * References: - * Clementi, E.; Raimondi, D. L. (1963) - * "Atomic Screening Constants from SCF Functions" - * J. Chem. Phys. 38 (11): 2686–2689. doi:10.1063/1.1733573 - * Clementi, E.; Raimondi, D. L.; Reinhardt, W. P. (1967) - * "Atomic Screening Constants from SCF Functions. II. Atoms with 37 to 86 Electrons" - * Journal of Chemical Physics. 47: 1300–1307. doi:10.1063/1.1712084 - * - * IMPORTANT NOTE: - * You have to insert the values in REVERSE order since the lowest shell - * corresponds to the last ionization process! - */ -namespace effectiveNuclearCharge -{ - /* For hydrogen Z_eff is obviously equal to Z */ - PMACC_CONST_VECTOR(float_X, 1, Hydrogen, - /* 1s^1 */ - 1. - ); - - /* Example: deuterium */ - PMACC_CONST_VECTOR(float_X, 1, Deuterium, - /* 1s^1 */ - 1. - ); - - /* Example: helium */ - PMACC_CONST_VECTOR(float_X, 2, Helium, - /* 1s^2 */ - 1.688, - 1.688 - ); - - /* Example: carbon */ - PMACC_CONST_VECTOR(float_X, 6, Carbon, - /* 2p^2 */ - 3.136, - 3.136, - /* 2s^2 */ - 3.217, - 3.217, - /* 1s^2 */ - 5.673, - 5.673 - ); - - /* Example: nitrogen */ - PMACC_CONST_VECTOR(float_X, 7, Nitrogen, - /* 2p^3 */ - 3.834, - 3.834, - 3.834, - /* 2s^2 */ - 3.874, - 3.874, - /* 1s^2 */ - 6.665, - 6.665 - ); - - /* Example: oxygen */ - PMACC_CONST_VECTOR(float_X, 8, Oxygen, - /* 2p^4 */ - 4.453, - 4.453, - 4.453, - 4.453, - /* 2s^2 */ - 4.492, - 4.492, - /* 1s^2 */ - 7.658, - 7.658 - ); - - /* Example: aluminium */ - PMACC_CONST_VECTOR(float_X, 13, Aluminium, - /* 3p^1 */ - 4.066, - /* 3s^2 */ - 4.117, - 4.117, - /* 2p^6 */ - 8.963, - 8.963, - 8.963, - 8.963, - 8.963, - 8.963, - /* 2s^2 */ - 8.214, - 8.214, - /* 1s^2 */ - 12.591, - 12.591 - ); - - /* Example: silicon */ - PMACC_CONST_VECTOR(float_X, 14, Silicon, - /* 3p^2 */ - 4.285, - 4.285, - /* 3s^2 */ - 4.903, - 4.903, - /* 2p^6 */ - 9.945, - 9.945, - 9.945, - 
9.945, - 9.945, - 9.945, - /* 2s^2 */ - 9.020, - 9.020, - /* 1s^2 */ - 13.575, - 13.575 - ); - - - /* Example: copper - * Note: Copper is one of the few exceptions to the Madelung energy ordering - * rule! Other exceptions: Au, Ag, Pd, Cr, Mo - * predicted configuration: [Ar] 4s^2 3d^9 - * actual configuration: [Ar] 4s^1 3d^10 - */ - PMACC_CONST_VECTOR(float_X, 29, Copper, - /* 3d^10 */ - 13.201, - 13.201, - 13.201, - 13.201, - 13.201, - 13.201, - 13.201, - 13.201, - 13.201, - 13.201, - /* 4s^1 */ - 5.842, - /* 3p^6 */ - 14.731, - 14.731, - 14.731, - 14.731, - 14.731, - 14.731, - /* 3s^2 */ - 15.594, - 15.594, - /* 2p^6 */ - 25.097, - 25.097, - 25.097, - 25.097, - 25.097, - 25.097, - /* 2s^2 */ - 21.020, - 21.020, - /* 1s^2 */ - 28.339, - 28.339 - ); - - /* Example: gold - * Note: Gold is one of the few exceptions to the Madelung energy ordering - * rule! Other exceptions: Cu, Ag, Pd, Cr, Mo - * predicted configuration: [Xe] 6s^2 4f^14 5d^9 - * actual configuration: [Xe] 6s^1 4f^14 5d^10 - */ - PMACC_CONST_VECTOR(float_X, 79, Gold, - /* 5d^10 */ - 20.126, - 20.126, - 20.126, - 20.126, - 20.126, - 20.126, - 20.126, - 20.126, - 20.126, - 20.126, - /* 4f^14 */ - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - 40.650, - /* 6s^1 */ - 10.938, - /* 5p^6 */ - 25.170, - 25.170, - 25.170, - 25.170, - 25.170, - 25.170, - /* 4d^10 */ - 41.528, - 41.528, - 41.528, - 41.528, - 41.528, - 41.528, - 41.528, - 41.528, - 41.528, - 41.528, - /* 5s^2 */ - 27.327, - 27.327, - /* 4p^6 */ - 43.547, - 43.547, - 43.547, - 43.547, - 43.547, - 43.547, - /* 3d^10 */ - 65.508, - 65.508, - 65.508, - 65.508, - 65.508, - 65.508, - 65.508, - 65.508, - 65.508, - 65.508, - /* 4s^2 */ - 44.413, - 44.413, - /* 3p^6 */ - 56.703, - 56.703, - 56.703, - 56.703, - 56.703, - 56.703, - /* 3s^2 */ - 55.763, - 55.763, - /* 2p^6 */ - 74.513, - 74.513, - 74.513, - 74.513, - 74.513, - 74.513, - /* 2s^2 */ - 58.370, - 58.370, - /* 
1s^2 */ - 77.476, - 77.476 - ); -} // namespace effectiveNuclearCharge -} // namespace ionization - -namespace particles -{ -namespace ionization -{ -namespace thomasFermi -{ - - /** Fitting parameters to average ionization degree Z* = 4/3*pi*R_0^3 * n(R_0) - * as an extension towards arbitrary atoms and temperatures - * - * See table IV of - * \url http://www.sciencedirect.com/science/article/pii/S0065219908601451 - * doi:10.1016/S0065-2199(08)60145-1 - */ - constexpr float_X TFAlpha = 14.3139; - constexpr float_X TFBeta = 0.6624; - - constexpr float_X TFA1 = 3.323e-3; - constexpr float_X TFA2 = 9.718e-1; - constexpr float_X TFA3 = 9.26148e-5; - constexpr float_X TFA4 = 3.10165; - - constexpr float_X TFB0 = -1.7630; - constexpr float_X TFB1 = 1.43175; - constexpr float_X TFB2 = 0.31546; - - constexpr float_X TFC1 = -0.366667; - constexpr float_X TFC2 = 0.983333; - - /** cutoff energy for electron "temperature" calculation - * - * In laser produced plasmas we can have different, well-separable groups - * of electrons. For the Thomas-Fermi ionization model we only want the - * thermalized "bulk" electrons. Including the high-energy "prompt" - * electrons is physically questionable since they do not have a large - * cross section for collisional ionization. - * - * unit: keV - */ - constexpr float_X CUTOFF_MAX_ENERGY_KEV = 50.0; - /** cutoff energy for electron "temperature" calculation in SI units*/ - constexpr float_X CUTOFF_MAX_ENERGY = CUTOFF_MAX_ENERGY_KEV * UNITCONV_keV_to_Joule; - - /** lower ion density cutoff + /** Ionization Model Configuration * - * The Thomas-Fermi model yields unphysical artifacts for low ion densities. - * Low ion densities imply lower collision frequency and thus less collisional ionization. - * The Thomas-Fermi model yields an increasing charge state for decreasing densities and electron temperatures of 10eV and above. - * This cutoff will be used to set the lower application threshold for charge state calculation. 
+ * - None : no particle is ionized + * - BSI : simple barrier suppression ionization + * - BSIEffectiveZ : BSI taking electron shielding into account via an effective + * atomic number Z_eff + * - ADKLinPol : Ammosov-Delone-Krainov tunneling ionization (H-like) + * -> linearly polarized lasers + * - ADKCircPol : Ammosov-Delone-Krainov tunneling ionization (H-like) + * -> circularly polarized lasers + * - Keldysh : Keldysh ionization model + * - ThomasFermi : statistical impact ionization based on Thomas-Fermi + * atomic model + * Attention: requires 2 FieldTmp slots @see memory.param * - * @note This cutoff value should be set in accordance to FLYCHK calculations, - * for instance! It is not a universal value and requires some preliminary - * approximations! + * Research and development: + * - BSIStarkShifted : BSI for hydrogen-like atoms and ions considering the + * Stark upshift of ionization potentials * - * unit: 1 / m^3 + * Usage: Add flags to the list of particle flags that has the following structure * - * example: 1.7422e27 as a hydrogen ion number density equal to the corresponding critical electron number density for an 800nm laser - * - * The choice of the default is motivated by by the following: - * In laser-driven plasmas all dynamics in density regions below the - * critical electron density will be laser-dominated. Once ions of that density - * are ionized once the laser will not penetrate fully anymore and the as electrons are heated - * the dynamics will be collision-dominated. - */ - constexpr float_X CUTOFF_LOW_DENSITY = 1.7422e27; - - /** lower electron temperature cutoff - * - * The Thomas-Fermi model predicts initial ionization for many materials of - * solid density even when the electron temperature is 0. 
+ * ionizers< MakeSeq_t< particles::ionization::IonizationModel< Species2BCreated > > >, + * atomicNumbers< ionization::atomicNumbers::Element_t >, + * effectiveNuclearCharge< ionization::effectiveNuclearCharge::Element_t >, + * ionizationEnergies< ionization::energies::AU::Element_t > */ - constexpr float_X CUTOFF_LOW_TEMPERATURE_EV = 1.0; - -} // namespace thomasFermi -} // namespace ionization -} // namespace particles + namespace ionization + { + /*! Specify (chemical) element + * + * Proton and neutron numbers define the chemical element that the ion species + * is based on. This value can be non-integer for physical models taking + * charge shielding effects into account. + * @see http://en.wikipedia.org/wiki/Effective_nuclear_charge + * + * It is wrapped into a struct because of C++ restricting floats from being + * template arguments. + * + * Do not forget to set the correct mass and charge via + * `massRatio<>` and `chargeRatio<>`! + */ + namespace atomicNumbers + { + /** H-1 99.98% NA */ + struct Hydrogen_t + { + static constexpr float_X numberOfProtons = 1.0; + static constexpr float_X numberOfNeutrons = 0.0; + }; + + /** H-2 0.02% NA */ + struct Deuterium_t + { + static constexpr float_X numberOfProtons = 1.0; + static constexpr float_X numberOfNeutrons = 1.0; + }; + + /** He-4 ~100% NA */ + struct Helium_t + { + static constexpr float_X numberOfProtons = 2.0; + static constexpr float_X numberOfNeutrons = 2.0; + }; + + /** C-12 98.9% NA */ + struct Carbon_t + { + static constexpr float_X numberOfProtons = 6.0; + static constexpr float_X numberOfNeutrons = 6.0; + }; + + /** N-14 99.6% NA */ + struct Nitrogen_t + { + static constexpr float_X numberOfProtons = 7.0; + static constexpr float_X numberOfNeutrons = 7.0; + }; + + /** O-16 99.76% NA */ + struct Oxygen_t + { + static constexpr float_X numberOfProtons = 8.0; + static constexpr float_X numberOfNeutrons = 8.0; + }; + + /** Al-27 ~100% NA */ + struct Aluminium_t + { + static constexpr float_X 
numberOfProtons = 13.0; + static constexpr float_X numberOfNeutrons = 14.0; + }; + + /** Si-28 ~92.23% NA */ + struct Silicon_t + { + static constexpr float_X numberOfProtons = 14.0; + static constexpr float_X numberOfNeutrons = 14.0; + }; + + /** Cu-63 69.15% NA */ + struct Copper_t + { + static constexpr float_X numberOfProtons = 29.0; + static constexpr float_X numberOfNeutrons = 34.0; + }; + + /** Au-197 ~100% NA */ + struct Gold_t + { + static constexpr float_X numberOfProtons = 79.0; + static constexpr float_X numberOfNeutrons = 118.0; + }; + } // namespace atomicNumbers + + /** Effective Nuclear Charge + * + * Due to the shielding effect of inner electron shells in an atom / ion + * which makes the core charge seem smaller to valence electrons + * new, effective, atomic core charge numbers can be defined to make the + * crude barrier suppression ionization (BSI) model less inaccurate. + * + * @see https://en.wikipedia.org/wiki/Effective_nuclear_charge + * or refer directly to the calculations by Slater or Clementi and Raimondi + * + * References: + * Clementi, E.; Raimondi, D. L. (1963) + * "Atomic Screening Constants from SCF Functions" + * J. Chem. Phys. 38 (11): 2686–2689. doi:10.1063/1.1733573 + * Clementi, E.; Raimondi, D. L.; Reinhardt, W. P. (1967) + * "Atomic Screening Constants from SCF Functions. II. Atoms with 37 to 86 Electrons" + * Journal of Chemical Physics. 47: 1300–1307. doi:10.1063/1.1712084 + * + * IMPORTANT NOTE: + * You have to insert the values in REVERSE order since the lowest shell + * corresponds to the last ionization process! 
+ */ + namespace effectiveNuclearCharge + { + /* For hydrogen Z_eff is obviously equal to Z */ + PMACC_CONST_VECTOR( + float_X, + 1, + Hydrogen, + /* 1s^1 */ + 1.); + + /* Example: deuterium */ + PMACC_CONST_VECTOR( + float_X, + 1, + Deuterium, + /* 1s^1 */ + 1.); + + /* Example: helium */ + PMACC_CONST_VECTOR( + float_X, + 2, + Helium, + /* 1s^2 */ + 1.688, + 1.688); + + /* Example: carbon */ + PMACC_CONST_VECTOR( + float_X, + 6, + Carbon, + /* 2p^2 */ + 3.136, + 3.136, + /* 2s^2 */ + 3.217, + 3.217, + /* 1s^2 */ + 5.673, + 5.673); + + /* Example: nitrogen */ + PMACC_CONST_VECTOR( + float_X, + 7, + Nitrogen, + /* 2p^3 */ + 3.834, + 3.834, + 3.834, + /* 2s^2 */ + 3.874, + 3.874, + /* 1s^2 */ + 6.665, + 6.665); + + /* Example: oxygen */ + PMACC_CONST_VECTOR( + float_X, + 8, + Oxygen, + /* 2p^4 */ + 4.453, + 4.453, + 4.453, + 4.453, + /* 2s^2 */ + 4.492, + 4.492, + /* 1s^2 */ + 7.658, + 7.658); + + /* Example: aluminium */ + PMACC_CONST_VECTOR( + float_X, + 13, + Aluminium, + /* 3p^1 */ + 4.066, + /* 3s^2 */ + 4.117, + 4.117, + /* 2p^6 */ + 8.963, + 8.963, + 8.963, + 8.963, + 8.963, + 8.963, + /* 2s^2 */ + 8.214, + 8.214, + /* 1s^2 */ + 12.591, + 12.591); + + /* Example: silicon */ + PMACC_CONST_VECTOR( + float_X, + 14, + Silicon, + /* 3p^2 */ + 4.285, + 4.285, + /* 3s^2 */ + 4.903, + 4.903, + /* 2p^6 */ + 9.945, + 9.945, + 9.945, + 9.945, + 9.945, + 9.945, + /* 2s^2 */ + 9.020, + 9.020, + /* 1s^2 */ + 13.575, + 13.575); + + + /* Example: copper + * Note: Copper is one of the few exceptions to the Madelung energy ordering + * rule! 
Other exceptions: Au, Ag, Pd, Cr, Mo + * predicted configuration: [Ar] 4s^2 3d^9 + * actual configuration: [Ar] 4s^1 3d^10 + */ + PMACC_CONST_VECTOR( + float_X, + 29, + Copper, + /* 3d^10 */ + 13.201, + 13.201, + 13.201, + 13.201, + 13.201, + 13.201, + 13.201, + 13.201, + 13.201, + 13.201, + /* 4s^1 */ + 5.842, + /* 3p^6 */ + 14.731, + 14.731, + 14.731, + 14.731, + 14.731, + 14.731, + /* 3s^2 */ + 15.594, + 15.594, + /* 2p^6 */ + 25.097, + 25.097, + 25.097, + 25.097, + 25.097, + 25.097, + /* 2s^2 */ + 21.020, + 21.020, + /* 1s^2 */ + 28.339, + 28.339); + + /* Example: gold + * Note: Gold is one of the few exceptions to the Madelung energy ordering + * rule! Other exceptions: Cu, Ag, Pd, Cr, Mo + * predicted configuration: [Xe] 6s^2 4f^14 5d^9 + * actual configuration: [Xe] 6s^1 4f^14 5d^10 + */ + PMACC_CONST_VECTOR( + float_X, + 79, + Gold, + /* 5d^10 */ + 20.126, + 20.126, + 20.126, + 20.126, + 20.126, + 20.126, + 20.126, + 20.126, + 20.126, + 20.126, + /* 4f^14 */ + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + 40.650, + /* 6s^1 */ + 10.938, + /* 5p^6 */ + 25.170, + 25.170, + 25.170, + 25.170, + 25.170, + 25.170, + /* 4d^10 */ + 41.528, + 41.528, + 41.528, + 41.528, + 41.528, + 41.528, + 41.528, + 41.528, + 41.528, + 41.528, + /* 5s^2 */ + 27.327, + 27.327, + /* 4p^6 */ + 43.547, + 43.547, + 43.547, + 43.547, + 43.547, + 43.547, + /* 3d^10 */ + 65.508, + 65.508, + 65.508, + 65.508, + 65.508, + 65.508, + 65.508, + 65.508, + 65.508, + 65.508, + /* 4s^2 */ + 44.413, + 44.413, + /* 3p^6 */ + 56.703, + 56.703, + 56.703, + 56.703, + 56.703, + 56.703, + /* 3s^2 */ + 55.763, + 55.763, + /* 2p^6 */ + 74.513, + 74.513, + 74.513, + 74.513, + 74.513, + 74.513, + /* 2s^2 */ + 58.370, + 58.370, + /* 1s^2 */ + 77.476, + 77.476); + } // namespace effectiveNuclearCharge + } // namespace ionization + + namespace particles + { + namespace ionization + { + namespace thomasFermi + { + /** Fitting 
parameters to average ionization degree Z* = 4/3*pi*R_0^3 * n(R_0) + * as an extension towards arbitrary atoms and temperatures + * + * See table IV of + * \url http://www.sciencedirect.com/science/article/pii/S0065219908601451 + * doi:10.1016/S0065-2199(08)60145-1 + */ + constexpr float_X TFAlpha = 14.3139; + constexpr float_X TFBeta = 0.6624; + + constexpr float_X TFA1 = 3.323e-3; + constexpr float_X TFA2 = 9.718e-1; + constexpr float_X TFA3 = 9.26148e-5; + constexpr float_X TFA4 = 3.10165; + + constexpr float_X TFB0 = -1.7630; + constexpr float_X TFB1 = 1.43175; + constexpr float_X TFB2 = 0.31546; + + constexpr float_X TFC1 = -0.366667; + constexpr float_X TFC2 = 0.983333; + + /** cutoff energy for electron "temperature" calculation + * + * In laser produced plasmas we can have different, well-separable groups + * of electrons. For the Thomas-Fermi ionization model we only want the + * thermalized "bulk" electrons. Including the high-energy "prompt" + * electrons is physically questionable since they do not have a large + * cross section for collisional ionization. + * + * unit: keV + */ + constexpr float_X CUTOFF_MAX_ENERGY_KEV = 50.0; + /** cutoff energy for electron "temperature" calculation in SI units*/ + constexpr float_X CUTOFF_MAX_ENERGY = CUTOFF_MAX_ENERGY_KEV * UNITCONV_keV_to_Joule; + + /** lower ion density cutoff + * + * The Thomas-Fermi model yields unphysical artifacts for low ion densities. + * Low ion densities imply lower collision frequency and thus less collisional ionization. + * The Thomas-Fermi model yields an increasing charge state for decreasing densities and electron + * temperatures of 10eV and above. This cutoff will be used to set the lower application threshold for + * charge state calculation. + * + * @note This cutoff value should be set in accordance to FLYCHK calculations, + * for instance! It is not a universal value and requires some preliminary + * approximations! 
+ * + * unit: 1 / m^3 + * + * example: 1.7422e27 as a hydrogen ion number density equal to the corresponding critical electron + * number density for an 800nm laser + * + * The choice of the default is motivated by the following: + * In laser-driven plasmas all dynamics in density regions below the + * critical electron density will be laser-dominated. Once ions of that density + * are ionized once, the laser will not penetrate fully anymore and, as the electrons are heated, + * the dynamics will be collision-dominated. + */ + constexpr float_X CUTOFF_LOW_DENSITY = 1.7422e27; + + /** lower electron temperature cutoff + * + * The Thomas-Fermi model predicts initial ionization for many materials of + * solid density even when the electron temperature is 0. + */ + constexpr float_X CUTOFF_LOW_TEMPERATURE_EV = 1.0; + + } // namespace thomasFermi + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/param/isaac.param b/include/picongpu/param/isaac.param index 4d02df141d..8e86287915 100644 --- a/include/picongpu/param/isaac.param +++ b/include/picongpu/param/isaac.param @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Matthes +/* Copyright 2016-2021 Alexander Matthes * * This file is part of PIConGPU. * @@ -41,35 +41,24 @@ namespace picongpu { -namespace isaacP -{ - - /** Intermediate list of native particle species of PIConGPU which shall be - * visualized. */ - using Particle_Seq = VectorAllSpecies; + namespace isaacP + { + /** Intermediate list of native particle species of PIConGPU which shall be + * visualized. */ + using Particle_Seq = VectorAllSpecies; - /** Intermediate list of native fields of PIConGPU which shall be - * visualized.
*/ + using Native_Seq = MakeSeq_t; - /** Intermediate list of particle species, from which density fields - * shall be created at runtime to visualize them. */ - using Density_Seq = deriveField::CreateEligible_t< - Particle_Seq, - deriveField::derivedAttributes::Density - >; + /** Intermediate list of particle species, from which density fields + * shall be created at runtime to visualize them. */ + using Density_Seq = deriveField::CreateEligible_t; - /** Compile time sequence of all fields which shall be visualized. Basically - * the join of Native_Seq and Density_Seq. */ - using Fields_Seq = MakeSeq_t< - Native_Seq, - Density_Seq - >; + /** Compile time sequence of all fields which shall be visualized. Basically + * the join of Native_Seq and Density_Seq. */ + using Fields_Seq = MakeSeq_t; -} // namespace isaacP + } // namespace isaacP } // namespace picongpu diff --git a/include/picongpu/param/laser.param b/include/picongpu/param/laser.param index a624b4032f..3df0a5b4f5 100644 --- a/include/picongpu/param/laser.param +++ b/include/picongpu/param/laser.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, * Alexander Debus * * This file is part of PIConGPU. @@ -49,541 +49,559 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace gaussianBeam -{ - //! 
Use only the 0th Laguerremode for a standard Gaussian - static constexpr uint32_t MODENUMBER = 0; - PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); - // This is just an example for a more complicated set of Laguerre modes - //constexpr uint32_t MODENUMBER = 12; - //PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, -0.0160788); - -} // namespace gaussianBeam - - struct GaussianBeamParam - { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; - /** the distance to the laser focus in y-direction - * unit: meter */ - static constexpr float_64 FOCUS_POS_SI = 4.62e-5; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - using LAGUERREMODES_t = gaussianBeam::LAGUERREMODES_t; - static constexpr uint32_t MODENUMBER = gaussianBeam::MODENUMBER; - - /** Available polarisation types - */ - enum PolarisationType - { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = CIRCULAR; - }; - - struct PulseFrontTiltParam - { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength 
parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; - - /** the distance to the laser focus in y-direction - * unit: meter */ - static constexpr float_64 FOCUS_POS_SI = 4.62e-5; - - /** the tilt angle between laser propagation in y-direction and laser axis in - * x-direction (0 degree == no tilt) - * unit: degree */ - static constexpr float_64 TILT_X_SI = 0.0; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. 
- * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - //! Available polarisation types - enum PolarisationType - { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = CIRCULAR; - }; - - struct WavepacketParam - { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** The profile of the test Lasers 0 and 2 can be stretched by a - * constant area between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 7.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_X_SI = 4.246e-6; - static constexpr float_64 W0_Z_SI = W0_X_SI; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarisation types - */ - enum PolarisationType - { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - /** Based on a wavepacket with Gaussian spatial envelope - * - * and the following temporal shape: - * A Gaussian peak (optionally lengthened by a plateau) is preceded by - * two pieces of exponential preramps, defined by 3 (time, intensity)- - * -points. 
- * The first two points get connected by an exponential, the 2nd and - * 3rd point are connected by another exponential, which is then - * extrapolated to the peak. The Gaussian is added everywhere, but - * typically contributes significantly only near the peak. - * It is advisable to set the third point far enough from the plateau - * (approx 3*FWHM), then the contribution from the Gaussian is - * negligible there, and the intensity can be set as measured from the - * laser profile. - * Optionally a Gaussian prepulse can be added, given by the parameters - * of the relative intensity and time point. - * The time of the prepulse and the three preramp points are given in - * SI, the intensities are given as multiples of the peak intensity. - */ - struct ExpRampWithPrepulseParam - { - // Intensities of prepulse and exponential preramp - static constexpr float_X INT_RATIO_PREPULSE = 0.; - static constexpr float_X INT_RATIO_POINT_1 = 1.e-8; - static constexpr float_X INT_RATIO_POINT_2 = 1.e-4; - static constexpr float_X INT_RATIO_POINT_3 = 1.e-4; - - // time-positions of prepulse and preramps points - static constexpr float_64 TIME_PREPULSE_SI = -950.0e-15; - static constexpr float_64 TIME_PEAKPULSE_SI = 0.0e-15; - static constexpr float_64 TIME_POINT_1_SI = -1000.0e-15; - static constexpr float_64 TIME_POINT_2_SI = -300.0e-15; - static constexpr float_64 TIME_POINT_3_SI = -100.0e-15; - - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** UNITCONV */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 20.; - - /** unit: Volt /meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * 
UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt /meter */ - //constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** The profile of the test Lasers 0 and 2 can be stretched by a - * constant area between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 0.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; - - /** Pulse length: sigma of std. gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 3.0e-14 / 2.35482; // half of the time in which E falls to half its initial value (then I falls to half its value in 15fs, approx 6 wavelengths). Those are 4.8 wavelenghts. - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * WO_X_SI is this distance in x-direction - * W0_Z_SI is this distance in z-direction - * if both values are equal, the laser has a circular shape in x-z - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * unit: meter */ - static constexpr float_64 W0_X_SI = 2.5 * WAVE_LENGTH_SI; - static constexpr float_64 W0_Z_SI = W0_X_SI; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before plateau - * and half at the end of the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 16.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. 
- * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarisation types - */ - enum PolarisationType - { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - /** Based on a wavepacket with Gaussian spatial envelope - * - * Wavepacket with a polynomial temporal intensity shape. - */ - struct PolynomParam - { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - //static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** The profile of the test Lasers 0 and 2 can be stretched by a - * constant area between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 13.34e-15; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * unit: meter - */ - static constexpr float_64 W0_X_SI = 4.246e-6; // waist in x-direction - static constexpr float_64 W0_Z_SI = W0_X_SI; // waist in z-direction - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 20.0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarization types - */ - enum PolarisationType - { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - struct PlaneWaveParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI 
/ ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 1.5; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** The profile of the test Lasers 0 and 2 can be stretched by a - * constant area between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 13.34e-15; - - /** Pulse length: sigma of std. gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. 
- * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 20.6146; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarization types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - //! currently selected laser profile - using Selected = None<>; - -} // namespace laserProfiles -} // namespace fields + namespace gaussianBeam + { + //! 
Use only the 0th Laguerremode for a standard Gaussian + static constexpr uint32_t MODENUMBER = 0; + PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); + // This is just an example for a more complicated set of Laguerre modes + // constexpr uint32_t MODENUMBER = 12; + // PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, + // 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, + // -0.0160788); + + } // namespace gaussianBeam + + struct GaussianBeamParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what an experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; + /** the distance to the laser focus in y-direction + * unit: meter */ + static constexpr float_64 FOCUS_POS_SI = 4.62e-5; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` then the absorber is disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + using LAGUERREMODES_t = gaussianBeam::LAGUERREMODES_t; + static constexpr uint32_t MODENUMBER = gaussianBeam::MODENUMBER; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = CIRCULAR; + }; + + struct PulseFrontTiltParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength
parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what an experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; + + /** the distance to the laser focus in y-direction + * unit: meter */ + static constexpr float_64 FOCUS_POS_SI = 4.62e-5; + + /** the tilt angle between laser propagation in y-direction and laser axis in + * x-direction (0 degree == no tilt) + * unit: degree */ + static constexpr float_64 TILT_X_SI = 0.0; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` then the absorber is disabled.
+ * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + //! Available polarisation types + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = CIRCULAR; + }; + + struct WavepacketParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** The profile of the test Lasers 0 and 2 can be stretched by a + * constant area between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI + = 7.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_X_SI = 4.246e-6; + static constexpr float_64 W0_Z_SI = W0_X_SI; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + /** Based on a wavepacket with Gaussian spatial envelope + * + * and the following temporal shape: + * A Gaussian peak (optionally lengthened by a plateau) is preceded by + * two pieces of exponential preramps, defined by 3 (time, intensity)- + * -points. 
+ * The first two points get connected by an exponential, the 2nd and + * 3rd point are connected by another exponential, which is then + * extrapolated to the peak. The Gaussian is added everywhere, but + * typically contributes significantly only near the peak. + * It is advisable to set the third point far enough from the plateau + * (approx 3*FWHM), then the contribution from the Gaussian is + * negligible there, and the intensity can be set as measured from the + * laser profile. + * Optionally a Gaussian prepulse can be added, given by the parameters + * of the relative intensity and time point. + * The time of the prepulse and the three preramp points are given in + * SI, the intensities are given as multiples of the peak intensity. + */ + struct ExpRampWithPrepulseParam + { + // Intensities of prepulse and exponential preramp + static constexpr float_X INT_RATIO_PREPULSE = 0.; + static constexpr float_X INT_RATIO_POINT_1 = 1.e-8; + static constexpr float_X INT_RATIO_POINT_2 = 1.e-4; + static constexpr float_X INT_RATIO_POINT_3 = 1.e-4; + + // time-positions of prepulse and preramps points + static constexpr float_64 TIME_PREPULSE_SI = -950.0e-15; + static constexpr float_64 TIME_PEAKPULSE_SI = 0.0e-15; + static constexpr float_64 TIME_POINT_1_SI = -1000.0e-15; + static constexpr float_64 TIME_POINT_2_SI = -300.0e-15; + static constexpr float_64 TIME_POINT_3_SI = -100.0e-15; + + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** UNITCONV */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 20.; + + /** unit: Volt /meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * 
UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt /meter */ + // constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** The profile of the test Lasers 0 and 2 can be stretched by a + * constant area between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI + = 0.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 3.0e-14 + / 2.35482; // half of the time in which E falls to half its initial value (then I falls to half its + // value in 15fs, approx 6 wavelengths). Those are 4.8 wavelenghts. + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * WO_X_SI is this distance in x-direction + * W0_Z_SI is this distance in z-direction + * if both values are equal, the laser has a circular shape in x-z + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * unit: meter */ + static constexpr float_64 W0_X_SI = 2.5 * WAVE_LENGTH_SI; + static constexpr float_64 W0_Z_SI = W0_X_SI; + + /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before plateau + * and half at the end of the plateau + * unit: none */ + static constexpr float_64 RAMP_INIT = 16.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. 
+ * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + /** Based on a wavepacket with Gaussian spatial envelope + * + * Wavepacket with a polynomial temporal intensity shape. + */ + struct PolynomParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + // static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** The profile of the test Lasers 0 and 2 can be stretched by a + * constant area between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 13.34e-15; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * unit: meter + */ + static constexpr float_64 W0_X_SI = 4.246e-6; // waist in x-direction + static constexpr float_64 W0_Z_SI = W0_X_SI; // waist in z-direction + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 20.0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarization types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + struct PlaneWaveParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / 
::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 1.5; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** The profile of the test Lasers 0 and 2 can be stretched by a + * constant area between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 13.34e-15; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 10.615e-15 / 4.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after + * the plateau unit: none */ + static constexpr float_64 RAMP_INIT = 20.6146; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarization types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + //! 
currently selected laser profile + using Selected = None<>; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/param/mallocMC.param b/include/picongpu/param/mallocMC.param index 1268f2fa7e..ca0c466be8 100644 --- a/include/picongpu/param/mallocMC.param +++ b/include/picongpu/param/mallocMC.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Carlchristian Eckert * * This file is part of PIConGPU. @@ -34,25 +34,24 @@ namespace picongpu { - //! configure the CreationPolicy "Scatter" struct DeviceHeapConfig { //! 2MiB page can hold around 256 particle frames - using pagesize = boost::mpl::int_< 2 * 1024 * 1024 >; + static constexpr uint32_t pagesize = 2u * 1024u * 1024u; /** accessblocks, regionsize and wastefactor are not conclusively * investigated and might be performance sensitive for multiple * particle species with heavily varying attributes (frame sizes) */ - using accessblocks = boost::mpl::int_< 4 >; - using regionsize = boost::mpl::int_< 8 >; - using wastefactor = boost::mpl::int_< 2 >; + static constexpr uint32_t accessblocks = 4u; + static constexpr uint32_t regionsize = 8u; + static constexpr uint32_t wastefactor = 2u; /** resetfreedpages is used to minimize memory fragmentation with * varying frame sizes */ - using resetfreedpages = boost::mpl::bool_< true >; + static constexpr bool resetfreedpages = true; }; /** Define a new allocator @@ -61,11 +60,11 @@ namespace picongpu * algorithm. 
*/ using DeviceHeap = mallocMC::Allocator< - mallocMC::CreationPolicies::Scatter< DeviceHeapConfig >, + cupla::Acc, + mallocMC::CreationPolicies::Scatter, mallocMC::DistributionPolicies::Noop, mallocMC::OOMPolicies::ReturnNull, - mallocMC::ReservePoolPolicies::SimpleCudaMalloc, - mallocMC::AlignmentPolicies::Shrink<> - >; + mallocMC::ReservePoolPolicies::AlpakaBuf, + mallocMC::AlignmentPolicies::Shrink<>>; } // namespace picongpu diff --git a/include/picongpu/param/memory.param b/include/picongpu/param/memory.param index b469775458..2e6025da2c 100644 --- a/include/picongpu/param/memory.param +++ b/include/picongpu/param/memory.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -30,16 +30,17 @@ #include #include +#include + namespace picongpu { - /* We have to hold back 350MiB for gpu-internal operations: * - random number generator * - reduces * - ... */ - constexpr size_t reservedGpuMemorySize = 350 *1024*1024; + constexpr size_t reservedGpuMemorySize = 350 * 1024 * 1024; /* short namespace*/ namespace mCT = pmacc::math::CT; @@ -47,13 +48,10 @@ namespace picongpu * * volume of a superCell must be <= 1024 */ - using SuperCellSize = typename mCT::shrinkTo< - mCT::Int< 8, 8, 4 >, - simDim - >::type; + using SuperCellSize = typename mCT::shrinkTo, simDim>::type; /** define mapper which is used for kernel call mappings */ - using MappingDesc = MappingDescription< simDim, SuperCellSize >; + using MappingDesc = MappingDescription; /** define the size of the core, border and guard area * @@ -69,10 +67,7 @@ namespace picongpu * * GuardSize is defined in units of SuperCellSize per dimension. 
*/ - using GuardSize = typename mCT::shrinkTo< - mCT::Int< 1, 1, 1 >, - simDim - >::type; + using GuardSize = typename mCT::shrinkTo, simDim>::type; /** bytes reserved for species exchange buffer * @@ -89,6 +84,21 @@ namespace picongpu static constexpr uint32_t BYTES_EXCHANGE_Z = 1 * 1024 * 1024; // 1 MiB static constexpr uint32_t BYTES_EDGES = 32 * 1024; // 32 kiB static constexpr uint32_t BYTES_CORNER = 8 * 1024; // 8 kiB + + /** Reference local domain size + * + * The size of the local domain for which the exchange sizes `BYTES_*` are configured for. + * The required size of each exchange will be calculated at runtime based on the local domain size and the + * reference size. The exchange size will be scaled only up and not down. Zero means that there is no reference + * domain size, exchanges will not be scaled. + */ + using REF_LOCAL_DOM_SIZE = mCT::Int<0, 0, 0>; + /** Scaling rate per direction. + * + * 1.0 means it scales linear with the ratio between the local domain size at runtime and the reference local + * domain size. + */ + const std::array DIR_SCALING_FACTOR = {{0.0, 0.0, 0.0}}; }; /** number of scalar fields that are reserved as temporary fields */ diff --git a/include/picongpu/param/particle.param b/include/picongpu/param/particle.param index 1cec2d40e4..02153dfecb 100644 --- a/include/picongpu/param/particle.param +++ b/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. @@ -38,143 +38,128 @@ namespace picongpu { -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * - * unit: none */ - constexpr float_X MIN_WEIGHTING = 10.0; - - /** Number of maximum particles per cell during density profile evaluation. 
- * - * Determines the weighting of a macro particle and with it, the number of - * particles "sampling" dynamics in phase space. - */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 2u; - -namespace manipulators -{ - - /** Parameter for DriftParam - */ - CONST_VECTOR(float_X,3,DriftParam_direction,1.0,0.0,0.0); - /** Parameter for a particle drift assignment - */ - struct DriftParam - { - static constexpr float_64 gamma = 1.0; - const DriftParam_direction_t direction; - }; - /** definition of manipulator that assigns a drift in X */ - using AssignXDrift = unary::Drift< - DriftParam, - nvidia::functors::Assign - >; - - - /** Parameter for a temperature assignment - */ - struct TemperatureParam - { - /*Initial temperature - * unit: keV - */ - static constexpr float_64 temperature = 0.0; - }; - /* definition a temperature assignment manipulator */ - using AddTemperature = unary::Temperature< TemperatureParam >; - - /** Unary particle manipulator: double each weighting - */ - struct DoubleWeightingFunctor - { - template< typename T_Particle > - DINLINE void operator()( T_Particle& particle ) - { - particle[weighting_] *= 2.0_X; - } - }; - - /** definition of a free particle manipulator: double weighting */ - using DoubleWeighting = generic::Free< DoubleWeightingFunctor >; - - struct RandomEnabledRadiationFunctor + namespace particles { - template< typename T_Rng, typename T_Particle > - DINLINE void operator()( T_Rng& rng, T_Particle& particle ) - { - // enable radiation for 10% of the particles - particle[ radiationMask_ ] = rng() < 0.1_X; - } - }; - - /* definition of RandomEnableRadiation start */ - using RandomEnabledRadiation = generic::FreeRng< - RandomEnabledRadiationFunctor, - pmacc::random::distributions::Uniform< float_X > - >; - - /** changes the in-cell position of each particle of a species */ - using RandomPosition = unary::RandomPosition; - -} // namespace manipulators - -namespace startPosition -{ - - struct RandomParameter - { - /** Count of 
particles per cell at initial state + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted * * unit: none */ - static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; - }; - /** definition of random particle start */ - using Random = RandomImpl< RandomParameter >; + constexpr float_X MIN_WEIGHTING = 10.0; - struct QuietParam - { - /** Count of particles per cell per direction at initial state + /** Number of maximum particles per cell during density profile evaluation. * - * unit: none */ - using numParticlesPerDimension = mCT::shrinkTo< - mCT::Int< - 1, - TYPICAL_PARTICLES_PER_CELL, - 1 - >, - simDim - >::type; - }; - - /** definition of quiet particle start */ - using Quiet = QuietImpl< QuietParam >; - - /** sit directly in lower corner of the cell */ - CONST_VECTOR( - float_X, - 3, - InCellOffset, - /* each x, y, z in-cell position component in range [0.0, 1.0) */ - 0.0, - 0.0, - 0.0 - ); - struct OnePositionParameter - { - /** Count of particles per cell at initial state - * - * unit: none */ - static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; - - const InCellOffset_t inCellOffset; - }; - - /** definition of one specific position for particle start */ - using OnePosition = OnePositionImpl< OnePositionParameter >; + * Determines the weighting of a macro particle and with it, the number of + * particles "sampling" dynamics in phase space. 
+ */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 2u; -} // namespace startPosition -} // namespace particles + namespace manipulators + { + /** Parameter for DriftParam + */ + CONST_VECTOR(float_X, 3, DriftParam_direction, 1.0, 0.0, 0.0); + /** Parameter for a particle drift assignment + */ + struct DriftParam + { + static constexpr float_64 gamma = 1.0; + const DriftParam_direction_t direction; + }; + /** definition of manipulator that assigns a drift in X */ + using AssignXDrift = unary::Drift; + + + /** Parameter for a temperature assignment + */ + struct TemperatureParam + { + /*Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.0; + }; + /* definition a temperature assignment manipulator */ + using AddTemperature = unary::Temperature; + + /** Unary particle manipulator: double each weighting + */ + struct DoubleWeightingFunctor + { + template + DINLINE void operator()(T_Particle& particle) + { + particle[weighting_] *= 2.0_X; + } + }; + + /** definition of a free particle manipulator: double weighting */ + using DoubleWeighting = generic::Free; + + struct RandomEnabledRadiationFunctor + { + template + DINLINE void operator()(T_Rng& rng, T_Particle& particle) + { + // enable radiation for 10% of the particles + particle[radiationMask_] = rng() < 0.1_X; + } + }; + + /* definition of RandomEnableRadiation start */ + using RandomEnabledRadiation + = generic::FreeRng>; + + /** changes the in-cell position of each particle of a species */ + using RandomPosition = unary::RandomPosition; + + } // namespace manipulators + + namespace startPosition + { + struct RandomParameter + { + /** Count of particles per cell at initial state + * + * unit: none */ + static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; + }; + /** definition of random particle start */ + using Random = RandomImpl; + + struct QuietParam + { + /** Count of particles per cell per direction at initial state + * + * unit: none */ + using 
numParticlesPerDimension + = mCT::shrinkTo, simDim>::type; + }; + + /** definition of quiet particle start */ + using Quiet = QuietImpl; + + /** sit directly in lower corner of the cell */ + CONST_VECTOR( + float_X, + 3, + InCellOffset, + /* each x, y, z in-cell position component in range [0.0, 1.0) */ + 0.0, + 0.0, + 0.0); + struct OnePositionParameter + { + /** Count of particles per cell at initial state + * + * unit: none */ + static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; + + const InCellOffset_t inCellOffset; + }; + + /** definition of one specific position for particle start */ + using OnePosition = OnePositionImpl; + + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/param/particleCalorimeter.param b/include/picongpu/param/particleCalorimeter.param index 23782a0311..95de4d17cc 100644 --- a/include/picongpu/param/particleCalorimeter.param +++ b/include/picongpu/param/particleCalorimeter.param @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -21,31 +21,25 @@ namespace picongpu { -namespace particleCalorimeter -{ - -/** Map yaw and pitch into [0,1] respectively. These ranges correspond to - * the normalized histogram range of the calorimeter (0: first bin, 1: last bin). - * Out-of-range values are mapped to the first or the last bin. - * - * Useful for fine tuning the spatial calorimeter resolution. 
- * - * \param yaw -maxYaw...maxYaw - * \param pitch -maxPitch...maxPitch - * \param maxYaw maximum value of angle yaw - * \param maxPitch maximum value of angle pitch - * \return Two values within [-1,1] - */ -HDINLINE float2_X mapYawPitchToNormedRange(const float_X yaw, - const float_X pitch, - const float_X maxYaw, - const float_X maxPitch) -{ - return float2_X( - 0.5_X + 0.5_X * yaw / maxYaw, - 0.5_X + 0.5_X * pitch / maxPitch - ); -} + namespace particleCalorimeter + { + /** Map yaw and pitch into [0,1] respectively. These ranges correspond to + * the normalized histogram range of the calorimeter (0: first bin, 1: last bin). + * Out-of-range values are mapped to the first or the last bin. + * + * Useful for fine tuning the spatial calorimeter resolution. + * + * \param yaw -maxYaw...maxYaw + * \param pitch -maxPitch...maxPitch + * \param maxYaw maximum value of angle yaw + * \param maxPitch maximum value of angle pitch + * \return Two values within [-1,1] + */ + HDINLINE float2_X + mapYawPitchToNormedRange(const float_X yaw, const float_X pitch, const float_X maxYaw, const float_X maxPitch) + { + return float2_X(0.5_X + 0.5_X * yaw / maxYaw, 0.5_X + 0.5_X * pitch / maxPitch); + } -} // namespace particleCalorimeter + } // namespace particleCalorimeter } // namespace picongpu diff --git a/include/picongpu/param/particleFilters.param b/include/picongpu/param/particleFilters.param index 6c5e1a1c13..b66188799b 100644 --- a/include/picongpu/param/particleFilters.param +++ b/include/picongpu/param/particleFilters.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -41,27 +41,25 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ - /** Plugins: collection of all available particle filters - * - * Create a list of all filters here that you want to use in plugins. 
- * - * Note: filter All is defined in picongpu/particles/filter/filter.def - */ - using AllParticleFilters = MakeSeq_t< - All - >; + namespace particles + { + namespace filter + { + /** Plugins: collection of all available particle filters + * + * Create a list of all filters here that you want to use in plugins. + * + * Note: filter All is defined in picongpu/particles/filter/filter.def + */ + using AllParticleFilters = MakeSeq_t; -} // namespace filter + } // namespace filter -namespace traits -{ - /* if needed for generic "free" filters, - * place `SpeciesEligibleForSolver` traits for filters here - */ -} // namespace traits -} // namespace particles + namespace traits + { + /* if needed for generic "free" filters, + * place `SpeciesEligibleForSolver` traits for filters here + */ + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/param/particleMerger.param b/include/picongpu/param/particleMerger.param index 686cc510a0..b827dce21c 100644 --- a/include/picongpu/param/particleMerger.param +++ b/include/picongpu/param/particleMerger.param @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -27,16 +27,15 @@ namespace picongpu { -namespace plugins -{ -namespace particleMerging -{ - - /** maximum number of active Voronoi cells per supercell. If the number - * of active Voronoi cells reaches this limit merging events are dropped. - */ - constexpr size_t MAX_VORONOI_CELLS = 128; + namespace plugins + { + namespace particleMerging + { + /** maximum number of active Voronoi cells per supercell. If the number + * of active Voronoi cells reaches this limit merging events are dropped. 
+ */ + constexpr size_t MAX_VORONOI_CELLS = 128; -} // namespace particleMerging -} // namespace plugins + } // namespace particleMerging + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/param/physicalConstants.param b/include/picongpu/param/physicalConstants.param index 85ac5ed3a7..7c6968bc40 100644 --- a/include/picongpu/param/physicalConstants.param +++ b/include/picongpu/param/physicalConstants.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Marco Garten * * This file is part of PIConGPU. @@ -19,7 +19,6 @@ */ - #pragma once namespace picongpu @@ -34,8 +33,7 @@ namespace picongpu /** unit: N / A^2 */ constexpr float_64 MUE0_SI = PI * 4.e-7; /** unit: C / (V m) */ - constexpr float_64 EPS0_SI = 1.0 / MUE0_SI / SPEED_OF_LIGHT_SI - / SPEED_OF_LIGHT_SI; + constexpr float_64 EPS0_SI = 1.0 / MUE0_SI / SPEED_OF_LIGHT_SI / SPEED_OF_LIGHT_SI; /** impedance of free space * unit: ohm */ @@ -75,11 +73,15 @@ namespace picongpu * doi:10.1088/0026-1394/52/2/360 */ constexpr float_64 N_AVOGADRO = 6.02214076e23; - } + + //! Classical electron radius in SI units + constexpr float_64 ELECTRON_RADIUS_SI = ELECTRON_CHARGE_SI * ELECTRON_CHARGE_SI + / (4.0 * PI * EPS0_SI * ELECTRON_MASS_SI * SPEED_OF_LIGHT_SI * SPEED_OF_LIGHT_SI); + } // namespace SI /** Unit of speed */ constexpr float_64 UNIT_SPEED = SI::SPEED_OF_LIGHT_SI; - constexpr float_X SPEED_OF_LIGHT = float_X( SI::SPEED_OF_LIGHT_SI / UNIT_SPEED ); + constexpr float_X SPEED_OF_LIGHT = float_X(SI::SPEED_OF_LIGHT_SI / UNIT_SPEED); // converts // @@ -96,14 +98,17 @@ namespace picongpu // constexpr float_64 An_Arbitrary_Energy_Input_keV = 30.0; // unit: keV // // // first convert to SI (because SI stays our standard Unit System!) 
- // constexpr float_64 An_Arbitrary_Energy_Input_SI = An_Arbitrary_Energy_Input_keV * UNITCONV_keV_to_Joule // unit: Joule + // constexpr float_64 An_Arbitrary_Energy_Input_SI = An_Arbitrary_Energy_Input_keV * UNITCONV_keV_to_Joule // + // unit: Joule // // // now the "real" convert to our internal unitless system - // constexpr float_X An_Arbitrary_Energy_Input = float_X(An_Arbitrary_Energy_Input_SI / UNIT_ENERGY) // unit: none + // constexpr float_X An_Arbitrary_Energy_Input = float_X(An_Arbitrary_Energy_Input_SI / UNIT_ENERGY) // unit: + // none // // As a convention, we DO NOT use the short track: // constexpr float_64 An_Arbitrary_Energy_Input_keV = 30.0; // unit: keV - // constexpr float_X An_Arbitrary_Energy_Input = float_X(An_Arbitrary_Energy_Input_SI * UNITCONV_keV_to_Joule / UNIT_ENERGY) // unit: none + // constexpr float_X An_Arbitrary_Energy_Input = float_X(An_Arbitrary_Energy_Input_SI * UNITCONV_keV_to_Joule / + // UNIT_ENERGY) // unit: none // constexpr float_64 UNITCONV_keV_to_Joule = 1.60217646e-16; constexpr float_64 UNITCONV_Joule_to_keV = (1.0 / UNITCONV_keV_to_Joule); @@ -113,4 +118,4 @@ namespace picongpu constexpr float_64 UNITCONV_AU_to_eV = 27.21139; constexpr float_64 UNITCONV_eV_to_AU = (1.0 / UNITCONV_AU_to_eV); -} +} // namespace picongpu diff --git a/include/picongpu/param/pml.param b/include/picongpu/param/pml.param index f75e75bcd6..9d875c1b11 100644 --- a/include/picongpu/param/pml.param +++ b/include/picongpu/param/pml.param @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 Sergei Bastrakov +/* Copyright 2019-2021 Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. * @@ -19,9 +19,9 @@ /** @file * - * Configure the perfectly matched layer (PML). + * Configure the Perfectly Matched Layer absorbing boundary conditions (PML). * - * To enable PML use YeePML field solver. + * To enable PML use YeePML, LehePML or ArbitraryOrderFDTDPML field solver. 
*/ #pragma once @@ -29,133 +29,122 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - /* The parameters in this file are only used if the field solver selected is - * YeePML. - * The original paper on this approach is J.A. Roden, S.D. Gedney. - * Convolution PML (CPML): An efficient FDTD implementation of the CFS - PML - * for arbitrary media. Microwave and optical technology letters. 27 (5), - * 334-339 (2000). - * https://doi.org/10.1002/1098-2760(20001205)27:5%3C334::AID-MOP14%3E3.0.CO;2-A - * Our implementation is based on a more detailed description in section - * 7.9 of the book A. Taflove, S.C. Hagness. Computational Electrodynamics. - * The Finite-Difference Time-Domain Method. Third Edition. Artech house, - * Boston (2005), referred to as [Taflove, Hagness]. - */ + namespace fields + { + namespace maxwellSolver + { + namespace Pml + { + /* The parameters in this file are only used if the field solver selected + * uses Perfectly Matched Layer Absorbing Boundary Conditions (PML). + * The original paper on this approach is J.A. Roden, S.D. Gedney. + * Convolution PML (CPML): An efficient FDTD implementation of the CFS - PML + * for arbitrary media. Microwave and optical technology letters. 27 (5), + * 334-339 (2000). + * https://doi.org/10.1002/1098-2760(20001205)27:5%3C334::AID-MOP14%3E3.0.CO;2-A + * Our implementation is based on a more detailed description in section + * 7.9 of the book A. Taflove, S.C. Hagness. Computational Electrodynamics. + * The Finite-Difference Time-Domain Method. Third Edition. Artech house, + * Boston (2005), referred to as [Taflove, Hagness]. + */ - constexpr uint32_t THICKNESS = 8; + constexpr uint32_t THICKNESS = 8; - /** Thickness of the absorbing layer, in number of cells - * - * PML is located inside the global simulation area, near the outer borders. - * Setting size to 0 results in disabling absorption at the corresponding - * boundary. 
Normally thickness is between 6 and 16 cells, with larger - * values providing less reflections. - * 8 cells should be good enough for most simulations. There are no - * requirements on thickness being a multiple of the supercell size. - * It is only required that PML is small enough to be fully contained in - * a single layer of local domains near the global simulation area boundary - * (Note that the domains of this layer might be changing, e.g. due to - * moving window.) - * Unit: number of cells. - */ - constexpr uint32_t NUM_CELLS[ 3 ][ 2 ] = { - { THICKNESS, THICKNESS }, // x direction [negative, positive] - { THICKNESS, THICKNESS }, // y direction [negative, positive] - { THICKNESS, THICKNESS } // z direction [negative, positive] - }; + /** Thickness of the absorbing layer, in number of cells + * + * PML is located inside the global simulation area, near the outer borders. + * Setting size to 0 results in disabling absorption at the corresponding + * boundary. Normally thickness is between 6 and 16 cells, with larger + * values providing less reflections. + * 8 cells should be good enough for most simulations. There are no + * requirements on thickness being a multiple of the supercell size. + * It is only required that PML is small enough to be fully contained in + * a single layer of local domains near the global simulation area boundary + * (Note that the domains of this layer might be changing, e.g. due to + * moving window.) + * Unit: number of cells. + */ + constexpr uint32_t NUM_CELLS[3][2] = { + {THICKNESS, THICKNESS}, // x direction [negative, positive] + {THICKNESS, THICKNESS}, // y direction [negative, positive] + {THICKNESS, THICKNESS} // z direction [negative, positive] + }; - /** Order of polynomial grading for artificial electric conductivity and - * stretching coefficient - * - * The conductivity (sigma) is polynomially scaling from 0 at the internal - * border of PML to the maximum value (defined below) at the external - * border. 
The stretching coefficient (kappa) scales from 1 to the - * corresponding maximum value (defined below) with the same polynomial. - * The grading is given in [Taflove, Hagness], eq. (7.60a, b), with - * the order denoted 'm'. - * Must be >= 0. Normally between 3 and 4, not required to be integer. - * Unitless. - */ - constexpr float_64 SIGMA_KAPPA_GRADING_ORDER = 4.0; + /** Order of polynomial grading for artificial electric conductivity and + * stretching coefficient + * + * The conductivity (sigma) is polynomially scaling from 0 at the internal + * border of PML to the maximum value (defined below) at the external + * border. The stretching coefficient (kappa) scales from 1 to the + * corresponding maximum value (defined below) with the same polynomial. + * The grading is given in [Taflove, Hagness], eq. (7.60a, b), with + * the order denoted 'm'. + * Must be >= 0. Normally between 3 and 4, not required to be integer. + * Unitless. + */ + constexpr float_64 SIGMA_KAPPA_GRADING_ORDER = 4.0; - // [Taflove, Hagness], eq. (7.66) - constexpr float_64 SIGMA_OPT_SI[ 3 ] = { - 0.8 * ( SIGMA_KAPPA_GRADING_ORDER + 1.0 ) / ( SI::Z0_SI * SI::CELL_WIDTH_SI ), - 0.8 * ( SIGMA_KAPPA_GRADING_ORDER + 1.0 ) / ( SI::Z0_SI * SI::CELL_HEIGHT_SI ), - 0.8 * ( SIGMA_KAPPA_GRADING_ORDER + 1.0 ) / ( SI::Z0_SI * SI::CELL_DEPTH_SI ) - }; + // [Taflove, Hagness], eq. 
(7.66) + constexpr float_64 SIGMA_OPT_SI[3] + = {0.8 * (SIGMA_KAPPA_GRADING_ORDER + 1.0) / (SI::Z0_SI * SI::CELL_WIDTH_SI), + 0.8 * (SIGMA_KAPPA_GRADING_ORDER + 1.0) / (SI::Z0_SI * SI::CELL_HEIGHT_SI), + 0.8 * (SIGMA_KAPPA_GRADING_ORDER + 1.0) / (SI::Z0_SI * SI::CELL_DEPTH_SI)}; - // Muptiplier to express SIGMA_MAX_SI with SIGMA_OPT_SI - constexpr float_64 SIGMA_OPT_MULTIPLIER = 1.0; + // Multiplier to express SIGMA_MAX_SI with SIGMA_OPT_SI + constexpr float_64 SIGMA_OPT_MULTIPLIER = 1.0; - /** Max value of artificial electric conductivity in PML - * - * Components correspond to directions: element 0 corresponds to absorption - * along x direction, 1 = y, 2 = z. Grading is described in comments for - * SIGMA_KAPPA_GRADING_ORDER. - * Too small values lead to significant reflections from the external - * border, too large - to reflections due to discretization errors. - * Artificial magnetic permeability will be chosen to perfectly match this. - * Must be >= 0. Normally between 0.7 * SIGMA_OPT_SI and 1.1 * SIGMA_OPT_SI. - * Unit: siemens / m. - */ - constexpr float_64 SIGMA_MAX_SI[ 3 ] = { - SIGMA_OPT_SI[ 0 ] * SIGMA_OPT_MULTIPLIER, - SIGMA_OPT_SI[ 1 ] * SIGMA_OPT_MULTIPLIER, - SIGMA_OPT_SI[ 2 ] * SIGMA_OPT_MULTIPLIER - }; + /** Max value of artificial electric conductivity in PML + * + * Components correspond to directions: element 0 corresponds to absorption + * along x direction, 1 = y, 2 = z. Grading is described in comments for + * SIGMA_KAPPA_GRADING_ORDER. + * Too small values lead to significant reflections from the external + * border, too large - to reflections due to discretization errors. + * Artificial magnetic permeability will be chosen to perfectly match this. + * Must be >= 0. Normally between 0.7 * SIGMA_OPT_SI and 1.1 * SIGMA_OPT_SI. + * Unit: siemens / m. 
+ */ + constexpr float_64 SIGMA_MAX_SI[3] + = {SIGMA_OPT_SI[0] * SIGMA_OPT_MULTIPLIER, + SIGMA_OPT_SI[1] * SIGMA_OPT_MULTIPLIER, + SIGMA_OPT_SI[2] * SIGMA_OPT_MULTIPLIER}; - /** Max value of coordinate stretching coefficient in PML - * - * Components correspond to directions: element 0 corresponds to absorption - * along x direction, 1 = y, 2 = z. Grading is described in comments for - * SIGMA_KAPPA_GRADING_ORDER. - * Must be >= 1. For relatively homogeneous domains 1.0 is a reasonable value. - * Highly elongated domains can have better absorption with values between - * 7.0 and 20.0, for example, see section 7.11.2 in [Taflove, Hagness]. - * Unitless. - */ - constexpr float_64 KAPPA_MAX[ 3 ] = { - 1.0, - 1.0, - 1.0 - }; + /** Max value of coordinate stretching coefficient in PML + * + * Components correspond to directions: element 0 corresponds to absorption + * along x direction, 1 = y, 2 = z. Grading is described in comments for + * SIGMA_KAPPA_GRADING_ORDER. + * Must be >= 1. For relatively homogeneous domains 1.0 is a reasonable value. + * Highly elongated domains can have better absorption with values between + * 7.0 and 20.0, for example, see section 7.11.2 in [Taflove, Hagness]. + * Unitless. + */ + constexpr float_64 KAPPA_MAX[3] = {1.0, 1.0, 1.0}; - /** Order of polynomial grading for complex frequency shift - * - * The complex frequency shift (alpha) is polynomially downscaling from the - * maximum value (defined below) at the internal border of PML to 0 at the - * external border. The grading is given in [Taflove, Hagness], eq. (7.79), - * with the order denoted 'm_a'. - * Must be >= 0. Normally values are around 1.0. - * Unitless. - */ - constexpr float_64 ALPHA_GRADING_ORDER = 1.0; + /** Order of polynomial grading for complex frequency shift + * + * The complex frequency shift (alpha) is polynomially downscaling from the + * maximum value (defined below) at the internal border of PML to 0 at the + * external border. 
The grading is given in [Taflove, Hagness], eq. (7.79), + * with the order denoted 'm_a'. + * Must be >= 0. Normally values are around 1.0. + * Unitless. + */ + constexpr float_64 ALPHA_GRADING_ORDER = 1.0; - /** Complex frequency shift in PML - * - * Components correspond to directions: element 0 corresponds to absorption - * along x direction, 1 = y, 2 = z. Setting it to 0 will make PML behave - * as uniaxial PML. Setting it to a positive value helps to attenuate - * evanescent modes, but can degrade absorption of propagating modes, as - * described in section 7.7 and 7.11.3 in [Taflove, Hagness]. - * Must be >= 0. Normally values are 0 or between 0.15 and 0.3. - * Unit: siemens / m. - */ - constexpr float_64 ALPHA_MAX_SI[ 3 ] = { - 0.2, - 0.2, - 0.2 - }; + /** Complex frequency shift in PML + * + * Components correspond to directions: element 0 corresponds to absorption + * along x direction, 1 = y, 2 = z. Setting it to 0 will make PML behave + * as uniaxial PML. Setting it to a positive value helps to attenuate + * evanescent modes, but can degrade absorption of propagating modes, as + * described in section 7.7 and 7.11.3 in [Taflove, Hagness]. + * Must be >= 0. Normally values are 0 or between 0.15 and 0.3. + * Unit: siemens / m. + */ + constexpr float_64 ALPHA_MAX_SI[3] = {0.2, 0.2, 0.2}; -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields + } // namespace Pml + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/param/png.param b/include/picongpu/param/png.param index 8a57c09de6..06e2832943 100644 --- a/include/picongpu/param/png.param +++ b/include/picongpu/param/png.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -65,7 +65,7 @@ namespace picongpu /* png preview settings for each channel */ DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) { - return math::abs2(field_J); + return pmacc::math::abs2(field_J); } DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) @@ -77,6 +77,5 @@ namespace picongpu { return -1.0_X * field_E.y(); } - } -} - + } // namespace visPreview +} // namespace picongpu diff --git a/include/picongpu/param/pngColorScales.param b/include/picongpu/param/pngColorScales.param index f626d83595..01e453ecf4 100644 --- a/include/picongpu/param/pngColorScales.param +++ b/include/picongpu/param/pngColorScales.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once namespace picongpu @@ -27,88 +26,66 @@ namespace picongpu { namespace none { - HDINLINE void addRGB( const float3_X&, - const float_X, - const float_X ) + HDINLINE void addRGB(const float3_X&, const float_X, const float_X) { return; } - } + } // namespace none namespace gray { - HDINLINE void addRGB( float3_X& img, - const float_X value, - const float_X opacity ) + HDINLINE void addRGB(float3_X& img, const float_X value, const float_X opacity) { - const float3_X myChannel( 1.0, 1.0, 1.0 ); + const float3_X myChannel(1.0, 1.0, 1.0); img = img - - opacity * float3_X( myChannel.x() * img.x(), - myChannel.y() * img.y(), - myChannel.z() * img.z() ) - + myChannel * value * opacity; + - opacity * float3_X(myChannel.x() * img.x(), myChannel.y() * img.y(), myChannel.z() * img.z()) + + myChannel * value * opacity; } - } + } // namespace gray namespace grayInv { - HDINLINE void addRGB( float3_X& img, - const float_X value, - const float_X opacity ) + HDINLINE void addRGB(float3_X& img, const float_X value, const 
float_X opacity) { - const float3_X myChannel( 1.0, 1.0, 1.0 ); + const float3_X myChannel(1.0, 1.0, 1.0); img = img - - opacity * float3_X( myChannel.x() * img.x(), - myChannel.y() * img.y(), - myChannel.z() * img.z() ) - + myChannel * ( 1.0_X - value ) * opacity; + - opacity * float3_X(myChannel.x() * img.x(), myChannel.y() * img.y(), myChannel.z() * img.z()) + + myChannel * (1.0_X - value) * opacity; } - } + } // namespace grayInv namespace red { - HDINLINE void addRGB( float3_X& img, - const float_X value, - const float_X opacity ) + HDINLINE void addRGB(float3_X& img, const float_X value, const float_X opacity) { - const float3_X myChannel( 1.0, 0.0, 0.0 ); + const float3_X myChannel(1.0, 0.0, 0.0); img = img - - opacity * float3_X( myChannel.x() * img.x(), - myChannel.y() * img.y(), - myChannel.z() * img.z() ) - + myChannel * value * opacity; + - opacity * float3_X(myChannel.x() * img.x(), myChannel.y() * img.y(), myChannel.z() * img.z()) + + myChannel * value * opacity; } - } + } // namespace red namespace green { - HDINLINE void addRGB( float3_X& img, - const float_X value, - const float_X opacity ) + HDINLINE void addRGB(float3_X& img, const float_X value, const float_X opacity) { - const float3_X myChannel( 0.0, 1.0, 0.0 ); + const float3_X myChannel(0.0, 1.0, 0.0); img = img - - opacity * float3_X( myChannel.x() * img.x(), - myChannel.y() * img.y(), - myChannel.z() * img.z() ) - + myChannel * value * opacity; + - opacity * float3_X(myChannel.x() * img.x(), myChannel.y() * img.y(), myChannel.z() * img.z()) + + myChannel * value * opacity; } - } + } // namespace green namespace blue { - HDINLINE void addRGB( float3_X& img, - const float_X value, - const float_X opacity ) + HDINLINE void addRGB(float3_X& img, const float_X value, const float_X opacity) { - const float3_X myChannel( 0.0, 0.0, 1.0 ); + const float3_X myChannel(0.0, 0.0, 1.0); img = img - - opacity * float3_X( myChannel.x() * img.x(), - myChannel.y() * img.y(), - myChannel.z() * img.z() ) - + 
myChannel * value * opacity; + - opacity * float3_X(myChannel.x() * img.x(), myChannel.y() * img.y(), myChannel.z() * img.z()) + + myChannel * value * opacity; } - } + } // namespace blue - } -} + } // namespace colorScales +} // namespace picongpu diff --git a/include/picongpu/param/precision.param b/include/picongpu/param/precision.param index cb8e27269f..969d84d5c6 100644 --- a/include/picongpu/param/precision.param +++ b/include/picongpu/param/precision.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -33,23 +33,22 @@ namespace picongpu { - -/*! Select a precision for the simulation data - * - precision32Bit : use 32Bit floating point numbers - * [significant digits 7 to 8] - * - precision64Bit : use 64Bit floating point numbers - * [significant digits 15 to 16] - */ -namespace precisionPIConGPU = precision32Bit; - -/*! Select a precision special operations (can be different from simulation precision) - * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) - * - precision32Bit : use 32Bit floating point numbers - * - precision64Bit : use 64Bit floating point numbers - */ -namespace precisionSqrt = precisionPIConGPU; -namespace precisionExp = precisionPIConGPU; -namespace precisionTrigonometric = precisionPIConGPU; + /*! Select a precision for the simulation data + * - precision32Bit : use 32Bit floating point numbers + * [significant digits 7 to 8] + * - precision64Bit : use 64Bit floating point numbers + * [significant digits 15 to 16] + */ + namespace precisionPIConGPU = precision32Bit; + + /*! 
Select a precision for special operations (can be different from simulation precision) + * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) + * - precision32Bit : use 32Bit floating point numbers + * - precision64Bit : use 64Bit floating point numbers + */ + namespace precisionSqrt = precisionPIConGPU; + namespace precisionExp = precisionPIConGPU; + namespace precisionTrigonometric = precisionPIConGPU; } // namespace picongpu diff --git a/include/picongpu/param/pusher.param b/include/picongpu/param/pusher.param index dcf5597be0..2a55ffe881 100644 --- a/include/picongpu/param/pusher.param +++ b/include/picongpu/param/pusher.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -60,11 +60,19 @@ namespace picongpu * - precision64Bit */ namespace sqrt_Vay = precision64Bit; - } + } // namespace particlePusherVay - namespace particlePusherAxel + namespace particlePusherHigueraCary { + /** Precision of the square roots during the push step + * - precision32Bit + * - precision64Bit + */ + namespace sqrt_HigueraCary = precision64Bit; + } // namespace particlePusherHigueraCary + namespace particlePusherAxel + { enum TrajectoryInterpolationType { LINEAR = 1u, @@ -72,23 +80,28 @@ namespace picongpu }; constexpr TrajectoryInterpolationType TrajectoryInterpolation = LINEAR; - } + } // namespace particlePusherAxel namespace particles { - namespace pusher - { - struct Vay; - struct Boris; - struct Photon; - struct Acceleration; - struct Free; - struct Probe; - struct ReducedLandauLifshitz; -#if(SIMDIM==DIM3) - struct Axel; + namespace pusher + { + struct HigueraCary; + struct Vay; + struct Boris; + struct Photon; + struct Acceleration; + struct Free; + struct Probe; + struct ReducedLandauLifshitz; +#if(SIMDIM == DIM3) + struct Axel; #endif - } // namespace pusher + template + struct Composite; + template + struct 
CompositeBinarySwitchActivationFunctor; + } // namespace pusher } // namespace particles namespace particlePusherProbe @@ -105,6 +118,6 @@ namespace picongpu * - void (no push) */ using ActualPusher = void; - } + } // namespace particlePusherProbe } // namespace picongpu diff --git a/include/picongpu/param/radiation.param b/include/picongpu/param/radiation.param index 009ee20c9e..bff4b3e133 100644 --- a/include/picongpu/param/radiation.param +++ b/include/picongpu/param/radiation.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -44,163 +44,180 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace linear_frequencies -{ -namespace SI -{ - /** mimimum frequency of the linear frequency scale in units of [1/s] */ - constexpr float_64 omega_min = 0.0; - /** maximum frequency of the linear frequency scale in units of [1/s] */ - constexpr float_64 omega_max = 1.06e16; -} // namespace SI - - /** number of frequency values to compute in the linear frequency [unitless] */ - constexpr unsigned int N_omega = 2048; -} // namespace linear_frequencies - -namespace log_frequencies -{ -namespace SI -{ - /** mimimum frequency of the logarithmic frequency scale in units of [1/s] */ - constexpr float_64 omega_min = 1.0e14; - /** maximum frequency of the logarithmic frequency scale in units of [1/s] */ - constexpr float_64 omega_max = 1.0e17; -} // namespace SI - - /** number of frequency values to compute in the logarithmic frequency [unitless] */ - constexpr unsigned int N_omega = 2048; -} // namespace log_frequencies - - -namespace frequencies_from_list -{ - /** path to text file with frequencies */ - constexpr const char * listLocation = "/path/to/frequency_list"; - /** number of frequency values to compute if frequencies are given in a file [unitless] */ - constexpr unsigned int N_omega = 2048; -} // namespace frequencies_from_list - - /** 
selected mode of frequency scaling: - * - * options: - * - linear_frequencies - * - log_frequencies - * - frequencies_from_list - */ - namespace radiation_frequencies = linear_frequencies; - -namespace radiationNyquist -{ - /** Nyquist factor: fraction of the local Nyquist frequency above which the spectra is set to zero - * should be in (0, 1). - */ - constexpr float_32 NyquistFactor = 0.5; -} // namespace radiationNyquist - - - /////////////////////////////////////////////////// - - - /** correct treatment of coherent and incoherent radiation from macro particles - * - * Choose different form factors in order to consider different particle shapes for radiation - * - radFormFactor_CIC_3D ... CIC charge distribution - * - radFormFactor_TSC_3D ... TSC charge distribution - * - radFormFactor_PCS_3D ... PCS charge distribution - * - radFormFactor_CIC_1Dy ... only CIC charge distribution in y - * - radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution - * - radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size - * - radFormFactor_incoherent ... only incoherent radiation - * - radFormFactor_coherent ... 
only coherent radiation - */ - namespace radFormFactor_CIC_3D { } - namespace radFormFactor_TSC_3D { } - namespace radFormFactor_PCS_3D { } - namespace radFormFactor_CIC_1Dy { } - namespace radFormFactor_Gauss_spherical { } - namespace radFormFactor_Gauss_cell { } - namespace radFormFactor_incoherent { } - namespace radFormFactor_coherent { } - - namespace radFormFactor = radFormFactor_Gauss_spherical; - - - /////////////////////////////////////////////////////////// - - -namespace parameters -{ - - /** number of observation directions */ - constexpr unsigned int N_observer = 256; - -} // namespace parameters - - /** select particles for radiation - * example of a filter for the relativistic Lorentz factor gamma - */ - struct GammaFilterFunctor + namespace plugins { - /** Gamma value above which the radiation is calculated */ - static constexpr float_X radiationGamma = 5.0; - - template< typename T_Particle > - HDINLINE void operator()( T_Particle& particle ) + namespace radiation { - if( - picongpu::gamma( - particle[ picongpu::momentum_ ], - picongpu::traits::attribute::getMass( - particle[ picongpu::weighting_ ], - particle - ) - ) >= radiationGamma - ) - particle[ picongpu::radiationMask_ ] = true; - } - }; - - - /** filter to (de)select particles for the radiation calculation - * - * to activate the filter: - * - goto file `speciesDefinition.param` - * - add the attribute `radiationMask` to the particle species - */ - using RadiationParticleFilter = picongpu::particles::manipulators::generic::Free< - GammaFilterFunctor - >; - - - - ////////////////////////////////////////////////// - - - /** add a window function weighting to the radiation in order - * to avoid ringing effects from sharpe boundaries - * default: no window function via `radWindowFunctionNone` - * - * Choose different window function in order to get better ringing reduction - * radWindowFunctionTriangle - * radWindowFunctionHamming - * radWindowFunctionTriplett - * radWindowFunctionGauss - * 
radWindowFunctionNone - */ - namespace radWindowFunctionTriangle { } - namespace radWindowFunctionHamming { } - namespace radWindowFunctionTriplett { } - namespace radWindowFunctionGauss { } - namespace radWindowFunctionNone { } - - namespace radWindowFunction = radWindowFunctionNone; - - -} // namespace radiation -} // namespace plugins + namespace linear_frequencies + { + namespace SI + { + /** minimum frequency of the linear frequency scale in units of [1/s] */ + constexpr float_64 omega_min = 0.0; + /** maximum frequency of the linear frequency scale in units of [1/s] */ + constexpr float_64 omega_max = 1.06e16; + } // namespace SI + + /** number of frequency values to compute in the linear frequency [unitless] */ + constexpr unsigned int N_omega = 2048; + } // namespace linear_frequencies + + namespace log_frequencies + { + namespace SI + { + /** minimum frequency of the logarithmic frequency scale in units of [1/s] */ + constexpr float_64 omega_min = 1.0e14; + /** maximum frequency of the logarithmic frequency scale in units of [1/s] */ + constexpr float_64 omega_max = 1.0e17; + } // namespace SI + + /** number of frequency values to compute in the logarithmic frequency [unitless] */ + constexpr unsigned int N_omega = 2048; + } // namespace log_frequencies + + + namespace frequencies_from_list + { + /** path to text file with frequencies */ + constexpr const char* listLocation = "/path/to/frequency_list"; + /** number of frequency values to compute if frequencies are given in a file [unitless] */ + constexpr unsigned int N_omega = 2048; + } // namespace frequencies_from_list + + /** selected mode of frequency scaling: + * + * options: + * - linear_frequencies + * - log_frequencies + * - frequencies_from_list + */ + namespace radiation_frequencies = linear_frequencies; + + namespace radiationNyquist + { + /** Nyquist factor: fraction of the local Nyquist frequency above which the spectra is set to zero + * should be in (0, 1). 
+ */ + constexpr float_32 NyquistFactor = 0.5; + } // namespace radiationNyquist + + + /////////////////////////////////////////////////// + + + /** correct treatment of coherent and incoherent radiation from macro particles + * + * Choose different form factors in order to consider different particle shapes for radiation + * - radFormFactor_CIC_3D ... CIC charge distribution + * - radFormFactor_TSC_3D ... TSC charge distribution + * - radFormFactor_PCS_3D ... PCS charge distribution + * - radFormFactor_CIC_1Dy ... only CIC charge distribution in y + * - radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution + * - radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size + * - radFormFactor_incoherent ... only incoherent radiation + * - radFormFactor_coherent ... only coherent radiation + */ + namespace radFormFactor_CIC_3D + { + } + namespace radFormFactor_TSC_3D + { + } + namespace radFormFactor_PCS_3D + { + } + namespace radFormFactor_CIC_1Dy + { + } + namespace radFormFactor_Gauss_spherical + { + } + namespace radFormFactor_Gauss_cell + { + } + namespace radFormFactor_incoherent + { + } + namespace radFormFactor_coherent + { + } + + namespace radFormFactor = radFormFactor_Gauss_spherical; + + + /////////////////////////////////////////////////////////// + + + namespace parameters + { + /** number of observation directions */ + constexpr unsigned int N_observer = 256; + + } // namespace parameters + + /** select particles for radiation + * example of a filter for the relativistic Lorentz factor gamma + */ + struct GammaFilterFunctor + { + /** Gamma value above which the radiation is calculated */ + static constexpr float_X radiationGamma = 5.0; + + template<typename T_Particle> + HDINLINE void operator()(T_Particle& particle) + { + if(picongpu::gamma( + particle[picongpu::momentum_], + picongpu::traits::attribute::getMass(particle[picongpu::weighting_], particle)) + >= radiationGamma) + particle[picongpu::radiationMask_] = 
true; + } + }; + + + /** 
filter to (de)select particles for the radiation calculation + * + * to activate the filter: + * - goto file `speciesDefinition.param` + * - add the attribute `radiationMask` to the particle species + */ + using RadiationParticleFilter = picongpu::particles::manipulators::generic::Free<GammaFilterFunctor>; + + + ////////////////////////////////////////////////// + + + /** add a window function weighting to the radiation in order + * to avoid ringing effects from sharp boundaries + * default: no window function via `radWindowFunctionNone` + * + * Choose different window function in order to get better ringing reduction + * radWindowFunctionTriangle + * radWindowFunctionHamming + * radWindowFunctionTriplett + * radWindowFunctionGauss + * radWindowFunctionNone + */ + namespace radWindowFunctionTriangle + { + } + namespace radWindowFunctionHamming + { + } + namespace radWindowFunctionTriplett + { + } + namespace radWindowFunctionGauss + { + } + namespace radWindowFunctionNone + { + } + + namespace radWindowFunction = radWindowFunctionNone; + + + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/param/radiationObserver.param b/include/picongpu/param/radiationObserver.param index 308a8fc2fc..f2663f76ec 100644 --- a/include/picongpu/param/radiationObserver.param +++ b/include/picongpu/param/radiationObserver.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -29,106 +29,105 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace radiation_observer -{ - /** Compute observation angles - * - * This function is used in the Radiation plug-in kernel to compute - * the observation directions given as a unit vector pointing - * towards a 'virtual' detector - * - * This default setup is an example of a 2D detector array. 
It computes - * observation directions for 2D virtual detector field - * with its center pointing toward the +y direction (for theta=0, phi=0) - * with observation angles ranging from - * theta = [angle_theta_start : angle_theta_end] - * phi = [angle_phi_start : angle_phi_end ] - * Every observation_id_extern index moves the phi angle from its - * start value toward its end value until the observation_id_extern - * reaches N_split. After that the theta angle moves further from its - * start value towards its end value while phi is reset to its start - * value. - * - * The unit vector pointing towards the observing virtual detector - * can be described using theta and phi by: - * x_value = sin(theta) * cos(phi) - * y_value = cos(theta) - * z_value = sin(theta) * sin(phi) - * These are the standard spherical coordinates. - * - * The example setup describes an detector array of - * 16x16 detectors ranging from -pi/8= -22.5 degrees - * to +pi/8= +22.5 degrees for both angles with the center - * pointing toward the y-axis (laser propagation direction). 
- * - * @param observation_id_extern - * int index that identifies each block on the GPU - * to compute the observation direction - * - * @return unit vector pointing in observation direction - * type: vector_64 - * - */ - HDINLINE vector_64 observation_direction(const int observation_id_extern) + namespace plugins { - /* generate two indices from single block index */ - /** split distance of given index - * pseudo-code: - * index_a = index / split_distance - * index_b = index % split_distance - */ - constexpr int N_angle_split = 16; - /** get index for computing angle theta: */ - const int my_index_theta = observation_id_extern / N_angle_split; - /** get index for computing angle phi: */ - const int my_index_phi = observation_id_extern % N_angle_split; - + namespace radiation + { + namespace radiation_observer + { + /** Compute observation angles + * + * This function is used in the Radiation plug-in kernel to compute + * the observation directions given as a unit vector pointing + * towards a 'virtual' detector + * + * This default setup is an example of a 2D detector array. It computes + * observation directions for 2D virtual detector field + * with its center pointing toward the +y direction (for theta=0, phi=0) + * with observation angles ranging from + * theta = [angle_theta_start : angle_theta_end] + * phi = [angle_phi_start : angle_phi_end ] + * Every observation_id_extern index moves the phi angle from its + * start value toward its end value until the observation_id_extern + * reaches N_split. After that the theta angle moves further from its + * start value towards its end value while phi is reset to its start + * value. + * + * The unit vector pointing towards the observing virtual detector + * can be described using theta and phi by: + * x_value = sin(theta) * cos(phi) + * y_value = cos(theta) + * z_value = sin(theta) * sin(phi) + * These are the standard spherical coordinates. 
+ * + * The example setup describes an detector array of + * 16x16 detectors ranging from -pi/8= -22.5 degrees + * to +pi/8= +22.5 degrees for both angles with the center + * pointing toward the y-axis (laser propagation direction). + * + * @param observation_id_extern + * int index that identifies each block on the GPU + * to compute the observation direction + * + * @return unit vector pointing in observation direction + * type: vector_64 + * + */ + HDINLINE vector_64 observation_direction(const int observation_id_extern) + { + /* generate two indices from single block index */ + /** split distance of given index + * pseudo-code: + * index_a = index / split_distance + * index_b = index % split_distance + */ + constexpr int N_angle_split = 16; + /** get index for computing angle theta: */ + const int my_index_theta = observation_id_extern / N_angle_split; + /** get index for computing angle phi: */ + const int my_index_phi = observation_id_extern % N_angle_split; - /* set up observation angle range */ - /* angles range for theta */ - /** minimum theta angle [rad] */ - const picongpu::float_64 angle_theta_start = - picongpu::PI/8.0; - /** maximum theta angle [rad] */ - const picongpu::float_64 angle_theta_end = + picongpu::PI/8.0; - /* angles range for phi */ - /** minimum phi angle [rad] */ - constexpr picongpu::float_64 angle_phi_start = - picongpu::PI/8.0; - /** maximum phi angle [rad] */ - constexpr picongpu::float_64 angle_phi_end = + picongpu::PI/8.0; + /* set up observation angle range */ + /* angles range for theta */ + /** minimum theta angle [rad] */ + const picongpu::float_64 angle_theta_start = -picongpu::PI / 8.0; + /** maximum theta angle [rad] */ + const picongpu::float_64 angle_theta_end = +picongpu::PI / 8.0; + /* angles range for phi */ + /** minimum phi angle [rad] */ + constexpr picongpu::float_64 angle_phi_start = -picongpu::PI / 8.0; + /** maximum phi angle [rad] */ + constexpr picongpu::float_64 angle_phi_end = +picongpu::PI / 8.0; - /* 
compute step with between two angles for range [angle_??_start : angle_??_end] */ - /** number of theta angles */ - constexpr int N_theta = parameters::N_observer / N_angle_split; - /** step width angle theta */ - const picongpu::float_64 delta_angle_theta = (angle_theta_end - - angle_theta_start) / (N_theta-1.0); - /** step width angle phi */ - const picongpu::float_64 delta_angle_phi = (angle_phi_end - - angle_phi_start) / (N_angle_split-1.0); - /** compute observation angles theta */ - const picongpu::float_64 theta( my_index_theta * delta_angle_theta + angle_theta_start ); - /** compute observation angles theta */ - const picongpu::float_64 phi( my_index_phi * delta_angle_phi - angle_phi_start ); + /* compute step width between two angles for range [angle_??_start : angle_??_end] */ + /** number of theta angles */ + constexpr int N_theta = parameters::N_observer / N_angle_split; + /** step width angle theta */ + const picongpu::float_64 delta_angle_theta + = (angle_theta_end - angle_theta_start) / (N_theta - 1.0); + /** step width angle phi */ + const picongpu::float_64 delta_angle_phi + = (angle_phi_end - angle_phi_start) / (N_angle_split - 1.0); - /* helper functions for efficient trigonometric calculations */ - picongpu::float_32 sinPhi; - picongpu::float_32 cosPhi; - picongpu::float_32 sinTheta; - picongpu::float_32 cosTheta; - math::sincos(precisionCast(phi), sinPhi, cosPhi); - math::sincos(precisionCast(theta), sinTheta, cosTheta); - /** compute observation unit vector */ - return vector_64( sinTheta*cosPhi , cosTheta, sinTheta*sinPhi ) ; + /** compute observation angles theta */ + const picongpu::float_64 theta(my_index_theta * delta_angle_theta + angle_theta_start); + /** compute observation angles phi */ + const picongpu::float_64 phi(my_index_phi * delta_angle_phi - angle_phi_start); - } + /* helper functions for efficient trigonometric calculations */ + picongpu::float_32 sinPhi; + picongpu::float_32 cosPhi; + picongpu::float_32 sinTheta; + 
picongpu::float_32 cosTheta; + pmacc::math::sincos(precisionCast(phi), sinPhi, cosPhi); + pmacc::math::sincos(precisionCast(theta), sinTheta, cosTheta); + /** compute observation unit vector */ + return vector_64(sinTheta * cosPhi, cosTheta, sinTheta * sinPhi); + } -} // namespace radiation_observer -} // namespace radiation -} // namespace plugins + } // namespace radiation_observer + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/param/random.param b/include/picongpu/param/random.param index c27b82d799..051944819d 100644 --- a/include/picongpu/param/random.param +++ b/include/picongpu/param/random.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -33,28 +33,28 @@ namespace picongpu { -namespace random -{ - /** Random number generation methods - * - * It is not allowed to change the method and restart an already existing checkpoint. - * - * - pmacc::random::methods::XorMin - * - pmacc::random::methods::MRG32k3aMin - * - pmacc::random::methods::AlpakaRand - */ - using Generator = pmacc::random::methods::XorMin< >; + namespace random + { + /** Random number generation methods + * + * It is not allowed to change the method and restart an already existing checkpoint. + * + * - pmacc::random::methods::XorMin + * - pmacc::random::methods::MRG32k3aMin + * - pmacc::random::methods::AlpakaRand + */ + using Generator = pmacc::random::methods::XorMin<>; - /** random number start seed - * - * Generator to create a seed for the random number generator. - * Depending of the generator the seed is reproducible or - * or changed with each program execution. - * - * - seed::Value< 42 > - * - seed::FromTime - * - seed::FromEnvironment - */ - using SeedGenerator = seed::Value< 42 > ; -} // namespace random + /** random number start seed + * + * Generator to create a seed for the random number generator. 
+ * Depending on the generator the seed is reproducible or + * changed with each program execution. + * + * - seed::Value< 42 > + * - seed::FromTime + * - seed::FromEnvironment + */ + using SeedGenerator = seed::Value<42>; + } // namespace random } // namespace picongpu diff --git a/include/picongpu/param/species.param b/include/picongpu/param/species.param index e3e7a2b528..524337f2a9 100644 --- a/include/picongpu/param/species.param +++ b/include/picongpu/param/species.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch +/* Copyright 2014-2021 Rene Widera, Richard Pausch, Annegret Roeszler, Klaus Steiniger * * This file is part of PIConGPU. * @@ -19,9 +19,19 @@ /** @file * - * Forward declarations for speciesDefinition.param in case one wants to use - * the same particle shape, interpolation, current solver and particle pusher - * for all particle species. + * Particle shape, field to particle interpolation, current solver, and particle pusher + * can be declared here for usage in `speciesDefinition.param`. + * + * @see + * **MODELS / Hierarchy of Charge Assignment Schemes** + * in the online documentation for information on particle shapes. + * + * + * \attention + * The higher order shape names are redefined with release 0.6.0 in order to provide a consistent naming: + * * PQS is the name of the 3rd order assignment function (instead of PCS) + * * PCS is the name of the 4th order assignment function (instead of P4S) + * * P4S does not exist anymore */ #pragma once @@ -36,53 +46,61 @@ namespace picongpu { + /** select macroparticle shape + * + * **WARNING** the shape names are redefined and diverge from PIConGPU versions before 0.6.0. 
+ * + * - particles::shapes::CIC : Assignment function is a piecewise linear spline + * - particles::shapes::TSC : Assignment function is a piecewise quadratic spline + * - particles::shapes::PQS : Assignment function is a piecewise cubic spline + * - particles::shapes::PCS : Assignment function is a piecewise quartic spline + */ + using UsedParticleShape = particles::shapes::TSC; -/** Particle Shape definitions - * - particles::shapes::CIC : 1st order - * - particles::shapes::TSC : 2nd order - * - particles::shapes::PCS : 3rd order - * - particles::shapes::P4S : 4th order - * - * example: using UsedParticleShape = particles::shapes::CIC; - */ -using UsedParticleShape = particles::shapes::TSC; + /** select interpolation method to be used for interpolation of grid-based field values to particle positions + */ + using UsedField2Particle = FieldToParticleInterpolation; -/** define which interpolation method is used to interpolate fields to particles - */ -using UsedField2Particle = FieldToParticleInterpolation< - UsedParticleShape, - AssignedTrilinearInterpolation ->; + /*! 
select current solver method + * - currentSolver::Esirkepov< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * - currentSolver::VillaBune< SHAPE, STRATEGY > : particle shapes - CIC (1st order) only + * - currentSolver::EmZ< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * + * For development purposes: + * - currentSolver::EsirkepovNative< SHAPE, STRATEGY > : generic version of currentSolverEsirkepov + * without optimization (~4x slower and needs more shared memory) + * + * STRATEGY (optional): + * - currentSolver::strategy::StridedCachedSupercells + * - currentSolver::strategy::StridedCachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::CachedSupercells + * - currentSolver::strategy::CachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::NonCachedSupercells + * - currentSolver::strategy::NonCachedSupercellsScaled with N >= 1 + */ + using UsedParticleCurrentSolver = currentSolver::Esirkepov; -/** select current solver method - * - currentSolver::Esirkepov< SHAPE > : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - currentSolver::VillaBune<> : particle shapes - CIC (1st order) only - * - currentSolver::EmZ< SHAPE > : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - * For development purposes: - * - currentSolver::currentSolver::EsirkepovNative< SHAPE > : generic version of currentSolverEsirkepov - * without optimization (~4x slower and needs more shared memory) - */ -using UsedParticleCurrentSolver = currentSolver::Esirkepov< UsedParticleShape >; - -/** particle pusher configuration - * - * Defining a pusher is optional for particles - * - * - particles::pusher::Vay : better suited relativistic boris pusher - * - particles::pusher::Boris : standard boris pusher - * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher - * with classical radiation reaction - * - * For diagnostics & modeling: ------------------------------------------------ 
- * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field - * - particles::pusher::Free : free propagation, ignore fields - * (= free stream model) - * - particles::pusher::Photon : propagate with c in direction of normalized mom. - * - particles::pusher::Probe : Probe particles that interpolate E & B - * For development purposes: -------------------------------------------------- - * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) - */ -using UsedParticlePusher = particles::pusher::Boris; + /** particle pusher configuration + * + * Defining a pusher is optional for particles + * + * - particles::pusher::HigueraCary : Higuera & Cary's relativistic pusher preserving both volume and ExB velocity + * - particles::pusher::Vay : Vay's relativistic pusher preserving ExB velocity + * - particles::pusher::Boris : Boris' relativistic pusher preserving volume + * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher + * with classical radiation reaction + * - particles::pusher::Composite : composite of two given pushers, + * switches between using one (or none) of those + * + * For diagnostics & modeling: ------------------------------------------------ + * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field + * - particles::pusher::Free : free propagation, ignore fields + * (= free stream model) + * - particles::pusher::Photon : propagate with c in direction of normalized mom. 
+ * - particles::pusher::Probe : Probe particles that interpolate E & B + * For development purposes: -------------------------------------------------- + * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) + */ + using UsedParticlePusher = particles::pusher::Boris; } // namespace picongpu diff --git a/include/picongpu/param/speciesAttributes.param b/include/picongpu/param/speciesAttributes.param index 51aba8374c..12cb764000 100644 --- a/include/picongpu/param/speciesAttributes.param +++ b/include/picongpu/param/speciesAttributes.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Marco Garten, Alexander Grund, Axel Huebl, +/* Copyright 2014-2021 Rene Widera, Marco Garten, Alexander Grund, Axel Huebl, * Heiko Burau * * This file is part of PIConGPU. @@ -53,63 +53,31 @@ namespace picongpu * float3_64, ... * This is only a name without a specialization. */ - alias( position ); + alias(position); //! unique identifier for a particle - value_identifier( - uint64_t, - particleId, - IdProvider< simDim >::getNewId() - ); + value_identifier(uint64_t, particleId, IdProvider::getNewId()); //! specialization for the relative in-cell position - value_identifier( - floatD_X, - position_pic, - floatD_X::create( 0. ) - ); + value_identifier(floatD_X, position_pic, floatD_X::create(0.)); //! momentum at timestep t - value_identifier( - float3_X, - momentum, - float3_X::create( 0. ) - ); + value_identifier(float3_X, momentum, float3_X::create(0.)); //! momentum at (previous) timestep t-1 - value_identifier( - float3_X, - momentumPrev1, - float3_X::create( 0._X ) - ); + value_identifier(float3_X, momentumPrev1, float3_X::create(0._X)); //! weighting of the macro particle - value_identifier( - float_X, - weighting, - 0._X - ); + value_identifier(float_X, weighting, 0._X); //! Voronoi cell of the macro particle - value_identifier( - int16_t, - voronoiCellId, - -1 - ); + value_identifier(int16_t, voronoiCellId, -1); //! 
interpolated electric field with respect to particle shape - value_identifier( - float3_X, - probeE, - float3_X::create( 0. ) - ); + value_identifier(float3_X, probeE, float3_X::create(0.)); //! interpolated electric field with respect to particle shape - value_identifier( - float3_X, - probeB, - float3_X::create( 0. ) - ); + value_identifier(float3_X, probeB, float3_X::create(0.)); /** masking a particle for radiation * @@ -117,11 +85,7 @@ namespace picongpu * `RadiationParticleFilter` in radiation.param to (de)select * particles for the radiation calculation. */ - value_identifier( - bool, - radiationMask, - false - ); + value_identifier(bool, radiationMask, false); /** masking a particle for transition radiation * @@ -129,11 +93,7 @@ namespace picongpu * `TransitionRadiationParticleFilter` in transitionRadiation.param to (de)select * particles for the transition radiation calculation. */ - value_identifier( - bool, - transitionRadiationMask, - false - ); + value_identifier(bool, transitionRadiationMask, false); /** number of electrons bound to the atom / ion * @@ -145,22 +105,14 @@ namespace picongpu * * @todo connect default to proton number */ - value_identifier( - float_X, - boundElectrons, - 0._X - ); + value_identifier(float_X, boundElectrons, 0._X); /** atomic superconfiguration * * atomic configuration of an ion for collisional-radiative modeling, * see also flylite.param */ - value_identifier( - flylite::Superconfig, - superconfig, - flylite::Superconfig::create( 0. ) - ); + value_identifier(flylite::Superconfig, superconfig, flylite::Superconfig::create(0.)); /** Total cell index of a particle. * @@ -169,57 +121,53 @@ namespace picongpu * `globalDomain.offset` + `localDomain.offset` * added to the N-dimensional cell index the particle belongs to on that GPU. */ - value_identifier( - DataSpace< simDim >, - totalCellIdx, - DataSpace< simDim >( ) - ); + value_identifier(DataSpace, totalCellIdx, DataSpace()); //! 
alias for particle shape, see also species.param - alias( shape ); + alias(shape); //! alias for particle pusher, see alsospecies.param - alias( particlePusher ); + alias(particlePusher); //! alias for particle ionizers, see also ionizer.param - alias( ionizers ); + alias(ionizers); //! alias for ionization energy container, see also ionizationEnergies.param - alias( ionizationEnergies ); + alias(ionizationEnergies); //! alias for synchrotronPhotons, see also speciesDefinition.param - alias( synchrotronPhotons ) + alias(synchrotronPhotons); //! alias for ion species used for bremsstrahlung - alias( bremsstrahlungIons ); + alias(bremsstrahlungIons); //! alias for photon species used for bremsstrahlung - alias( bremsstrahlungPhotons ); + alias(bremsstrahlungPhotons); //! alias for particle to field interpolation, see also species.param - alias( interpolation ); + alias(interpolation); //! alias for particle current solver, see also species.param - alias( current ); + alias(current); /** alias for particle flag: atomic numbers, see also ionizer.param * - only reasonable for atoms / ions / nuclei * - is required when boundElectrons is set */ - alias( atomicNumbers ); + alias(atomicNumbers); /** alias for particle flag: effective nuclear charge, * * - see also ionizer.param * - only reasonable for atoms / ions / nuclei */ - alias( effectiveNuclearCharge ); + alias(effectiveNuclearCharge); /** alias for particle population kinetics model (e.g. 
FLYlite) * * see also flylite.param */ - alias( populationKinetics ); + alias(populationKinetics); /** alias for particle mass ratio * @@ -228,7 +176,7 @@ namespace picongpu * * default value: 1.0 if unset */ - alias( massRatio ); + alias(massRatio); /** alias for particle charge ratio * @@ -237,7 +185,7 @@ namespace picongpu * * default value: 1.0 if unset */ - alias( chargeRatio ); + alias(chargeRatio); /** alias for particle density ratio * @@ -246,7 +194,7 @@ namespace picongpu * * default value: 1.0 if unset */ - alias( densityRatio ); + alias(densityRatio); /** alias to reserved bytes for each communication direction * @@ -263,10 +211,12 @@ namespace picongpu * static constexpr uint32_t BYTES_EXCHANGE_Z = 5 * 1024 * 1024; * static constexpr uint32_t BYTES_CORNER = 16 * 1024; * static constexpr uint32_t BYTES_EDGES = 16 * 1024; + * using REF_LOCAL_DOM_SIZE = mCT::Int<0, 0, 0>; + * const std::array DIR_SCALING_FACTOR = {{0.0, 0.0, 0.0}}; * }; * @endcode */ - alias( exchangeMemCfg ); + alias(exchangeMemCfg); /** alias to specify the boundary condition for particles * @@ -277,6 +227,6 @@ namespace picongpu * Note: alias `boundaryCondition` will be ignored if the runtime parameter * `--periodic` is set. */ - alias( boundaryCondition ); + alias(boundaryCondition); } // namespace picongpu diff --git a/include/picongpu/param/speciesConstants.param b/include/picongpu/param/speciesConstants.param index 5915567a33..1771428203 100644 --- a/include/picongpu/param/speciesConstants.param +++ b/include/picongpu/param/speciesConstants.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -65,5 +65,5 @@ namespace picongpu * unit: C */ constexpr float_64 BASE_CHARGE_SI = ELECTRON_CHARGE_SI; - } -} + } // namespace SI +} // namespace picongpu diff --git a/include/picongpu/param/speciesDefinition.param b/include/picongpu/param/speciesDefinition.param index 3307508c34..11f27a4d47 100644 --- a/include/picongpu/param/speciesDefinition.param +++ b/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau * * This file is part of PIConGPU. * @@ -43,105 +43,88 @@ namespace picongpu { + /*########################### define particle attributes #####################*/ -/*########################### define particle attributes #####################*/ - -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - -/*--------------------------- photons -------------------------------------------*/ - -value_identifier( float_X, MassRatioPhotons, 0.0 ); -value_identifier( float_X, ChargeRatioPhotons, 0.0 ); - -using ParticleFlagsPhotons = MakeSeq_t< - particlePusher< particles::pusher::Photon >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - massRatio< MassRatioPhotons >, - chargeRatio< ChargeRatioPhotons > ->; - -/* define species photons */ -using PIC_Photons = Particles< - PMACC_CSTRING( "ph" ), - ParticleFlagsPhotons, - DefaultParticleAttributes ->; - -/*--------------------------- electrons --------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - 
particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > -#if( ENABLE_SYNCHROTRON_PHOTONS == 1 ) - , synchrotronPhotons< PIC_Photons > + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- photons -------------------------------------------*/ + + value_identifier(float_X, MassRatioPhotons, 0.0); + value_identifier(float_X, ChargeRatioPhotons, 0.0); + + using ParticleFlagsPhotons = MakeSeq_t< + particlePusher, + shape, + interpolation, + massRatio, + chargeRatio>; + + /* define species photons */ + using PIC_Photons = Particles; + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio +#if(ENABLE_SYNCHROTRON_PHOTONS == 1) + , + synchrotronPhotons #endif ->; - -/* define species electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; - -/*--------------------------- ions -------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioIons, 1836.152672 ); -value_identifier( float_X, ChargeRatioIons, -1.0 ); - -/* ratio relative to BASE_DENSITY */ -value_identifier( float_X, DensityRatioIons, 1.0 ); - -using ParticleFlagsIons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< 
UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioIons >, - chargeRatio< ChargeRatioIons >, - densityRatio< DensityRatioIons >, - atomicNumbers< ionization::atomicNumbers::Hydrogen_t > ->; - -/* define species ions */ -using PIC_Ions = Particles< - PMACC_CSTRING( "i" ), - ParticleFlagsIons, - DefaultParticleAttributes ->; - -/*########################### end species ####################################*/ - -/** All known particle species of the simulation - * - * List all defined particle species from above in this list - * to make them available to the PIC algorithm. - */ -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons, - PIC_Ions -#if( ENABLE_SYNCHROTRON_PHOTONS == 1 ) - , PIC_Photons + >; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioIons, 1836.152672); + value_identifier(float_X, ChargeRatioIons, -1.0); + + /* ratio relative to BASE_DENSITY */ + value_identifier(float_X, DensityRatioIons, 1.0); + + using ParticleFlagsIons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers>; + + /* define species ions */ + using PIC_Ions = Particles; + + /*########################### end species ####################################*/ + + /** All known particle species of the simulation + * + * List all defined particle species from above in this list + * to make them available to the PIC algorithm. 
+ */ + using VectorAllSpecies = MakeSeq_t< + PIC_Electrons, + PIC_Ions +#if(ENABLE_SYNCHROTRON_PHOTONS == 1) + , + PIC_Photons #endif ->; + >; } // namespace picongpu diff --git a/include/picongpu/param/speciesInitialization.param b/include/picongpu/param/speciesInitialization.param index 7850be3a25..e9b1216837 100644 --- a/include/picongpu/param/speciesInitialization.param +++ b/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,13 +33,13 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline defines in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector<>; + namespace particles + { + /** InitPipeline defines in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector<>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/param/starter.param b/include/picongpu/param/starter.param index 5e6c700755..a7ca54ee55 100644 --- a/include/picongpu/param/starter.param +++ b/include/picongpu/param/starter.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once @@ -26,9 +25,5 @@ namespace picongpu { namespace defaultPIConGPU { - } -} - - - +} // namespace picongpu diff --git a/include/picongpu/param/synchrotronPhotons.param b/include/picongpu/param/synchrotronPhotons.param index 1ffaaf5761..53ec1e2b45 100644 --- a/include/picongpu/param/synchrotronPhotons.param +++ b/include/picongpu/param/synchrotronPhotons.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. 
* @@ -21,36 +21,36 @@ namespace picongpu { -namespace particles -{ -namespace synchrotronPhotons -{ - + namespace particles + { + namespace synchrotronPhotons + { /** enable synchrotron photon emission */ #ifndef ENABLE_SYNCHROTRON_PHOTONS -#define ENABLE_SYNCHROTRON_PHOTONS 0 +# define ENABLE_SYNCHROTRON_PHOTONS 0 #endif -/** enable (disable) QED (classical) photon emission spectrum */ -constexpr bool enableQEDTerm = false; + /** enable (disable) QED (classical) photon emission spectrum */ + constexpr bool enableQEDTerm = false; -/** Above this value (to the power of three, see comments on mapping) the synchrotron functions are nearly zero. */ -constexpr float_64 SYNC_FUNCS_CUTOFF = 5.0; + /** Above this value (to the power of three, see comments on mapping) the synchrotron functions are nearly + * zero. */ + constexpr float_64 SYNC_FUNCS_CUTOFF = 5.0; -/** stepwidth for the numerical integration of the bessel function for the first synchrotron function */ -constexpr float_64 SYNC_FUNCS_BESSEL_INTEGRAL_STEPWIDTH = 1.0e-3; + /** stepwidth for the numerical integration of the bessel function for the first synchrotron function */ + constexpr float_64 SYNC_FUNCS_BESSEL_INTEGRAL_STEPWIDTH = 1.0e-3; -/** Number of sampling points of the lookup table */ -constexpr uint32_t SYNC_FUNCS_NUM_SAMPLES = 8192; + /** Number of sampling points of the lookup table */ + constexpr uint32_t SYNC_FUNCS_NUM_SAMPLES = 8192; -/** Photons of oscillation periods greater than a timestep are not created since the grid already accounts for them. - * This cutoff ratio is defined as: photon-oscillation-period / timestep */ -constexpr float_64 SOFT_PHOTONS_CUTOFF_RATIO = 1.0; + /** Photons of oscillation periods greater than a timestep are not created since the grid already accounts + * for them. 
This cutoff ratio is defined as: photon-oscillation-period / timestep */ + constexpr float_64 SOFT_PHOTONS_CUTOFF_RATIO = 1.0; -/** if the emission probability per timestep is higher than this value and the log level is set to - * "CRITICAL" a warning will be raised. */ -constexpr float_64 SINGLE_EMISSION_PROB_LIMIT = 0.4; + /** if the emission probability per timestep is higher than this value and the log level is set to + * "CRITICAL" a warning will be raised. */ + constexpr float_64 SINGLE_EMISSION_PROB_LIMIT = 0.4; -} // namespace synchrotronPhotons -} // namespace particles + } // namespace synchrotronPhotons + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/param/transitionRadiation.param b/include/picongpu/param/transitionRadiation.param index 95024ac20a..21ca7f8464 100644 --- a/include/picongpu/param/transitionRadiation.param +++ b/include/picongpu/param/transitionRadiation.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. * @@ -43,232 +43,244 @@ namespace picongpu { -namespace plugins -{ -// initiate the formfactor namespaces from the radiation plugin -namespace radiation -{ - namespace radFormFactor_CIC_3D { } - namespace radFormFactor_TSC_3D { } - namespace radFormFactor_PCS_3D { } - namespace radFormFactor_CIC_1Dy { } - namespace radFormFactor_Gauss_spherical { } - namespace radFormFactor_Gauss_cell { } - namespace radFormFactor_incoherent { } - namespace radFormFactor_coherent { } -} // namespace radiation - -namespace transitionRadiation -{ -namespace linearFrequencies -{ - namespace SI + namespace plugins { - //! mimimum frequency of the linear frequency scale in units of [1/s] - constexpr float_64 omegaMin = 0.0; - //! maximum frequency of the linear frequency scale in units of [1/s] - constexpr float_64 omegaMax = 1.06e16; - } - - //! 
number of frequency values to compute in the linear frequency [unitless] - constexpr unsigned int nOmega = 512; - -} // namespace linearFrequencies - -namespace logFrequencies -{ - namespace SI - { - //! mimimum frequency of the logarithmic frequency scale in units of [1/s] - constexpr float_64 omegaMin = 1.0e13; - //! maximum frequency of the logarithmic frequency scale in units of [1/s] - constexpr float_64 omegaMax = 1.0e17; - } - - //! number of frequency values to compute in the logarithmic frequency [unitless] - constexpr unsigned int nOmega = 256; - -} // namespace logFrequencies - - -namespace listFrequencies -{ - //! path to text file with frequencies - constexpr char listLocation[] = "/path/to/frequency_list"; - //! number of frequency values to compute if frequencies are given in a file [unitless] - constexpr unsigned int nOmega = 512; - -} // namespace listFrequencies - - - /** selected mode of frequency scaling: - * - * options: - * - linearFrequencies - * - logFrequencies - * - listFrequencies - */ - namespace frequencies = logFrequencies; - - /////////////////////////////////////////////////// - - - /** correct treatment of coherent radiation from macro particles - * - * These formfactors are the same as in the radiation plugin! - * Choose different form factors in order to consider different particle shapes for radiation - * - ::picongpu::plugins::radiation::radFormFactor_CIC_3D ... CIC charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_TSC_3D ... TSC charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_PCS_3D ... PCS charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_CIC_1Dy ... only CIC charge distribution in y - * - ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_Gauss_cell ... 
Gauss charge distribution according to cell size - * - ::picongpu::plugins::radiation::radFormFactor_incoherent ... only incoherent radiation - * - ::picongpu::plugins::radiation::radFormFactor_coherent ... only coherent radiation - */ - namespace macroParticleFormFactor = ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical; - - /////////////////////////////////////////////////////////// - - namespace parameters - { - /** Number of observation directions - * - * If nPhi or nTheta is equal to 1, the transition radiation will be calculated - * for phiMin or thetaMin respectively. - */ - constexpr unsigned int nPhi = 128; - constexpr unsigned int nTheta = 128; - constexpr unsigned int nObserver = nPhi * nTheta; - - // theta goes from 0 to pi - constexpr float_64 thetaMin = 0.0; - constexpr float_64 thetaMax = picongpu::PI; - - // phi goes from 0 to 2*pi - constexpr float_64 phiMin = 0.0; - constexpr float_64 phiMax = 2 * picongpu::PI; - - namespace SI + // initiate the formfactor namespaces from the radiation plugin + namespace radiation { - // y position of the foil to calculate transition radiation at - // leave at 0 for no virtual particle propagation - constexpr float_64 foilPosition = 0.0; - } - - } /* end namespace parameters */ - - - //! example of a filter for the relativistic Lorentz factor gamma - struct GammaFilterFunctor - { - //! 
Gamma value above which the radiation is calculated - static constexpr float_X filterGamma = 5.0; - - template< typename T_Particle > - HDINLINE void operator()( T_Particle& particle ) + namespace radFormFactor_CIC_3D + { + } + namespace radFormFactor_TSC_3D + { + } + namespace radFormFactor_PCS_3D + { + } + namespace radFormFactor_CIC_1Dy + { + } + namespace radFormFactor_Gauss_spherical + { + } + namespace radFormFactor_Gauss_cell + { + } + namespace radFormFactor_incoherent + { + } + namespace radFormFactor_coherent + { + } + } // namespace radiation + + namespace transitionRadiation { - if( - picongpu::gamma( - particle[ picongpu::momentum_ ], - picongpu::traits::attribute::getMass( - particle[ picongpu::weighting_ ], - particle - ) - ) >= filterGamma - ) - particle[ picongpu::transitionRadiationMask_ ] = true; - } - }; - - /** filter to (de)select particles for the radiation calculation - * - * to activate the filter: - * - goto file `speciesDefinition.param` - * - add the attribute `transitionRadiationMask` to the particle species - */ - using GammaFilter = picongpu::particles::manipulators::generic::Free< - GammaFilterFunctor - >; - - /** Compute observation angles - * - * This function is used in the transition radiation plugin kernel to compute - * the observation directions given as a unit vector pointing - * towards a 'virtual' detector - * - * This default setup is an example of a 2D detector array. It computes - * observation directions for 2D virtual detector field - * with its center pointing toward the +y direction (for theta=0, phi=0) - * with observation angles ranging from - * theta = [angle_theta_start : angle_theta_end] - * phi = [angle_phi_start : angle_phi_end ] - * Every observation_id_extern index moves the phi angle from its - * start value toward its end value until the observation_id_extern - * reaches N_split. 
After that the theta angle moves further from its - * start value towards its end value while phi is reset to its start - * value. - * - * The unit vector pointing towards the observing virtual detector - * can be described using theta and phi by: - * x_value = sin(theta) * cos(phi) - * y_value = cos(theta) - * z_value = sin(theta) * sin(phi) - * These are the standard spherical coordinates. - * - * The example setup describes an detector array of - * 128X128 detectors ranging from 0 to pi for the azimuth angle - * theta and from 0 to 2 pi for the polar angle phi. - * - * If the calculation is only supposed to be done for a single azimuth - * or polar angle, it will use the respective minimal angle. - * - * @param observation_id_extern - * int index that identifies each block on the GPU - * to compute the observation direction - * - * @return unit vector pointing in observation direction - * type: float3_X - */ - HDINLINE float3_X observationDirection(const int observation_id_extern) - { - /* generate two indices from single block index */ - /** split distance of given index - * pseudo-code: - * index_a = index / split_distance - * index_b = index % split_distance - */ - /** get index for computing angle theta: */ - const int indexTheta = observation_id_extern / parameters::nPhi; - - /** step width angle theta */ - const picongpu::float_64 deltaTheta = ( parameters::nTheta > 1 ) ? - ( parameters::thetaMax - parameters::thetaMin ) / ( parameters::nTheta - 1.0 ) : 0.0; - - /** compute observation angles theta */ - const picongpu::float_64 theta = indexTheta * deltaTheta + parameters::thetaMin; - - /** get index for computing angle phi: */ - const int indexPhi = observation_id_extern % parameters::nPhi; - - /** step width angle phi */ - const picongpu::float_64 deltaPhi = ( parameters::nPhi > 1 ) ? 
- ( parameters::phiMax - parameters::phiMin ) / ( parameters::nPhi - 1.0 ) : 0.0; - - /** compute observation angles phi */ - const picongpu::float_64 phi = indexPhi * deltaPhi - parameters::phiMin; - - /* helper functions for efficient trigonometric calculations */ - picongpu::float_32 sinPhi; - picongpu::float_32 cosPhi; - picongpu::float_32 sinTheta; - picongpu::float_32 cosTheta; - math::sincos( precisionCast< picongpu::float_32 >( phi ), sinPhi, cosPhi ); - math::sincos( precisionCast< picongpu::float_32 >( theta ), sinTheta, cosTheta ); - /** compute observation unit vector */ - return float3_X( sinTheta * cosPhi , cosTheta, sinTheta * sinPhi ); - } - -} // namespace transitionRadiation -} // namespace plugins + namespace linearFrequencies + { + namespace SI + { + //! mimimum frequency of the linear frequency scale in units of [1/s] + constexpr float_64 omegaMin = 0.0; + //! maximum frequency of the linear frequency scale in units of [1/s] + constexpr float_64 omegaMax = 1.06e16; + } // namespace SI + + //! number of frequency values to compute in the linear frequency [unitless] + constexpr unsigned int nOmega = 512; + + } // namespace linearFrequencies + + namespace logFrequencies + { + namespace SI + { + //! mimimum frequency of the logarithmic frequency scale in units of [1/s] + constexpr float_64 omegaMin = 1.0e13; + //! maximum frequency of the logarithmic frequency scale in units of [1/s] + constexpr float_64 omegaMax = 1.0e17; + } // namespace SI + + //! number of frequency values to compute in the logarithmic frequency [unitless] + constexpr unsigned int nOmega = 256; + + } // namespace logFrequencies + + + namespace listFrequencies + { + //! path to text file with frequencies + constexpr char listLocation[] = "/path/to/frequency_list"; + //! 
number of frequency values to compute if frequencies are given in a file [unitless] + constexpr unsigned int nOmega = 512; + + } // namespace listFrequencies + + + /** selected mode of frequency scaling: + * + * options: + * - linearFrequencies + * - logFrequencies + * - listFrequencies + */ + namespace frequencies = logFrequencies; + + /////////////////////////////////////////////////// + + + /** correct treatment of coherent radiation from macro particles + * + * These formfactors are the same as in the radiation plugin! + * Choose different form factors in order to consider different particle shapes for radiation + * - ::picongpu::plugins::radiation::radFormFactor_CIC_3D ... CIC charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_TSC_3D ... TSC charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_PCS_3D ... PCS charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_CIC_1Dy ... only CIC charge distribution in y + * - ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_Gauss_cell ... Gauss charge distribution according to + * cell size + * - ::picongpu::plugins::radiation::radFormFactor_incoherent ... only incoherent radiation + * - ::picongpu::plugins::radiation::radFormFactor_coherent ... only coherent radiation + */ + namespace macroParticleFormFactor = ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical; + + /////////////////////////////////////////////////////////// + + namespace parameters + { + /** Number of observation directions + * + * If nPhi or nTheta is equal to 1, the transition radiation will be calculated + * for phiMin or thetaMin respectively. 
+ */ + constexpr unsigned int nPhi = 128; + constexpr unsigned int nTheta = 128; + constexpr unsigned int nObserver = nPhi * nTheta; + + // theta goes from 0 to pi + constexpr float_64 thetaMin = 0.0; + constexpr float_64 thetaMax = picongpu::PI; + + // phi goes from 0 to 2*pi + constexpr float_64 phiMin = 0.0; + constexpr float_64 phiMax = 2 * picongpu::PI; + + namespace SI + { + // y position of the foil to calculate transition radiation at + // leave at 0 for no virtual particle propagation + constexpr float_64 foilPosition = 0.0; + } // namespace SI + + } /* end namespace parameters */ + + + //! example of a filter for the relativistic Lorentz factor gamma + struct GammaFilterFunctor + { + //! Gamma value above which the radiation is calculated + static constexpr float_X filterGamma = 5.0; + + template + HDINLINE void operator()(T_Particle& particle) + { + if(picongpu::gamma( + particle[picongpu::momentum_], + picongpu::traits::attribute::getMass(particle[picongpu::weighting_], particle)) + >= filterGamma) + particle[picongpu::transitionRadiationMask_] = true; + } + }; + + /** filter to (de)select particles for the radiation calculation + * + * to activate the filter: + * - goto file `speciesDefinition.param` + * - add the attribute `transitionRadiationMask` to the particle species + */ + using GammaFilter = picongpu::particles::manipulators::generic::Free; + + /** Compute observation angles + * + * This function is used in the transition radiation plugin kernel to compute + * the observation directions given as a unit vector pointing + * towards a 'virtual' detector + * + * This default setup is an example of a 2D detector array. 
It computes + * observation directions for 2D virtual detector field + * with its center pointing toward the +y direction (for theta=0, phi=0) + * with observation angles ranging from + * theta = [angle_theta_start : angle_theta_end] + * phi = [angle_phi_start : angle_phi_end ] + * Every observation_id_extern index moves the phi angle from its + * start value toward its end value until the observation_id_extern + * reaches N_split. After that the theta angle moves further from its + * start value towards its end value while phi is reset to its start + * value. + * + * The unit vector pointing towards the observing virtual detector + * can be described using theta and phi by: + * x_value = sin(theta) * cos(phi) + * y_value = cos(theta) + * z_value = sin(theta) * sin(phi) + * These are the standard spherical coordinates. + * + * The example setup describes an detector array of + * 128X128 detectors ranging from 0 to pi for the azimuth angle + * theta and from 0 to 2 pi for the polar angle phi. + * + * If the calculation is only supposed to be done for a single azimuth + * or polar angle, it will use the respective minimal angle. + * + * @param observation_id_extern + * int index that identifies each block on the GPU + * to compute the observation direction + * + * @return unit vector pointing in observation direction + * type: float3_X + */ + HDINLINE float3_X observationDirection(const int observation_id_extern) + { + /* generate two indices from single block index */ + /** split distance of given index + * pseudo-code: + * index_a = index / split_distance + * index_b = index % split_distance + */ + /** get index for computing angle theta: */ + const int indexTheta = observation_id_extern / parameters::nPhi; + + /** step width angle theta */ + const picongpu::float_64 deltaTheta = (parameters::nTheta > 1) + ? 
(parameters::thetaMax - parameters::thetaMin) / (parameters::nTheta - 1.0) + : 0.0; + + /** compute observation angles theta */ + const picongpu::float_64 theta = indexTheta * deltaTheta + parameters::thetaMin; + + /** get index for computing angle phi: */ + const int indexPhi = observation_id_extern % parameters::nPhi; + + /** step width angle phi */ + const picongpu::float_64 deltaPhi = (parameters::nPhi > 1) + ? (parameters::phiMax - parameters::phiMin) / (parameters::nPhi - 1.0) + : 0.0; + + /** compute observation angles phi */ + const picongpu::float_64 phi = indexPhi * deltaPhi - parameters::phiMin; + + /* helper functions for efficient trigonometric calculations */ + picongpu::float_32 sinPhi; + picongpu::float_32 cosPhi; + picongpu::float_32 sinTheta; + picongpu::float_32 cosTheta; + pmacc::math::sincos(precisionCast(phi), sinPhi, cosPhi); + pmacc::math::sincos(precisionCast(theta), sinTheta, cosTheta); + /** compute observation unit vector */ + return float3_X(sinTheta * cosPhi, cosTheta, sinTheta * sinPhi); + } + + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/param/unit.param b/include/picongpu/param/unit.param index 3f8fed5e7b..2f5c181c8c 100644 --- a/include/picongpu/param/unit.param +++ b/include/picongpu/param/unit.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Marco Garten, Heiko Burau +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Marco Garten, Heiko Burau * * This file is part of PIConGPU. 
* @@ -32,22 +32,23 @@ namespace picongpu /** Unit of time */ constexpr float_64 UNIT_TIME = SI::DELTA_T_SI; /** Unit of length */ - constexpr float_64 UNIT_LENGTH = UNIT_TIME*UNIT_SPEED; + constexpr float_64 UNIT_LENGTH = UNIT_TIME * UNIT_SPEED; namespace particles { /** Number of particles per makro particle (= macro particle weighting) * unit: none */ - constexpr float_X TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE = - float_64( SI::BASE_DENSITY_SI * SI::CELL_WIDTH_SI * SI::CELL_HEIGHT_SI * SI::CELL_DEPTH_SI ) / - float_64( particles::TYPICAL_PARTICLES_PER_CELL ); - } + constexpr float_X TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE + = float_64(SI::BASE_DENSITY_SI * SI::CELL_WIDTH_SI * SI::CELL_HEIGHT_SI * SI::CELL_DEPTH_SI) + / float_64(particles::TYPICAL_PARTICLES_PER_CELL); + } // namespace particles /** Unit of mass */ constexpr float_64 UNIT_MASS = SI::BASE_MASS_SI * double(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); /** Unit of charge */ - constexpr float_64 UNIT_CHARGE = -1.0 * SI::BASE_CHARGE_SI * double(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); + constexpr float_64 UNIT_CHARGE + = -1.0 * SI::BASE_CHARGE_SI * double(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); /** Unit of energy */ constexpr float_64 UNIT_ENERGY = (UNIT_MASS * UNIT_LENGTH * UNIT_LENGTH / (UNIT_TIME * UNIT_TIME)); /** Unit of EField: V/m */ @@ -55,4 +56,4 @@ namespace picongpu //** Unit of BField: Tesla [T] = Vs/m^2 */ constexpr float_64 UNIT_BFIELD = (UNIT_MASS / (UNIT_TIME * UNIT_CHARGE)); -} +} // namespace picongpu diff --git a/include/picongpu/param/xrayScattering.param b/include/picongpu/param/xrayScattering.param new file mode 100644 index 0000000000..58985ac712 --- /dev/null +++ b/include/picongpu/param/xrayScattering.param @@ -0,0 +1,53 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/beam/Side.hpp" + +/* preprocessor struct generator */ +#include + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + using namespace picongpu::plugins::xrayScattering::beam; + /* Choose from: + * - ZSide + * - YSide + * - XSide + * - ZRSide + * - YRSide + * - XRSide + */ + using ProbingSide = ZSide; + + PMACC_STRUCT( + RotationParam, + (PMACC_C_VALUE(float_X, yawAngle, 0))(PMACC_C_VALUE(float_X, pitchAngle, 0))); + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/particles/InitFunctors.hpp b/include/picongpu/particles/InitFunctors.hpp index aca4bb1c6f..d5c52ed217 100644 --- a/include/picongpu/particles/InitFunctors.hpp +++ b/include/picongpu/particles/InitFunctors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -44,220 +44,184 @@ namespace picongpu { - -namespace particles -{ - -/** call a functor - * - * @tparam T_Functor unary lambda functor - * operator() must take two params - * - first: storage tuple - * - second: current time step - */ -template -struct CallFunctor -{ - using Functor = T_Functor; - - HINLINE void operator()( - const uint32_t currentStep - ) - { - Functor()( currentStep ); - } -}; - -/** Create particle distribution from a normalized density profile - * - * Create particles inside a species. The created particles are macroscopically - * distributed according to a given normalized density profile - * (`T_DensityFunctor`). Their microscopic position inside individual cells is - * determined by the `T_PositionFunctor`. - * - * @note FillAllGaps is automatically called after creation. - * - * @tparam T_DensityFunctor unary lambda functor with profile description, - * see density.param, - * example: picongpu::particles::densityProfiles::Homogenous - * @tparam T_PositionFunctor unary lambda functor with position description, - * see particle.param, - * examples: picongpu::particles::startPosition::Quiet, - * picongpu::particles::startPosition::Random - * @tparam T_SpeciesType type or name as boost::mpl::string of the used species, - * see speciesDefinition.param - */ -template< - typename T_DensityFunctor, - typename T_PositionFunctor, - typename T_SpeciesType = bmpl::_1 -> -struct CreateDensity -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - - using UserDensityFunctor = typename bmpl::apply1::type; - /* add interface for compile time interface validation*/ - using DensityFunctor = densityProfiles::IProfile; - - using UserPositionFunctor = typename bmpl::apply1::type; - /* add interface for compile time interface validation*/ - using PositionFunctor = manipulators::IUnary; - - HINLINE void operator()( const uint32_t currentStep ) - { 
- DataConnector &dc = Environment<>::get().DataConnector(); - auto speciesPtr = dc.get< SpeciesType >( FrameType::getName(), true ); - - DensityFunctor densityFunctor(currentStep); - PositionFunctor positionFunctor(currentStep); - speciesPtr->initDensityProfile(densityFunctor, positionFunctor, currentStep); - - dc.releaseData( FrameType::getName() ); - } -}; - - -/** Generate particles in a species by deriving and manipulating from another species' particles - * - * Create particles in `T_DestSpeciesType` by deriving (copying) all particles - * and their matching attributes (except `particleId`) from `T_SrcSpeciesType`. - * During the derivation, the particle attributes in can be manipulated with - * `T_ManipulateFunctor`. - * - * @note FillAllGaps is called on on T_DestSpeciesType after the derivation is - * finished. - * If the derivation also manipulates the T_SrcSpeciesType, e.g. in order - * to deactivate some particles for a move, FillAllGaps needs to be - * called for the T_SrcSpeciesType manually in the next step! 
- * - * @tparam T_Manipulator a pseudo-binary functor accepting two particle species: - * destination and source, - * @see picongpu::particles::manipulators - * @tparam T_SrcSpeciesType type or name as boost::mpl::string of the source species - * @tparam T_DestSpeciesType type or name as boost::mpl::string of the destination species - * @tparam T_SrcFilter picongpu::particles::filter, particle filter type to - * select particles in T_SrcSpeciesType to derive into - * T_DestSpeciesType - */ -template< - typename T_Manipulator, - typename T_SrcSpeciesType, - typename T_DestSpeciesType = bmpl::_1, - typename T_SrcFilter = filter::All -> -struct ManipulateDerive -{ - using DestSpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_DestSpeciesType - >; - using DestFrameType = typename DestSpeciesType::FrameType; - using SrcSpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SrcSpeciesType - >; - using SrcFrameType = typename SrcSpeciesType::FrameType; - - using DestFunctor = typename bmpl::apply1< - T_Manipulator, - DestSpeciesType - >::type; - - using SrcFilter = typename bmpl::apply1< - T_SrcFilter, - SrcSpeciesType - >::type; - - /* note: this is a FilteredManipulator with filter::All for - * destination species, users can filter the destination directly via if's - * in the T_Manipulator. 
- */ - using FilteredManipulator = manipulators::IBinary< DestFunctor >; - using SrcFilterInterfaced = filter::IUnary< SrcFilter >; - - HINLINE void operator()( const uint32_t currentStep ) - { - DataConnector &dc = Environment<>::get().DataConnector(); - auto speciesPtr = dc.get< DestSpeciesType >( DestFrameType::getName(), true ); - auto srcSpeciesPtr = dc.get< SrcSpeciesType >( SrcFrameType::getName(), true ); - - FilteredManipulator filteredManipulator( currentStep ); - SrcFilterInterfaced srcFilter( currentStep ); - - speciesPtr->deviceDeriveFrom( *srcSpeciesPtr, filteredManipulator, srcFilter ); - - dc.releaseData( DestFrameType::getName() ); - dc.releaseData( SrcFrameType::getName() ); - } -}; - - -/** Generate particles in a species by deriving from another species' particles - * - * Create particles in `T_DestSpeciesType` by deriving (copying) all particles - * and their matching attributes (except `particleId`) from `T_SrcSpeciesType`. - * - * @note FillAllGaps is called on on `T_DestSpeciesType` after the derivation is - * finished. - * - * @tparam T_SrcSpeciesType type or name as boost::mpl::string of the source species - * @tparam T_DestSpeciesType type or name as boost::mpl::string of the destination species - * @tparam T_Filter picongpu::particles::filter, - * particle filter type to select source particles to derive - */ -template< - typename T_SrcSpeciesType, - typename T_DestSpeciesType = bmpl::_1, - typename T_Filter = filter::All -> -struct Derive : ManipulateDerive< - manipulators::generic::None, - T_SrcSpeciesType, - T_DestSpeciesType, - T_Filter -> -{ -}; - - -/** Generate a valid, contiguous list of particle frames - * - * Some operations, such as deactivating or adding particles to a particle - * species can generate "gaps" in our internal particle storage, a list - * of frames. - * - * This operation copies all particles from the end of the frame list to - * "gaps" in the beginning of the frame list. 
- * After execution, the requirement that all particle frames must be filled - * contiguously with valid particles and that all frames but the last are full - * is fulfilled. - * - * @tparam T_SpeciesType type or name as boost::mpl::string of the particle species - * to fill gaps in memory - */ -template< typename T_SpeciesType = bmpl::_1 > -struct FillAllGaps -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - HINLINE void operator()( const uint32_t currentStep ) + namespace particles { - DataConnector &dc = Environment<>::get().DataConnector(); - auto speciesPtr = dc.get< SpeciesType >( FrameType::getName(), true ); - speciesPtr->fillAllGaps(); - dc.releaseData( FrameType::getName() ); - } -}; - -} // namespace particles + /** call a functor + * + * @tparam T_Functor unary lambda functor + * operator() must take two params + * - first: storage tuple + * - second: current time step + */ + template + struct CallFunctor + { + using Functor = T_Functor; + + HINLINE void operator()(const uint32_t currentStep) + { + Functor()(currentStep); + } + }; + + /** Create particle distribution from a normalized density profile + * + * Create particles inside a species. The created particles are macroscopically + * distributed according to a given normalized density profile + * (`T_DensityFunctor`). Their microscopic position inside individual cells is + * determined by the `T_PositionFunctor`. + * + * @note FillAllGaps is automatically called after creation. 
+ * + * @tparam T_DensityFunctor unary lambda functor with profile description, + * see density.param, + * example: picongpu::particles::densityProfiles::Homogenous + * @tparam T_PositionFunctor unary lambda functor with position description, + * see particle.param, + * examples: picongpu::particles::startPosition::Quiet, + * picongpu::particles::startPosition::Random + * @tparam T_SpeciesType type or name as boost::mpl::string of the used species, + * see speciesDefinition.param + */ + template + struct CreateDensity + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + + using UserDensityFunctor = typename bmpl::apply1::type; + /* add interface for compile time interface validation*/ + using DensityFunctor = densityProfiles::IProfile; + + using UserPositionFunctor = typename bmpl::apply1::type; + /* add interface for compile time interface validation*/ + using PositionFunctor = manipulators::IUnary; + + HINLINE void operator()(const uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto speciesPtr = dc.get(FrameType::getName(), true); + + DensityFunctor densityFunctor(currentStep); + PositionFunctor positionFunctor(currentStep); + speciesPtr->initDensityProfile(densityFunctor, positionFunctor, currentStep); + + dc.releaseData(FrameType::getName()); + } + }; + + + /** Generate particles in a species by deriving and manipulating from another species' particles + * + * Create particles in `T_DestSpeciesType` by deriving (copying) all particles + * and their matching attributes (except `particleId`) from `T_SrcSpeciesType`. + * During the derivation, the particle attributes in can be manipulated with + * `T_ManipulateFunctor`. + * + * @note FillAllGaps is called on on T_DestSpeciesType after the derivation is + * finished. + * If the derivation also manipulates the T_SrcSpeciesType, e.g. 
in order + * to deactivate some particles for a move, FillAllGaps needs to be + * called for the T_SrcSpeciesType manually in the next step! + * + * @tparam T_Manipulator a pseudo-binary functor accepting two particle species: + * destination and source, + * @see picongpu::particles::manipulators + * @tparam T_SrcSpeciesType type or name as boost::mpl::string of the source species + * @tparam T_DestSpeciesType type or name as boost::mpl::string of the destination species + * @tparam T_SrcFilter picongpu::particles::filter, particle filter type to + * select particles in T_SrcSpeciesType to derive into + * T_DestSpeciesType + */ + template< + typename T_Manipulator, + typename T_SrcSpeciesType, + typename T_DestSpeciesType = bmpl::_1, + typename T_SrcFilter = filter::All> + struct ManipulateDerive + { + using DestSpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using DestFrameType = typename DestSpeciesType::FrameType; + using SrcSpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using SrcFrameType = typename SrcSpeciesType::FrameType; + + using DestFunctor = typename bmpl::apply1::type; + + using SrcFilter = typename bmpl::apply1::type; + + /* note: this is a FilteredManipulator with filter::All for + * destination species, users can filter the destination directly via if's + * in the T_Manipulator. 
+ */ + using FilteredManipulator = manipulators::IBinary; + using SrcFilterInterfaced = filter::IUnary; + + HINLINE void operator()(const uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto speciesPtr = dc.get(DestFrameType::getName(), true); + auto srcSpeciesPtr = dc.get(SrcFrameType::getName(), true); + + FilteredManipulator filteredManipulator(currentStep); + SrcFilterInterfaced srcFilter(currentStep); + + speciesPtr->deviceDeriveFrom(*srcSpeciesPtr, filteredManipulator, srcFilter); + + dc.releaseData(DestFrameType::getName()); + dc.releaseData(SrcFrameType::getName()); + } + }; + + + /** Generate particles in a species by deriving from another species' particles + * + * Create particles in `T_DestSpeciesType` by deriving (copying) all particles + * and their matching attributes (except `particleId`) from `T_SrcSpeciesType`. + * + * @note FillAllGaps is called on on `T_DestSpeciesType` after the derivation is + * finished. + * + * @tparam T_SrcSpeciesType type or name as boost::mpl::string of the source species + * @tparam T_DestSpeciesType type or name as boost::mpl::string of the destination species + * @tparam T_Filter picongpu::particles::filter, + * particle filter type to select source particles to derive + */ + template + struct Derive : ManipulateDerive + { + }; + + + /** Generate a valid, contiguous list of particle frames + * + * Some operations, such as deactivating or adding particles to a particle + * species can generate "gaps" in our internal particle storage, a list + * of frames. + * + * This operation copies all particles from the end of the frame list to + * "gaps" in the beginning of the frame list. + * After execution, the requirement that all particle frames must be filled + * contiguously with valid particles and that all frames but the last are full + * is fulfilled. 
+ * + * @tparam T_SpeciesType type or name as boost::mpl::string of the particle species + * to fill gaps in memory + */ + template + struct FillAllGaps + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + HINLINE void operator()(const uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto speciesPtr = dc.get(FrameType::getName(), true); + speciesPtr->fillAllGaps(); + dc.releaseData(FrameType::getName()); + } + }; + + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/InterpolationForPusher.hpp b/include/picongpu/particles/InterpolationForPusher.hpp index 53645f8c00..0215962f9e 100644 --- a/include/picongpu/particles/InterpolationForPusher.hpp +++ b/include/picongpu/particles/InterpolationForPusher.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Richard Pausch +/* Copyright 2015-2021 Richard Pausch * * This file is part of PIConGPU. * @@ -18,72 +18,63 @@ */ - - #pragma once namespace picongpu { - -/** functor for particle field interpolator - * - * This functor is a simplification of the full - * field to particle interpolator that can be used in the - * particle pusher - */ -template< typename T_Field2PartInt, typename T_MemoryType, typename T_FieldPosition > -struct InterpolationForPusher -{ - using Field2PartInt = T_Field2PartInt; - - HDINLINE - InterpolationForPusher( const T_MemoryType& mem, const T_FieldPosition& fieldPos ) - : m_mem( mem ), m_fieldPos( fieldPos ) + /** functor for particle field interpolator + * + * This functor is a simplification of the full + * field to particle interpolator that can be used in the + * particle pusher + */ + template + struct InterpolationForPusher { - } + using Field2PartInt = T_Field2PartInt; - /* apply shift policy before interpolation */ - template< typename T_PosType, typename T_ShiftPolicy > - HDINLINE - float3_X operator()( const T_PosType& pos, const T_ShiftPolicy& 
shiftPolicy ) const - { - return Field2PartInt()( shiftPolicy.memory(m_mem, pos), - shiftPolicy.position(pos), - m_fieldPos ); - } + HDINLINE + InterpolationForPusher(const T_MemoryType& mem, const T_FieldPosition& fieldPos) + : m_mem(mem) + , m_fieldPos(fieldPos) + { + } - /* interpolation using given memory and position */ - template< typename T_PosType > - HDINLINE - float3_X operator()( const T_PosType& pos ) const - { - return Field2PartInt()( m_mem, - pos, - m_fieldPos ); - } + /* apply shift policy before interpolation */ + template + HDINLINE float3_X operator()(const T_PosType& pos, const T_ShiftPolicy& shiftPolicy) const + { + return Field2PartInt()(shiftPolicy.memory(m_mem, pos), shiftPolicy.position(pos), m_fieldPos); + } + /* interpolation using given memory and position */ + template + HDINLINE float3_X operator()(const T_PosType& pos) const + { + return Field2PartInt()(m_mem, pos, m_fieldPos); + } -private: - PMACC_ALIGN( m_mem, T_MemoryType ); - PMACC_ALIGN( m_fieldPos, const T_FieldPosition ); -}; + private: + PMACC_ALIGN(m_mem, T_MemoryType); + PMACC_ALIGN(m_fieldPos, const T_FieldPosition); + }; -/** functor to create particle field interpolator - * - * required to get interpolator for pusher - */ -template -struct CreateInterpolationForPusher -{ - template< typename T_MemoryType, typename T_FieldPosition > - HDINLINE - InterpolationForPusher< T_Field2PartInt, T_MemoryType, T_FieldPosition > - operator()( const T_MemoryType& mem, const T_FieldPosition& fieldPos ) + /** functor to create particle field interpolator + * + * required to get interpolator for pusher + */ + template + struct CreateInterpolationForPusher { - return InterpolationForPusher< T_Field2PartInt, T_MemoryType, T_FieldPosition >( mem, fieldPos ); - } -}; + template + HDINLINE InterpolationForPusher operator()( + const T_MemoryType& mem, + const T_FieldPosition& fieldPos) + { + return InterpolationForPusher(mem, fieldPos); + } + }; } // namespace picongpu diff --git 
a/include/picongpu/particles/Manipulate.hpp b/include/picongpu/particles/Manipulate.hpp index d587140563..24c49137c8 100644 --- a/include/picongpu/particles/Manipulate.hpp +++ b/include/picongpu/particles/Manipulate.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Sergei Bastrakov +/* Copyright 2014-2021 Rene Widera, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -32,121 +32,84 @@ namespace picongpu { -namespace particles -{ -namespace detail -{ - /** Operator to create a filtered functor - */ - template< - typename T_Manipulator, - typename T_Species, - typename T_Filter - > - struct MakeUnaryFilteredFunctor + namespace particles { - private: - using Species = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_Species - >; - using SpeciesFunctor = typename bmpl::apply1< - T_Manipulator, - Species - >::type; - using ParticleFilter = typename bmpl::apply1< - T_Filter, - Species - >::type; - public: - using type = manipulators::IUnary< - SpeciesFunctor, - ParticleFilter - >; - }; -} // namespace detail + namespace detail + { + /** Operator to create a filtered functor + */ + template + struct MakeUnaryFilteredFunctor + { + private: + using Species = pmacc::particles::meta::FindByNameOrType_t; + using SpeciesFunctor = typename bmpl::apply1::type; + using ParticleFilter = typename bmpl::apply1::type; - /** Run a user defined manipulation for each particle of a species - * - * Allows to manipulate attributes of existing particles in a species with - * arbitrary unary functors ("manipulators"). - * - * @warning Does NOT call FillAllGaps after manipulation! If the - * manipulation deactivates particles or creates "gaps" in any - * other way, FillAllGaps needs to be called for the - * `T_Species` manually in the next step! 
- * - * @tparam T_Manipulator unary lambda functor accepting one particle - * species, - * @see picongpu::particles::manipulators - * @tparam T_Species type or name as boost::mpl::string of the used species - * @tparam T_Filter picongpu::particles::filter, particle filter type to - * select particles in `T_Species` to manipulate - */ - template< - typename T_Manipulator, - typename T_Species = bmpl::_1, - typename T_Filter = filter::All - > - struct Manipulate : public pmacc::particles::algorithm::CallForEach< - pmacc::particles::meta::FindByNameOrType< - VectorAllSpecies, - T_Species - >, - detail::MakeUnaryFilteredFunctor< - T_Manipulator, - T_Species, - T_Filter - > - > - { - }; + public: + using type = manipulators::IUnary; + }; + } // namespace detail + /** Run a user defined manipulation for each particle of a species + * + * Allows to manipulate attributes of existing particles in a species with + * arbitrary unary functors ("manipulators"). + * + * @warning Does NOT call FillAllGaps after manipulation! If the + * manipulation deactivates particles or creates "gaps" in any + * other way, FillAllGaps needs to be called for the + * `T_Species` manually in the next step! + * + * @tparam T_Manipulator unary lambda functor accepting one particle + * species, + * @see picongpu::particles::manipulators + * @tparam T_Species type or name as boost::mpl::string of the used species + * @tparam T_Filter picongpu::particles::filter, particle filter type to + * select particles in `T_Species` to manipulate + */ + template + struct Manipulate + : public pmacc::particles::algorithm::CallForEach< + pmacc::particles::meta::FindByNameOrType, + detail::MakeUnaryFilteredFunctor> + { + }; - /** Apply a manipulation for each particle of a species or a sequence of - * species - * - * This function provides a high-level interface to particle manipulation - * from simulation stages and plugins, but not .param files. 
The common - * workflow is as follows: - * - select the species to manipulate, often by filtering VectorAllSpecies - * - define a manipulator type; in case the manipulator has a species type - * as a template parameter, use the bmpl::_1 placeholder instead - * - define a filter type when necessary - * - call manipulate() - * - * This is a function-style wrapper around creating a Manipulate object and - * calling its operator(). Unlike Manipulate, it supports both single - * species and sequences of species. - * - * @tparam T_Manipulator unary lambda functor accepting one particle - * species, @see picongpu::particles::manipulators - * @tparam T_Species a single species or a sequence of species; in both - * cases each species is defined by a type or a name - * @tparam T_Filter picongpu::particles::filter, particle filter type to - * select particles in `T_Species` to manipulate via - * `T_DestSpeciesType` - * - * @param currentStep index of the current time iteration - */ - template< - typename T_Manipulator, - typename T_Species, - typename T_Filter = filter::All - > - inline void manipulate( uint32_t const currentStep ) - { - using SpeciesSeq = typename pmacc::ToSeq< T_Species >::type; - using Functor = Manipulate< - T_Manipulator, - bmpl::_1, - T_Filter - >; - pmacc::meta::ForEach< - SpeciesSeq, - Functor - > forEach; - forEach( currentStep ); - } -} //namespace particles -} //namespace picongpu + + /** Apply a manipulation for each particle of a species or a sequence of + * species + * + * This function provides a high-level interface to particle manipulation + * from simulation stages and plugins, but not .param files. 
The common + * workflow is as follows: + * - select the species to manipulate, often by filtering VectorAllSpecies + * - define a manipulator type; in case the manipulator has a species type + * as a template parameter, use the bmpl::_1 placeholder instead + * - define a filter type when necessary + * - call manipulate() + * + * This is a function-style wrapper around creating a Manipulate object and + * calling its operator(). Unlike Manipulate, it supports both single + * species and sequences of species. + * + * @tparam T_Manipulator unary lambda functor accepting one particle + * species, @see picongpu::particles::manipulators + * @tparam T_Species a single species or a sequence of species; in both + * cases each species is defined by a type or a name + * @tparam T_Filter picongpu::particles::filter, particle filter type to + * select particles in `T_Species` to manipulate via + * `T_DestSpeciesType` + * + * @param currentStep index of the current time iteration + */ + template + inline void manipulate(uint32_t const currentStep) + { + using SpeciesSeq = typename pmacc::ToSeq::type; + using Functor = Manipulate; + pmacc::meta::ForEach forEach; + forEach(currentStep); + } + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/Particles.hpp b/include/picongpu/particles/Particles.hpp index 0fad4e54fe..e55de31dc5 100644 --- a/include/picongpu/particles/Particles.hpp +++ b/include/picongpu/particles/Particles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Marco Garten, Alexander Grund * * This file is part of PIConGPU. 
@@ -47,227 +47,181 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -#if( PMACC_CUDA_ENABLED != 1 ) -/* dummy because we are not using mallocMC with cupla - * DeviceHeap is defined in `mallocMC.param` - */ -struct DeviceHeap -{ - using AllocatorHandle = int; - - int getAllocatorHandle() +#if(!BOOST_LANG_CUDA && !BOOST_COMP_HIP) + /* dummy because we are not using mallocMC with cupla + * DeviceHeap is defined in `mallocMC.param` + */ + struct DeviceHeap { - return 0; - } -}; + using AllocatorHandle = int; + + int getAllocatorHandle() + { + return 0; + } + }; #endif -/** particle species - * - * @tparam T_Name name of the species [type boost::mpl::string] - * @tparam T_Attributes sequence with attributes [type boost::mpl forward sequence] - * @tparam T_Flags sequence with flags e.g. solver [type boost::mpl forward sequence] - */ -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -class Particles : public ParticlesBase< - ParticleDescription< - T_Name, - SuperCellSize, - T_Attributes, - T_Flags, - typename bmpl::if_< - // check if alias boundaryCondition is defined for the species - bmpl::contains< - T_Flags, - typename GetKeyFromAlias< - T_Flags, - boundaryCondition< > - >::type - >, - // resolve the alias - typename pmacc::traits::Resolve< - typename GetKeyFromAlias< - T_Flags, - boundaryCondition< > - >::type - >::type, - // fallback if the species has not defined the alias boundaryCondition - pmacc::HandleGuardRegion< - pmacc::particles::policies::ExchangeParticles, - particles::boundary::CallPluginsAndDeleteParticles - > - >::type - >, - MappingDesc, - DeviceHeap ->, public ISimulationData -{ -public: - - using SpeciesParticleDescription = pmacc::ParticleDescription< - T_Name, - SuperCellSize, - T_Attributes, - T_Flags, - typename bmpl::if_< - // check if alias boundaryCondition is defined for the species - bmpl::contains< - T_Flags, - typename GetKeyFromAlias< - T_Flags, - boundaryCondition< > - >::type - >, - // 
resolve the alias - typename pmacc::traits::Resolve< - typename GetKeyFromAlias< - T_Flags, - boundaryCondition< > - >::type - >::type, - // fallback if the species has not defined the alias boundaryCondition - pmacc::HandleGuardRegion< - pmacc::particles::policies::ExchangeParticles, - particles::boundary::CallPluginsAndDeleteParticles - > - >::type - >; - using ParticlesBaseType = ParticlesBase; - using FrameType = typename ParticlesBaseType::FrameType; - using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder; - using ParticlesBoxType = typename ParticlesBaseType::ParticlesBoxType; - - - Particles(const std::shared_ptr& heap, picongpu::MappingDesc cellDescription, SimulationDataId datasetID); - - void createParticleBuffer(); - - void update( uint32_t const currentStep ); - - template - void initDensityProfile(T_DensityFunctor& densityFunctor, T_PositionFunctor& positionFunctor, const uint32_t currentStep); - - template< - typename T_SrcName, - typename T_SrcAttributes, - typename T_SrcFlags, - typename T_ManipulateFunctor, - typename T_SrcFilterFunctor - > - void deviceDeriveFrom( - Particles< - T_SrcName, - T_SrcAttributes, - T_SrcFlags - >& src, - T_ManipulateFunctor& manipulateFunctor, - T_SrcFilterFunctor& srcFilterFunctor - ); - - SimulationDataId getUniqueId() override; - - /* sync device data to host + /** particle species * - * ATTENTION: - in the current implementation only supercell meta data are copied! - * - the shared (between all species) mallocMC buffer must be copied once - * by the user + * @tparam T_Name name of the species [type boost::mpl::string] + * @tparam T_Attributes sequence with attributes [type boost::mpl forward sequence] + * @tparam T_Flags sequence with flags e.g. 
solver [type boost::mpl forward sequence] */ - void synchronize() override; - - void syncToDevice() override; - - static pmacc::traits::StringProperty getStringProperties() + template + class Particles + : public ParticlesBase< + ParticleDescription< + T_Name, + SuperCellSize, + T_Attributes, + T_Flags, + typename bmpl::if_< + // check if alias boundaryCondition is defined for the species + bmpl::contains>::type>, + // resolve the alias + typename pmacc::traits::Resolve< + typename GetKeyFromAlias>::type>::type, + // fallback if the species has not defined the alias boundaryCondition + pmacc::HandleGuardRegion< + pmacc::particles::policies::ExchangeParticles, + particles::boundary::CallPluginsAndDeleteParticles>>::type>, + MappingDesc, + DeviceHeap> + , public ISimulationData { - pmacc::traits::StringProperty propList; - const DataSpace periodic = - Environment::get().EnvironmentController().getCommunicator().getPeriodic(); - - for( uint32_t i = 1; i < NumberOfExchanges::value; ++i ) + public: + using SpeciesParticleDescription = pmacc::ParticleDescription< + T_Name, + SuperCellSize, + T_Attributes, + T_Flags, + typename bmpl::if_< + // check if alias boundaryCondition is defined for the species + bmpl::contains>::type>, + // resolve the alias + typename pmacc::traits::Resolve>::type>::type, + // fallback if the species has not defined the alias boundaryCondition + pmacc::HandleGuardRegion< + pmacc::particles::policies::ExchangeParticles, + particles::boundary::CallPluginsAndDeleteParticles>>::type>; + using ParticlesBaseType = ParticlesBase; + using FrameType = typename ParticlesBaseType::FrameType; + using FrameTypeBorder = typename ParticlesBaseType::FrameTypeBorder; + using ParticlesBoxType = typename ParticlesBaseType::ParticlesBoxType; + + + Particles( + const std::shared_ptr& heap, + picongpu::MappingDesc cellDescription, + SimulationDataId datasetID); + + void createParticleBuffer(); + + void update(uint32_t const currentStep); + + template + void 
initDensityProfile( + T_DensityFunctor& densityFunctor, + T_PositionFunctor& positionFunctor, + const uint32_t currentStep); + + template< + typename T_SrcName, + typename T_SrcAttributes, + typename T_SrcFlags, + typename T_ManipulateFunctor, + typename T_SrcFilterFunctor> + void deviceDeriveFrom( + Particles& src, + T_ManipulateFunctor& manipulateFunctor, + T_SrcFilterFunctor& srcFilterFunctor); + + SimulationDataId getUniqueId() override; + + /* sync device data to host + * + * ATTENTION: - in the current implementation only supercell meta data are copied! + * - the shared (between all species) mallocMC buffer must be copied once + * by the user + */ + void synchronize() override; + + void syncToDevice() override; + + static pmacc::traits::StringProperty getStringProperties() { - // for each planar direction: left right top bottom back front - if( FRONT % i == 0 ) + pmacc::traits::StringProperty propList; + const DataSpace periodic + = Environment::get().EnvironmentController().getCommunicator().getPeriodic(); + + for(uint32_t i = 1; i < NumberOfExchanges::value; ++i) { - const std::string directionName = ExchangeTypeNames()[i]; - const DataSpace relDir = Mask::getRelativeDirections(i); + // for each planar direction: left right top bottom back front + if(FRONT % i == 0) + { + const std::string directionName = ExchangeTypeNames()[i]; + const DataSpace relDir = Mask::getRelativeDirections(i); - const bool isPeriodic = - (relDir * periodic) != DataSpace::create(0); + const bool isPeriodic = (relDir * periodic) != DataSpace::create(0); - std::string boundaryName = "absorbing"; - if( isPeriodic ) - boundaryName = "periodic"; + std::string boundaryName = "absorbing"; + if(isPeriodic) + boundaryName = "periodic"; - if( boundaryName == "absorbing" ) - { - propList[directionName]["param"] = std::string("without field correction"); - } - else - { - propList[directionName]["param"] = std::string("none"); - } + if(boundaryName == "absorbing") + { + 
propList[directionName]["param"] = std::string("without field correction"); + } + else + { + propList[directionName]["param"] = std::string("none"); + } - propList[directionName]["name"] = boundaryName; + propList[directionName]["name"] = boundaryName; + } } + return propList; } - return propList; - } -private: - SimulationDataId m_datasetID; + template + void push(uint32_t const currentStep); - FieldE *fieldE; - FieldB *fieldB; -}; + private: + SimulationDataId m_datasetID; -namespace traits -{ - template< - typename T_Name, - typename T_Attributes, - typename T_Flags - > - struct GetDataBoxType< - picongpu::Particles< - T_Name, - T_Attributes, - T_Flags - > - > - { - using type = typename picongpu::Particles< - T_Name, - T_Attributes, - T_Flags - >::ParticlesBoxType; + /** Get exchange memory size. + * + * @param ex exchange index calculated from pmacc::typ::ExchangeType, valid range: [0;27) + * @return exchange size in bytes + */ + size_t exchangeMemorySize(uint32_t ex) const; + + FieldE* fieldE; + FieldB* fieldB; }; -} //namespace traits -} //namespace picongpu + + namespace traits + { + template + struct GetDataBoxType> + { + using type = typename picongpu::Particles::ParticlesBoxType; + }; + } // namespace traits +} // namespace picongpu namespace pmacc { -namespace traits -{ - template< - typename T_Name, - typename T_Flags, - typename T_Attributes - > - struct GetCTName< - ::picongpu::Particles< - T_Name, - T_Flags, - T_Attributes - > - > + namespace traits { - using type = T_Name; - }; + template + struct GetCTName<::picongpu::Particles> + { + using type = T_Name; + }; -} // namepsace traits + } // namespace traits } // namespace pmacc diff --git a/include/picongpu/particles/Particles.kernel b/include/picongpu/particles/Particles.kernel index a06c24a03f..9906c1f88b 100644 --- a/include/picongpu/particles/Particles.kernel +++ b/include/picongpu/particles/Particles.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Wen Fu, 
+/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Wen Fu, * Marco Garten, Alexander Grund, Richard Pausch * * This file is part of PIConGPU. @@ -35,10 +35,6 @@ #include #include -#include -#include -#include - #include #include #include @@ -52,521 +48,383 @@ namespace picongpu { - -/** derive new particles from a source species - * - * This functor prepares a source and destination particle box to call - * a user defined functor which allows to derive new particles out of - * another species. - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelDeriveParticles -{ - /** frame-wise derive new particles + /** derive new particles from a source species * - * @tparam T_DestParBox pmacc::ParticlesBox, type of the destination species box - * @tparam T_SrcParBox pmacc::ParticlesBox, type of the source species box - * @tparam T_ManipulateFunctor type of the user functor to derive a particle - * @tparam T_Mapping mapping functor type + * This functor prepares a source and destination particle box to call + * a user defined functor which allows to derive new particles out of + * another species. 
* - * @param destBox particles box for the destination species - * @param srcBox particles box of the source species - * @param manipulateFunctor functor to derive a particle out of another one - * must fulfill the interface particles::manipulators::IManipulator - * @param srcFilterFunctor unary filter to select in the source species - * which particles to derive - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_DestParBox, - typename T_SrcParBox, - typename T_ManipulateFunctor, - typename T_SrcFilterFunctor, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_DestParBox destBox, - T_SrcParBox srcBox, - T_ManipulateFunctor manipulateFunctor, - T_SrcFilterFunctor srcFilterFunctor, - T_Mapping const mapper - ) const + template + struct KernelDeriveParticles { - using namespace pmacc::particles::operations; - using namespace mappings::threads; - - using DestFramePtr = typename T_DestParBox::FramePtr; - using SrcFramePtr = typename T_SrcParBox::FramePtr; - - constexpr uint32_t frameSize = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorker = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - PMACC_SMEM( - acc, - srcFrame, - SrcFramePtr - ); - PMACC_SMEM( - acc, - destFrame, - DestFramePtr - ); - - DataSpace< simDim > const superCellIdx = mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ); - - // offset of the superCell (in cells, without any guards) to the origin of the local domain - DataSpace< simDim > const localSuperCellOffset = - superCellIdx - mapper.getGuardingSuperCells( ); - - ForEachIdx< - IdxConfig< - 1, - numWorker - > - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - srcFrame = srcBox.getFirstFrame( superCellIdx ); - if( srcFrame.isValid( ) ) + /** frame-wise derive new particles + * + * @tparam T_DestParBox pmacc::ParticlesBox, type of 
the destination species box + * @tparam T_SrcParBox pmacc::ParticlesBox, type of the source species box + * @tparam T_ManipulateFunctor type of the user functor to derive a particle + * @tparam T_Mapping mapping functor type + * + * @param destBox particles box for the destination species + * @param srcBox particles box of the source species + * @param manipulateFunctor functor to derive a particle out of another one + * must fulfill the interface particles::manipulators::IManipulator + * @param srcFilterFunctor unary filter to select in the source species + * which particles to derive + * @param mapper functor to map a block to a supercell + */ + template< + typename T_DestParBox, + typename T_SrcParBox, + typename T_ManipulateFunctor, + typename T_SrcFilterFunctor, + typename T_Mapping, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_DestParBox destBox, + T_SrcParBox srcBox, + T_ManipulateFunctor manipulateFunctor, + T_SrcFilterFunctor srcFilterFunctor, + T_Mapping const mapper) const + { + using namespace pmacc::particles::operations; + using namespace mappings::threads; + + using DestFramePtr = typename T_DestParBox::FramePtr; + using SrcFramePtr = typename T_SrcParBox::FramePtr; + + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorker = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + PMACC_SMEM(acc, srcFrame, SrcFramePtr); + PMACC_SMEM(acc, destFrame, DestFramePtr); + + DataSpace const superCellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + + // offset of the superCell (in cells, without any guards) to the origin of the local domain + DataSpace const localSuperCellOffset = superCellIdx - mapper.getGuardingSuperCells(); + + ForEachIdx> onlyMaster{workerIdx}; + + onlyMaster([&](uint32_t const, uint32_t const) { + srcFrame = srcBox.getFirstFrame(superCellIdx); + if(srcFrame.isValid()) { // we have something to clone - destFrame = 
destBox.getEmptyFrame( ); + destFrame = destBox.getEmptyFrame(acc); } - } - ); - - auto accManipulator = manipulateFunctor( - acc, - localSuperCellOffset, - WorkerCfg< numWorker >{ workerIdx } - ); - auto accSrcFilter = srcFilterFunctor( - acc, - localSuperCellOffset, - WorkerCfg< numWorker >{ workerIdx } - ); - - __syncthreads( ); - - // move over all Frames - while( srcFrame.isValid( ) ) - { - using ParticleDomCfg = IdxConfig< - frameSize, - numWorker - >; - - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg >{ workerIdx } - ( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto parDest = destFrame[ linearIdx ]; - auto parSrc = srcFrame[ linearIdx ]; - if( parSrc[ multiMask_ ] != 1 ) - parSrc.setHandleInvalid( ); + }); - if( accSrcFilter( acc, parSrc ) ) + auto accManipulator = manipulateFunctor(acc, localSuperCellOffset, WorkerCfg{workerIdx}); + auto accSrcFilter = srcFilterFunctor(acc, localSuperCellOffset, WorkerCfg{workerIdx}); + + cupla::__syncthreads(acc); + + // move over all Frames + while(srcFrame.isValid()) + { + using ParticleDomCfg = IdxConfig; + + // loop over all particles in the frame + ForEachIdx{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + auto parDest = destFrame[linearIdx]; + auto parSrc = srcFrame[linearIdx]; + if(parSrc[multiMask_] != 1) + parSrc.setHandleInvalid(); + + if(accSrcFilter(acc, parSrc)) { - assign( - parDest, - deselect< particleId >( parSrc ) - ); - - accManipulator( - acc, - parDest, - parSrc - ); + assign(parDest, deselect(parSrc)); + + accManipulator(acc, parDest, parSrc); } - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - destBox.setAsLastFrame( - acc, - destFrame, - superCellIdx - ); - - srcFrame = srcBox.getNextFrame( srcFrame ); - if( srcFrame.isValid( ) ) + onlyMaster([&](uint32_t const, uint32_t const) { + destBox.setAsLastFrame(acc, destFrame, superCellIdx); + + srcFrame = 
srcBox.getNextFrame(srcFrame); + if(srcFrame.isValid()) { - destFrame = destBox.getEmptyFrame( ); + destFrame = destBox.getEmptyFrame(acc); } - } - ); - __syncthreads( ); + }); + cupla::__syncthreads(acc); + } } - } -}; + }; -/** move over all particles - * - * Move frame-wise over a species and call a functor for each particle. - * This kernel is optimized for the particle push step and handles the - * special flag `mustShift` of the supercell to optimize the kernel shift particles - * in pmacc. - * - * @tparam T_numWorkers number of workers - * @tparam T_DataDomain pmacc::SuperCellDescription, compile time data domain - * description with a CORE and GUARD - */ -template< - uint32_t T_numWorkers, - typename T_DataDomain -> -struct KernelMoveAndMarkParticles -{ - /** update all particles + /** move over all particles * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_EBox pmacc::DataBox, electric field box type - * @tparam T_BBox pmacc::DataBox, magnetic field box type - * @tparam T_ParticleFunctor particle functor type - * @tparam T_Mapping mapper functor type - * @tparam T_Acc alpaka accelerator type + * Move frame-wise over a species and call a functor for each particle. + * This kernel is optimized for the particle push step and handles the + * special flag `mustShift` of the supercell to optimize the kernel shift particles + * in pmacc. 
* - * @param alpaka accelerator - * @param pb particle memory - * @param fieldE electric field data - * @param fieldB magnetic field data - * @param particleFunctor functor to manipulate (update) a particle - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers + * @tparam T_DataDomain pmacc::SuperCellDescription, compile time data domain + * description with a CORE and GUARD */ - template< - typename T_ParBox, - typename T_EBox, - typename T_BBox, - typename T_ParticleFunctor, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - T_EBox fieldE, - T_BBox fieldB, - uint32_t const currentStep, - T_ParticleFunctor particleFunctor, - T_Mapping mapper - ) const + template + struct KernelMoveAndMarkParticles { - using namespace mappings::threads; + /** update all particles + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_EBox pmacc::DataBox, electric field box type + * @tparam T_BBox pmacc::DataBox, magnetic field box type + * @tparam T_ParticleFunctor particle functor type + * @tparam T_Mapping mapper functor type + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param pb particle memory + * @param fieldE electric field data + * @param fieldB magnetic field data + * @param particleFunctor functor to manipulate (update) a particle + * @param mapper functor to map a block to a supercell + */ + template< + typename T_ParBox, + typename T_EBox, + typename T_BBox, + typename T_ParticleFunctor, + typename T_Mapping, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_ParBox pb, + T_EBox fieldE, + T_BBox fieldB, + uint32_t const currentStep, + T_ParticleFunctor particleFunctor, + T_Mapping mapper) const + { + using namespace mappings::threads; + + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + + uint32_t const workerIdx = 
cupla::threadIdx(acc).x; + + using FramePtr = typename T_ParBox::FramePtr; + + DataSpace const block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + + // relative offset (in cells) to the supercell (including the guard) + DataSpace const superCellOffset = block * SuperCellSize::toRT(); + + using ParticleDomCfg = IdxConfig; + + PMACC_SMEM(acc, mustShift, int); + + // current processed frame + FramePtr frame; + lcellId_t particlesInSuperCell; - constexpr uint32_t frameSize = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; + ForEachIdx> onlyMaster{workerIdx}; - uint32_t const workerIdx = threadIdx.x; + onlyMaster([&](uint32_t const, uint32_t const) { mustShift = 0; }); - using FramePtr = typename T_ParBox::FramePtr; + frame = pb.getLastFrame(block); + particlesInSuperCell = pb.getSuperCell(block).getSizeLastFrame(); - DataSpace< simDim > const block( - mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) - ); + auto cachedB = CachedBox::create<0, typename T_BBox::ValueType>(acc, T_DataDomain()); + auto cachedE = CachedBox::create<1, typename T_EBox::ValueType>(acc, T_DataDomain()); - // relative offset (in cells) to the supercell (including the guard) - DataSpace< simDim > const superCellOffset = block * SuperCellSize::toRT(); + cupla::__syncthreads(acc); - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; + // end kernel if we have no frames + if(!frame.isValid()) + return; - PMACC_SMEM( - acc, - mustShift, - int - ); + nvidia::functors::Assign assign; + ThreadCollective collective{workerIdx}; - // current processed frame - FramePtr frame; - lcellId_t particlesInSuperCell; + auto fieldBBlock = fieldB.shift(superCellOffset); + collective(acc, assign, cachedB, fieldBBlock); - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; + auto fieldEBlock = fieldE.shift(superCellOffset); + collective(acc, assign, cachedE, fieldEBlock); - onlyMaster( - [&]( - 
uint32_t const, - uint32_t const - ) + cupla::__syncthreads(acc); + + // move over frames and call frame solver + while(frame.isValid()) { - mustShift = 0; - } - ); - - frame = pb.getLastFrame( block ); - particlesInSuperCell = pb.getSuperCell( block ).getSizeLastFrame( ); - - auto cachedB = CachedBox::create< - 0, - typename T_BBox::ValueType - >( - acc, - T_DataDomain( ) - ); - auto cachedE = CachedBox::create< - 1, - typename T_EBox::ValueType - >( - acc, - T_DataDomain( ) - ); - - __syncthreads(); - - // end kernel if we have no frames - if( !frame.isValid( ) ) - return; - - nvidia::functors::Assign assign; - ThreadCollective< - T_DataDomain, - numWorkers - > collective{ workerIdx }; - - auto fieldBBlock = fieldB.shift( superCellOffset ); - collective( - acc, - assign, - cachedB, - fieldBBlock - ); - - auto fieldEBlock = fieldE.shift( superCellOffset ); - collective( - acc, - assign, - cachedE, - fieldEBlock - ); - - __syncthreads(); - - // move over frames and call frame solver - while( frame.isValid( ) ) - { - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg >{ workerIdx } - ( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - if( linearIdx < particlesInSuperCell ) + // loop over all particles in the frame + ForEachIdx{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx < particlesInSuperCell) { - particleFunctor( - acc, - *frame, - linearIdx, - cachedB, - cachedE, - currentStep, - mustShift - ); + particleFunctor(acc, *frame, linearIdx, cachedB, cachedE, currentStep, mustShift); } - } - ); - // independent for each worker - frame = pb.getPreviousFrame( frame ); - particlesInSuperCell = frameSize; - } + }); + // independent for each worker + frame = pb.getPreviousFrame(frame); + particlesInSuperCell = frameSize; + } - __syncthreads(); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { + onlyMaster([&](uint32_t const, uint32_t const) { /* set in SuperCell the 
mustShift flag which is an optimization * for shift particles (pmacc::KernelShiftParticles) */ - if( mustShift == 1 ) + if(mustShift == 1) { - pb.getSuperCell( - mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) - ).setMustShift( true ); + pb.getSuperCell(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))) + .setMustShift(true); } - } - ); - } -}; - -template -struct PushParticlePerFrame -{ + }); + } + }; - template - DINLINE void operator()( - T_Acc const & acc, - FrameType& frame, - int localIdx, - BoxB& bBox, - BoxE& eBox, - uint32_t const currentStep, - int& mustShift - ) + template + struct PushParticlePerFrame { + template + DINLINE void operator()( + T_Acc const& acc, + FrameType& frame, + int localIdx, + BoxB& bBox, + BoxE& eBox, + uint32_t const currentStep, + int& mustShift) + { + using Block = TVec; + using Field2ParticleInterpolation = T_Field2ParticleInterpolation; - using Block = TVec; - using Field2ParticleInterpolation = T_Field2ParticleInterpolation; + using BType = typename BoxB::ValueType; + using EType = typename BoxE::ValueType; - using BType = typename BoxB::ValueType; - using EType = typename BoxE::ValueType; + auto particle = frame[localIdx]; - auto particle = frame[localIdx]; + floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; - floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; + DataSpace localCell(DataSpaceOperations::template map(particleCellIdx)); - DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); + const traits::FieldPosition fieldPosE; + const traits::FieldPosition fieldPosB; - const traits::FieldPosition fieldPosE; - const traits::FieldPosition fieldPosB; + auto functorEfield = CreateInterpolationForPusher()( + eBox.shift(localCell).toCursor(), + fieldPosE()); + auto functorBfield = CreateInterpolationForPusher()( + bBox.shift(localCell).toCursor(), + fieldPosB()); - auto functorEfield = 
CreateInterpolationForPusher()( eBox.shift(localCell).toCursor(), fieldPosE() ); - auto functorBfield = CreateInterpolationForPusher()( bBox.shift(localCell).toCursor(), fieldPosB() ); + /** @todo this functor should only manipulate the momentum and all changes + * in position and cell below need to go into a separate kernel + */ + PushAlgo push; + push(functorBfield, functorEfield, particle, pos, currentStep); - /** @todo this functor should only manipulate the momentum and all changes - * in position and cell below need to go into a separate kernel - */ - PushAlgo push; - push( - functorBfield, - functorEfield, - particle, - pos, - currentStep - ); - - DataSpace dir; - for (uint32_t i = 0; i < simDim; ++i) - { - /* ATTENTION we must handle float rounding errors - * pos in range [-1;2) - * - * If pos is negative and very near to 0 (e.g. pos < -1e-8) - * and we move pos with pos+=1.0 back to normal in cell postion - * we get a rounding error and pos is assigned to 1. This breaks - * our in cell definition range [0,1) - * - * if pos negativ moveDir is set to -1 - * if pos positive and >1 moveDir is set to +1 - * 0 (zero) if particle stays in cell + DataSpace dir; + for(uint32_t i = 0; i < simDim; ++i) + { + /* ATTENTION we must handle float rounding errors + * pos in range [-1;2) + * + * If pos is negative and very near to 0 (e.g. pos < -1e-8) + * and we move pos with pos+=1.0 back to normal in cell postion + * we get a rounding error and pos is assigned to 1. 
This breaks + * our in cell definition range [0,1) + * + * if pos negativ moveDir is set to -1 + * if pos positive and >1 moveDir is set to +1 + * 0 (zero) if particle stays in cell + */ + float_X moveDir = math::floor(pos[i]); + /* shift pos back to cell range [0;1)*/ + pos[i] -= moveDir; + /* check for rounding errors and correct them + * if position now is 1 we have a rounding error + * + * We correct moveDir that we not have left the cell + */ + const float_X valueCorrector = math::floor(pos[i]); + /* One has also to correct moveDir for the following reason: + * Imagine a new particle moves to -1e-20, leaving the cell to the left, + * setting moveDir to -1. + * The new in-cell position will be -1e-20 + 1.0, + * which can flip to 1.0 (wrong value). + * We move the particle back to the old cell at position 0.0 and + * moveDir has to be corrected back, too (add +1 again).*/ + moveDir += valueCorrector; + /* If we have corrected moveDir we must set pos to 0 */ + pos[i] -= valueCorrector; + dir[i] = precisionCast(moveDir); + } + particle[position_] = pos; + + /* new local cell position after particle move + * can be out of supercell */ - float_X moveDir = math::floor(pos[i]); - /* shift pos back to cell range [0;1)*/ - pos[i] -= moveDir; - /* check for rounding errors and correct them - * if position now is 1 we have a rounding error + localCell += dir; + + /* ATTENTION ATTENTION we cast to unsigned, this means that a negative + * direction is know a very very big number, than we compare with supercell! * - * We correct moveDir that we not have left the cell + * if particle is inside of the supercell the **unsigned** representation + * of dir is always >= size of the supercell */ - const float_X valueCorrector = math::floor(pos[i]); - /* One has also to correct moveDir for the following reason: - * Imagine a new particle moves to -1e-20, leaving the cell to the left, - * setting moveDir to -1. 
- * The new in-cell position will be -1e-20 + 1.0, - * which can flip to 1.0 (wrong value). - * We move the particle back to the old cell at position 0.0 and - * moveDir has to be corrected back, too (add +1 again).*/ - moveDir += valueCorrector; - /* If we have corrected moveDir we must set pos to 0 */ - pos[i] -= valueCorrector; - dir[i] = precisionCast(moveDir); - } - particle[position_] = pos; - - /* new local cell position after particle move - * can be out of supercell - */ - localCell += dir; - - /* ATTENTION ATTENTION we cast to unsigned, this means that a negative - * direction is know a very very big number, than we compare with supercell! - * - * if particle is inside of the supercell the **unsigned** representation - * of dir is always >= size of the supercell - */ - for (uint32_t i = 0; i < simDim; ++i) - dir[i] *= precisionCast(localCell[i]) >= precisionCast(TVec::toRT()[i]) ? 1 : 0; + for(uint32_t i = 0; i < simDim; ++i) + dir[i] *= precisionCast(localCell[i]) >= precisionCast(TVec::toRT()[i]) ? 
1 : 0; - /* if partice is outside of the supercell we use mod to - * set particle at cell supercellSize to 1 - * and partticle at cell -1 to supercellSize-1 - * % (mod) can't use with negativ numbers, we add one supercellSize to hide this - * - localCell.x() = (localCell.x() + TVec::x) % TVec::x; - localCell.y() = (localCell.y() + TVec::y) % TVec::y; - localCell.z() = (localCell.z() + TVec::z) % TVec::z; - */ + /* if partice is outside of the supercell we use mod to + * set particle at cell supercellSize to 1 + * and partticle at cell -1 to supercellSize-1 + * % (mod) can't use with negativ numbers, we add one supercellSize to hide this + * + localCell.x() = (localCell.x() + TVec::x) % TVec::x; + localCell.y() = (localCell.y() + TVec::y) % TVec::y; + localCell.z() = (localCell.z() + TVec::z) % TVec::z; + */ - /*dir is only +1 or -1 if particle is outside of supercell - * y=cell-(dir*superCell_size) - * y=0 if dir==-1 - * y=superCell_size if dir==+1 - * for dir 0 localCel is not changed - */ - localCell -= (dir * TVec::toRT()); - /*calculate one dimensional cell index*/ - particle[localCellIdx_] = DataSpaceOperations::template map (localCell); - - /* [ dir + int(dir < 0)*3 ] == [ (dir + 3) %3 = y ] - * but without modulo - * y=0 for dir = 0 - * y=1 for dir = 1 - * y=2 for dir = -1 - */ - int direction = 1; - uint32_t exchangeType = 1; // see inlcude/pmacc/types.h for RIGHT, BOTTOM and BACK - for (uint32_t i = 0; i < simDim; ++i) - { - direction += (dir[i] == -1 ? 
2 : dir[i]) * exchangeType; - exchangeType *= 3; // =3^i (1=RIGHT, 3=BOTTOM; 9=BACK) - } + /*dir is only +1 or -1 if particle is outside of supercell + * y=cell-(dir*superCell_size) + * y=0 if dir==-1 + * y=superCell_size if dir==+1 + * for dir 0 localCel is not changed + */ + localCell -= (dir * TVec::toRT()); + /*calculate one dimensional cell index*/ + particle[localCellIdx_] = DataSpaceOperations::template map(localCell); + + /* [ dir + int(dir < 0)*3 ] == [ (dir + 3) %3 = y ] + * but without modulo + * y=0 for dir = 0 + * y=1 for dir = 1 + * y=2 for dir = -1 + */ + int direction = 1; + uint32_t exchangeType = 1; // see inlcude/pmacc/types.h for RIGHT, BOTTOM and BACK + for(uint32_t i = 0; i < simDim; ++i) + { + direction += (dir[i] == -1 ? 2 : dir[i]) * exchangeType; + exchangeType *= 3; // =3^i (1=RIGHT, 3=BOTTOM; 9=BACK) + } - particle[multiMask_] = direction; + particle[multiMask_] = direction; - /* set our tuning flag if minimal one particle leave the supercell - * This flag is needed for later fast shift of particles only if needed - */ - if (direction >= 2) - { - /* if we did not use atomic we would get a WAW error */ - nvidia::atomicAllExch(acc, &mustShift, 1, ::alpaka::hierarchy::Threads{}); + /* set our tuning flag if minimal one particle leave the supercell + * This flag is needed for later fast shift of particles only if needed + */ + if(direction >= 2) + { + /* if we did not use atomic we would get a WAW error */ + nvidia::atomicAllExch(acc, &mustShift, 1, ::alpaka::hierarchy::Threads{}); + } } - } -}; - + }; -} //namespace +} // namespace picongpu diff --git a/include/picongpu/particles/Particles.tpp b/include/picongpu/particles/Particles.tpp index f900f0d25a..9bceedcb06 100644 --- a/include/picongpu/particles/Particles.tpp +++ b/include/picongpu/particles/Particles.tpp @@ -1,5 +1,5 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, Felix Schmitt, - * Alexander Grund +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, 
Rene Widera, Richard Pausch, Felix Schmitt, + * Alexander Grund, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -24,6 +24,7 @@ #include "picongpu/particles/Particles.hpp" #include "picongpu/particles/Particles.kernel" +#include "picongpu/particles/pusher/Traits.hpp" #include "picongpu/particles/traits/GetExchangeMemCfg.hpp" #include @@ -47,343 +48,370 @@ #include #include #include - +#include namespace picongpu { - - -using namespace pmacc; - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -Particles< - T_Name, - T_Flags, - T_Attributes ->::Particles( - const std::shared_ptr& heap, - picongpu::MappingDesc cellDescription, - SimulationDataId datasetID -) : - ParticlesBase< - SpeciesParticleDescription, - picongpu::MappingDesc, - DeviceHeap - >( - heap, - cellDescription - ), - m_datasetID( datasetID ) -{ - using ExchangeMemCfg = GetExchangeMemCfg_t< Particles >; - - size_t sizeOfExchanges = 0u; - - const uint32_t commTag = pmacc::traits::GetUniqueTypeId::uid() + SPECIES_FIRSTTAG; - log ( "communication tag for species %1%: %2%" ) % FrameType::getName( ) % commTag; - - this->particlesBuffer->addExchange( Mask( LEFT ) + Mask( RIGHT ), - ExchangeMemCfg::BYTES_EXCHANGE_X, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_EXCHANGE_X * 2u; - - this->particlesBuffer->addExchange( Mask( TOP ) + Mask( BOTTOM ), - ExchangeMemCfg::BYTES_EXCHANGE_Y, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_EXCHANGE_Y * 2u; - - //edges of the simulation area - this->particlesBuffer->addExchange( Mask( RIGHT + TOP ) + Mask( LEFT + TOP ) + - Mask( LEFT + BOTTOM ) + Mask( RIGHT + BOTTOM ), ExchangeMemCfg::BYTES_EDGES, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_EDGES * 4u; - -#if(SIMDIM==DIM3) - this->particlesBuffer->addExchange( Mask( FRONT ) + Mask( BACK ), ExchangeMemCfg::BYTES_EXCHANGE_Z, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_EXCHANGE_Z * 2u; - - //edges of the simulation area - 
this->particlesBuffer->addExchange( Mask( FRONT + TOP ) + Mask( BACK + TOP ) + - Mask( FRONT + BOTTOM ) + Mask( BACK + BOTTOM ), - ExchangeMemCfg::BYTES_EDGES, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_EDGES * 4u; - - this->particlesBuffer->addExchange( Mask( FRONT + RIGHT ) + Mask( BACK + RIGHT ) + - Mask( FRONT + LEFT ) + Mask( BACK + LEFT ), - ExchangeMemCfg::BYTES_EDGES, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_EDGES * 4u; - - //corner of the simulation area - this->particlesBuffer->addExchange( Mask( TOP + FRONT + RIGHT ) + Mask( TOP + BACK + RIGHT ) + - Mask( BOTTOM + FRONT + RIGHT ) + Mask( BOTTOM + BACK + RIGHT ), - ExchangeMemCfg::BYTES_CORNER, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_CORNER * 4u; - - this->particlesBuffer->addExchange( Mask( TOP + FRONT + LEFT ) + Mask( TOP + BACK + LEFT ) + - Mask( BOTTOM + FRONT + LEFT ) + Mask( BOTTOM + BACK + LEFT ), - ExchangeMemCfg::BYTES_CORNER, - commTag); - sizeOfExchanges += ExchangeMemCfg::BYTES_CORNER * 4u; -#endif - - /* The buffer size must be multiplied by two because PMacc generates a send - * and receive buffer for each direction. + using namespace pmacc; + + namespace detail + { + /* Helper to check if a member exists + * + * Derived from C++17 std::void_t. + * This implementation will be removed with Void provided by alpaka 0.6.0 release (not included in the 0.6.0rc3 + * we currently using). + */ + template + using Void = void; + + /** Calculate the scaling factor for each direction. + * + * The scaling factor is derived from the reference size of the local domain and a scaling factor provided by + * the user. + * + * @tparam T_ExchangeMemCfg exchange configuration for a species + * @tparam T_Sfinae Type for conditionally specialization (no input parameter) + * @{ + */ + template + struct DirScalingFactor + { + //! 
@return factor to scale the amount of memory for each direction + static floatD_64 get() + { + return floatD_64::create(1.0); + } + }; + + /** Specialization for species with exchange memory information which provides + * DIR_SCALING_FACTOR and REF_LOCAL_DOM_SIZE + */ + template + struct DirScalingFactor< + T_ExchangeMemCfg, + Void< + decltype(std::declval().DIR_SCALING_FACTOR), + typename T_ExchangeMemCfg::REF_LOCAL_DOM_SIZE>> + { + static floatD_64 get() + { + auto baseLocalCells = T_ExchangeMemCfg::REF_LOCAL_DOM_SIZE::toRT(); + auto userScalingFactor = T_ExchangeMemCfg{}.DIR_SCALING_FACTOR; + + auto localDomSize = Environment::get().SubGrid().getLocalDomain().size; + // set too local domain size in case there is no base volume defined + for(uint32_t d = 0; d < simDim; ++d) + { + if(baseLocalCells[d] <= 0) + baseLocalCells[d] = localDomSize[d]; + } + + auto scale = floatD_64::create(1.0); + for(uint32_t d = 0; d < simDim; ++d) + { + auto dir1 = (d + 1) % simDim; + auto dir2 = (d + 2) % simDim; + // precision: numbers are small, therefore the usage of double is fine + auto scaleDirection = std::ceil( + float_64(localDomSize[dir1]) / float_64(baseLocalCells[dir1]) * float_64(localDomSize[dir2]) + / float_64(baseLocalCells[dir2])); + float_64 scalingFactor = scaleDirection * userScalingFactor[d]; + // do not scale down + scale[d] = std::max(scalingFactor, 1.0); + } + + return scale; + } + }; + + //! 
@} + } // namespace detail + template + size_t Particles::exchangeMemorySize(uint32_t ex) const + { + // no communication direction + if(ex == 0u) + return 0u; + + using ExchangeMemCfg = GetExchangeMemCfg_t; + // scaling factor for each direction + auto dirScalingFactors = picongpu::detail::DirScalingFactor::get(); + + /* type of the exchange direction + * 1 = plane + * 2 = edge + * 3 = corner + */ + uint32_t relDirType = 0u; + + // scaling factor for the current exchange + float_64 exchangeScalingFactor = 1.0; + + auto relDir = Mask::getRelativeDirections(ex); + for(uint32_t d = 0; d < simDim; ++d) + { + // calculate the exchange type + relDirType += std::abs(relDir[d]); + exchangeScalingFactor *= relDir[d] != 0 ? dirScalingFactors[d] : 1.0; + } + size_t exchangeBytes = 0; + + using ExchangeMemCfg = GetExchangeMemCfg_t; + + // it is a exachange + if(relDirType == 1u) + { + // x, y, z, edge, corner + pmacc::math::Vector requiredMem( + ExchangeMemCfg::BYTES_EXCHANGE_X, + ExchangeMemCfg::BYTES_EXCHANGE_Y, + ExchangeMemCfg::BYTES_EXCHANGE_Z); + + for(uint32_t d = 0; d < simDim; ++d) + if(std::abs(relDir[d]) == 1) + { + exchangeBytes = requiredMem[d]; + break; + } + } + // it is an edge + else if(relDirType == 2u) + exchangeBytes = ExchangeMemCfg::BYTES_EDGES; + // it is a corner + else + exchangeBytes = ExchangeMemCfg::BYTES_CORNER; + + // using double to calculate the memory size is fine, double can precise store integer values up too 2^53 + return exchangeBytes * exchangeScalingFactor; + } + + template + Particles::Particles( + const std::shared_ptr& heap, + picongpu::MappingDesc cellDescription, + SimulationDataId datasetID) + : ParticlesBase(heap, cellDescription) + , m_datasetID(datasetID) + { + size_t sizeOfExchanges = 0u; + + const uint32_t commTag = pmacc::traits::GetUniqueTypeId::uid() + SPECIES_FIRSTTAG; + log("communication tag for species %1%: %2%") % FrameType::getName() % commTag; + + auto const numExchanges = NumberOfExchanges::value; + for(uint32_t 
exchange = 1u; exchange < numExchanges; ++exchange) + { + auto mask = Mask(exchange); + auto mem = exchangeMemorySize(exchange); + + this->particlesBuffer->addExchange(mask, mem, commTag); + /* The buffer size must be multiplied by two because PMacc generates a send + * and receive buffer for each direction. + */ + sizeOfExchanges += mem * 2u; + }; + + constexpr size_t byteToMiB = 1024u * 1024u; + + log("size for all exchange of species %1% = %2% MiB") % FrameType::getName() + % (static_cast(sizeOfExchanges) / static_cast(byteToMiB)); + } + + template + void Particles::createParticleBuffer() + { + this->particlesBuffer->createParticleBuffer(); + } + + template + SimulationDataId Particles::getUniqueId() + { + return m_datasetID; + } + + template + void Particles::synchronize() + { + this->particlesBuffer->deviceToHost(); + } + + template + void Particles::syncToDevice() + { + } + + /** Launcher of the particle push + * + * @tparam T_Pusher pusher type + * @tparam T_isComposite if the pusher is composite */ - sizeOfExchanges *= 2u; - - constexpr size_t byteToMiB = 1024u * 1024u; - - log< picLog::MEMORY >( "size for all exchange of species %1% = %2% MiB" ) % - FrameType::getName( ) % - ( static_cast< float_64 >( sizeOfExchanges ) / static_cast< float_64 >( byteToMiB ) ); -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -void -Particles< - T_Name, - T_Flags, - T_Attributes ->::createParticleBuffer( ) -{ - this->particlesBuffer->createParticleBuffer( ); -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -SimulationDataId -Particles< - T_Name, - T_Flags, - T_Attributes ->::getUniqueId( ) -{ - return m_datasetID; -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -void -Particles< - T_Name, - T_Flags, - T_Attributes ->::synchronize( ) -{ - this->particlesBuffer->deviceToHost(); -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -void -Particles< - 
T_Name, - T_Flags, - T_Attributes ->::syncToDevice( ) -{ + template::value> + struct PushLauncher; -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -void -Particles< - T_Name, - T_Flags, - T_Attributes ->::update( uint32_t const currentStep ) -{ - using PusherAlias = typename GetFlagType >::type; - using ParticlePush = typename pmacc::traits::Resolve::type; - - using InterpolationScheme = typename pmacc::traits::Resolve< - typename GetFlagType< - FrameType, - interpolation< > - >::type - >::type; - - using FrameSolver = PushParticlePerFrame< - ParticlePush, - MappingDesc::SuperCellSize, - InterpolationScheme - >; - - DataConnector & dc = Environment< >::get( ).DataConnector( ); - auto fieldE = dc.get< FieldE >( - FieldE::getName(), - true - ); - auto fieldB = dc.get< FieldB >( - FieldB::getName(), - true - ); - - // adjust interpolation area in particle pusher to allow sub-sampling pushes - using LowerMargin = typename GetLowerMarginPusher< Particles >::type; - using UpperMargin = typename GetUpperMarginPusher< Particles >::type; - - using BlockArea = SuperCellDescription< - typename MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - AreaMapping< - CORE + BORDER, - picongpu::MappingDesc - > mapper( this->cellDescription ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelMoveAndMarkParticles< numWorkers, BlockArea >{ } )( - mapper.getGridDim(), - numWorkers - )( - this->getDeviceParticlesBox( ), - fieldE->getDeviceDataBox( ), - fieldB->getDeviceDataBox( ), - currentStep, - FrameSolver( ), - mapper - ); - - dc.releaseData( FieldE::getName() ); - dc.releaseData( FieldB::getName() ); - - ParticlesBaseType::template shiftParticles < CORE + BORDER > ( ); -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -template< - typename T_DensityFunctor, - typename T_PositionFunctor -> -void 
-Particles< - T_Name, - T_Flags, - T_Attributes ->::initDensityProfile( - T_DensityFunctor& densityFunctor, - T_PositionFunctor& positionFunctor, - const uint32_t currentStep -) -{ - log( "initialize density profile for species %1%" ) % FrameType::getName( ); - - uint32_t const numSlides = MovingWindow::getInstance( ).getSlideCounter( currentStep ); - SubGrid< simDim > const & subGrid = Environment< simDim >::get( ).SubGrid( ); - DataSpace< simDim > localCells = subGrid.getLocalDomain( ).size; - DataSpace< simDim > totalGpuCellOffset = subGrid.getLocalDomain( ).offset; - totalGpuCellOffset.y( ) += numSlides * localCells.y( ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - AreaMapping< - CORE + BORDER, - picongpu::MappingDesc - > mapper( this->cellDescription ); - PMACC_KERNEL( - KernelFillGridWithParticles< - numWorkers, - Particles - >{} - ) - ( - mapper.getGridDim( ), - numWorkers - ) - ( - densityFunctor, - positionFunctor, - totalGpuCellOffset, - this->particlesBuffer->getDeviceParticleBox( ), - mapper - ); - - this->fillAllGaps( ); -} - -template< - typename T_Name, - typename T_Flags, - typename T_Attributes -> -template< - typename T_SrcName, - typename T_SrcAttributes, - typename T_SrcFlags, - typename T_ManipulateFunctor, - typename T_SrcFilterFunctor -> -void -Particles< - T_Name, - T_Flags, - T_Attributes ->::deviceDeriveFrom( - Particles< - T_SrcName, - T_SrcAttributes, - T_SrcFlags - >& src, - T_ManipulateFunctor& manipulatorFunctor, - T_SrcFilterFunctor& srcFilterFunctor -) -{ - log< picLog::SIMULATION_STATE > ( "clone species %1%" ) % FrameType::getName( ); - - AreaMapping mapper(this->cellDescription); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelDeriveParticles< numWorkers >{ } )( - mapper.getGridDim(), - numWorkers - )( - 
this->getDeviceParticlesBox( ), - src.getDeviceParticlesBox( ), - manipulatorFunctor, - srcFilterFunctor, - mapper - ); - this->fillAllGaps( ); -} + /** Launcher of the particle push for non-composite pushers + * + * @tparam T_Pusher pusher type + */ + template + struct PushLauncher + { + /** Launch the pusher for all particles of a species + * + * @tparam T_Particles particles type + * @param currentStep current time iteration + */ + template + void operator()(T_Particles&& particles, uint32_t const currentStep) const + { + particles.template push(currentStep); + } + }; + + /** Launcher of the particle push for composite pushers + * + * @tparam T_Pusher pusher type + */ + template + struct PushLauncher + { + /** Launch the pusher for all particles of a species + * + * @tparam T_Particles particles type + * @param currentStep current time iteration + */ + template + void operator()(T_Particles&& particles, uint32_t const currentStep) const + { + /* Here we check for the active pusher and only call PushLauncher for + * that one. Note that we still instantiate both templates, but this + * should be fine as both pushers are eventually getting used (otherwise + * using the composite does not make sense). 
+ */ + auto activePusherIdx = T_CompositePusher::activePusherIdx(currentStep); + if(activePusherIdx == 1) + PushLauncher{}(particles, currentStep); + else if(activePusherIdx == 2) + PushLauncher{}(particles, currentStep); + } + }; + + template + void Particles::update(uint32_t const currentStep) + { + using PusherAlias = typename GetFlagType>::type; + using ParticlePush = typename pmacc::traits::Resolve::type; + // Because of composite pushers, we have to defer using the launcher + PushLauncher{}(*this, currentStep); + } + + /** Do the particle push stage using the given pusher + * + * @tparam T_Pusher non-composite pusher type + * @param currentStep current time iteration + */ + template + template + void Particles::push(uint32_t const currentStep) + { + PMACC_CASSERT_MSG( + _internal_error_particle_push_instantiated_for_composite_pusher, + particles::pusher::IsComposite::type::value == false); + + using InterpolationScheme = + typename pmacc::traits::Resolve>::type>::type; + + using FrameSolver = PushParticlePerFrame; + + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + + /* Adjust interpolation area in particle pusher to allow sub-stepping pushes. 
+ * Here were provide an actual pusher and use its actual margins + */ + using LowerMargin = typename GetLowerMarginForPusher::type; + using UpperMargin = typename GetUpperMarginForPusher::type; + + using BlockArea = SuperCellDescription; + + AreaMapping mapper(this->cellDescription); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelMoveAndMarkParticles{}) + (mapper.getGridDim(), numWorkers)( + this->getDeviceParticlesBox(), + fieldE->getDeviceDataBox(), + fieldB->getDeviceDataBox(), + currentStep, + FrameSolver(), + mapper); + + dc.releaseData(FieldE::getName()); + dc.releaseData(FieldB::getName()); + + ParticlesBaseType::template shiftParticles(); + } + + template + template + void Particles::initDensityProfile( + T_DensityFunctor& densityFunctor, + T_PositionFunctor& positionFunctor, + const uint32_t currentStep) + { + log("initialize density profile for species %1%") % FrameType::getName(); + + uint32_t const numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + SubGrid const& subGrid = Environment::get().SubGrid(); + DataSpace localCells = subGrid.getLocalDomain().size; + DataSpace totalGpuCellOffset = subGrid.getLocalDomain().offset; + totalGpuCellOffset.y() += numSlides * localCells.y(); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + AreaMapping mapper(this->cellDescription); + PMACC_KERNEL(KernelFillGridWithParticles{}) + (mapper.getGridDim(), numWorkers)( + densityFunctor, + positionFunctor, + totalGpuCellOffset, + this->particlesBuffer->getDeviceParticleBox(), + mapper); + + this->fillAllGaps(); + } + + template + template< + typename T_SrcName, + typename T_SrcAttributes, + typename T_SrcFlags, + typename T_ManipulateFunctor, + typename T_SrcFilterFunctor> + void Particles::deviceDeriveFrom( + Particles& src, + T_ManipulateFunctor& manipulatorFunctor, + T_SrcFilterFunctor& srcFilterFunctor) + { + log("clone species %1%") % 
FrameType::getName(); + + AreaMapping mapper(this->cellDescription); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelDeriveParticles{}) + (mapper.getGridDim(), numWorkers)( + this->getDeviceParticlesBox(), + src.getDeviceParticlesBox(), + manipulatorFunctor, + srcFilterFunctor, + mapper); + this->fillAllGaps(); + } } // namespace picongpu diff --git a/include/picongpu/particles/ParticlesFunctors.hpp b/include/picongpu/particles/ParticlesFunctors.hpp index d7565f2e92..a30e5b7fb2 100644 --- a/include/picongpu/particles/ParticlesFunctors.hpp +++ b/include/picongpu/particles/ParticlesFunctors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Marco Garten, Alexander Grund, +/* Copyright 2014-2021 Rene Widera, Marco Garten, Alexander Grund, * Heiko Burau, Axel Huebl * * This file is part of PIConGPU. @@ -28,11 +28,10 @@ #include #include #include -#include #include "picongpu/particles/traits/GetIonizerList.hpp" -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp" +#if(PMACC_CUDA_ENABLED == 1) +# include "picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp" #endif #include "picongpu/particles/traits/GetPhotonCreator.hpp" #include "picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp" @@ -49,522 +48,426 @@ namespace picongpu { - -namespace particles -{ - -/** assign nullptr to all attributes of a species - * - * @tparam T_SpeciesType type or name as boost::mpl::string of the species - */ -template -struct AssignNull -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - void operator()() + namespace particles { - DataConnector &dc = Environment<>::get().DataConnector(); - auto species = dc.get< SpeciesType >( FrameType::getName(), true ); - species = nullptr; - dc.releaseData( FrameType::getName() ); - } -}; - -/** create memory for the 
given species type - * - * @tparam T_SpeciesType type or name as boost::mpl::string of the species - */ -template< typename T_SpeciesType > -struct CreateSpecies -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - template< - typename T_DeviceHeap, - typename T_CellDescription - > - HINLINE void operator()( - const std::shared_ptr& deviceHeap, - T_CellDescription* cellDesc - ) const - { - DataConnector &dc = Environment<>::get().DataConnector(); - dc.consume( - pmacc::memory::makeUnique( - deviceHeap, - *cellDesc, - FrameType::getName() - ) - ); - } -}; - -/** write memory statistics to the terminal - * - * @tparam T_SpeciesType type or name as boost::mpl::string of the species - */ -template< typename T_SpeciesType > -struct LogMemoryStatisticsForSpecies -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - template - HINLINE void operator()( - const std::shared_ptr& deviceHeap - ) const - { -#if( PMACC_CUDA_ENABLED == 1 ) - log("mallocMC: free slots for species %3%: %1% a %2%") % - deviceHeap->getAvailableSlots(sizeof (FrameType)) % - sizeof (FrameType) % - FrameType::getName(); + /** assign nullptr to all attributes of a species + * + * @tparam T_SpeciesType type or name as boost::mpl::string of the species + */ + template + struct AssignNull + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + void operator()() + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto species = dc.get(FrameType::getName(), true); + species = nullptr; + dc.releaseData(FrameType::getName()); + } + }; + + /** create memory for the given species type + * + * @tparam T_SpeciesType type or name as boost::mpl::string of the species + */ + template + struct CreateSpecies + { + 
using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + template + HINLINE void operator()(const std::shared_ptr& deviceHeap, T_CellDescription* cellDesc) const + { + DataConnector& dc = Environment<>::get().DataConnector(); + dc.consume(std::make_unique(deviceHeap, *cellDesc, FrameType::getName())); + } + }; + + /** write memory statistics to the terminal + * + * @tparam T_SpeciesType type or name as boost::mpl::string of the species + */ + template + struct LogMemoryStatisticsForSpecies + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + template + HINLINE void operator()(const std::shared_ptr& deviceHeap) const + { +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + log("mallocMC: free slots for species %3%: %1% a %2%") + % deviceHeap->getAvailableSlots( + cupla::manager::Device::get().current(), + cupla::manager::Stream::get().stream(0), + sizeof(FrameType)) + % sizeof(FrameType) % FrameType::getName(); #endif - } -}; - -/** call method reset for the given species - * - * @tparam T_SpeciesType type or name as boost::mpl::string of the species to reset - */ -template< typename T_SpeciesType > -struct CallReset -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - HINLINE void operator()( const uint32_t currentStep ) - { - DataConnector &dc = Environment<>::get().DataConnector(); - auto species = dc.get< SpeciesType >( FrameType::getName(), true ); - species->reset( currentStep ); - dc.releaseData( FrameType::getName() ); - } -}; - -/** Allocate helper fields for FLYlite population kinetics for atomic physics - * - * energy histograms, rate matrix, etc. 
- * - * @tparam T_SpeciesType type or name as boost::mpl::string of ion species - */ -template< typename T_SpeciesType > -struct CallPopulationKineticsInit -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - using PopulationKineticsSolver = typename pmacc::traits::Resolve< - typename GetFlagType< - FrameType, - populationKinetics<> - >::type - >::type; - - HINLINE void operator()( - pmacc::DataSpace< simDim > gridSizeLocal - ) const - { - PopulationKineticsSolver flylite; - flylite.init( gridSizeLocal, FrameType::getName() ); - } -}; - -/** Calculate FLYlite population kinetics evolving one time step - * - * @tparam T_SpeciesType type or name as boost::mpl::string of ion species - */ -template< typename T_SpeciesType > -struct CallPopulationKinetics -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - - using FrameType = typename SpeciesType::FrameType; - - using PopulationKineticsSolver = typename pmacc::traits::Resolve< - typename GetFlagType< - FrameType, - populationKinetics<> - >::type - >::type; - - HINLINE void operator()( uint32_t currentStep ) const - { - PopulationKineticsSolver flylite{}; - flylite.template update< SpeciesType >( - FrameType::getName(), - currentStep - ); - } -}; - -/** push a species - * - * push is only triggered for species with a pusher - * - * @tparam T_SpeciesType type or name as boost::mpl::string of particle species that is checked - */ -template -struct PushSpecies -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - template - HINLINE void operator()( - const uint32_t currentStep, - const EventTask& eventInt, - T_EventList& updateEvent - ) const - { - DataConnector &dc = Environment<>::get().DataConnector(); - auto species = dc.get< SpeciesType 
>( FrameType::getName(), true ); - - __startTransaction(eventInt); - species->update(currentStep); - dc.releaseData( FrameType::getName() ); - EventTask ev = __endTransaction(); - updateEvent.push_back(ev); - } -}; - -/** Communicate a species - * - * communication is only triggered for species with a pusher - * - * @tparam T_SpeciesType type or name as boost::mpl::string of particle species that is checked - */ -template -struct CommunicateSpecies -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - template - HINLINE void operator()( - T_EventList& updateEventList, - T_EventList& commEventList - ) const - { - DataConnector &dc = Environment<>::get().DataConnector(); - auto species = dc.get< SpeciesType >( FrameType::getName(), true ); - - EventTask updateEvent(*(updateEventList.begin())); - - updateEventList.pop_front(); - commEventList.push_back( communication::asyncCommunication(*species, updateEvent) ); - - dc.releaseData( FrameType::getName() ); - } -}; + } + }; -/** update momentum, move and communicate all species */ -struct PushAllSpecies -{ - /** push and communicate all species - * - * @param currentStep current simulation step - * @param pushEvent[out] grouped event that marks the end of the species push - * @param commEvent[out] grouped event that marks the end of the species communication - */ - HINLINE void operator()( - const uint32_t currentStep, - const EventTask& eventInt, - EventTask& pushEvent, - EventTask& commEvent - ) const - { - using EventList = std::list; - EventList updateEventList; - EventList commEventList; - - /* push all species */ - using VectorSpeciesWithPusher = typename pmacc::particles::traits::FilterByFlag - < - VectorAllSpecies, - particlePusher<> - >::type; - meta::ForEach< VectorSpeciesWithPusher, particles::PushSpecies< bmpl::_1 > > pushSpecies; - pushSpecies( currentStep, eventInt, updateEventList ); - - /* 
join all push events */ - for (typename EventList::iterator iter = updateEventList.begin(); - iter != updateEventList.end(); - ++iter) + /** call method reset for the given species + * + * @tparam T_SpeciesType type or name as boost::mpl::string of the species to reset + */ + template + struct CallReset { - pushEvent += *iter; - } - - /* call communication for all species */ - meta::ForEach< VectorSpeciesWithPusher, particles::CommunicateSpecies< bmpl::_1> > communicateSpecies; - communicateSpecies( updateEventList, commEventList ); - - /* join all communication events */ - for (typename EventList::iterator iter = commEventList.begin(); - iter != commEventList.end(); - ++iter) + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + HINLINE void operator()(const uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto species = dc.get(FrameType::getName(), true); + species->reset(currentStep); + dc.releaseData(FrameType::getName()); + } + }; + + /** Allocate helper fields for FLYlite population kinetics for atomic physics + * + * energy histograms, rate matrix, etc. 
+ * + * @tparam T_SpeciesType type or name as boost::mpl::string of ion species + */ + template + struct CallPopulationKineticsInit { - commEvent += *iter; - } - } -}; - -/** Call an ionization method upon an ion species - * - * \tparam T_SpeciesType type or name as boost::mpl::string of particle species that is going to be ionized with - * ionization scheme T_SelectIonizer - */ -template< typename T_SpeciesType, typename T_SelectIonizer > -struct CallIonizationScheme -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using SelectIonizer = T_SelectIonizer; - using FrameType = typename SpeciesType::FrameType; - - /* define the type of the species to be created - * from inside the ionization model specialization - */ - using DestSpecies = typename SelectIonizer::DestSpecies; - using DestFrameType = typename DestSpecies::FrameType; - - /** Functor implementation - * - * \tparam T_CellDescription contains the number of blocks and blocksize - * that is later passed to the kernel - * \param cellDesc logical block information like dimension and cell sizes - * \param currentStep The current time step - */ - template - HINLINE void operator()( - T_CellDescription cellDesc, - const uint32_t currentStep - ) const - { - DataConnector &dc = Environment<>::get().DataConnector(); - - // alias for pointer on source species - auto srcSpeciesPtr = dc.get< SpeciesType >( FrameType::getName(), true ); - // alias for pointer on destination species - auto electronsPtr = dc.get< DestSpecies >( DestFrameType::getName(), true ); - - SelectIonizer selectIonizer(currentStep); - - creation::createParticlesFromSpecies(*srcSpeciesPtr, *electronsPtr, selectIonizer, cellDesc); - - /* fill the gaps in the created species' particle frames to ensure that only - * the last frame is not completely filled but every other before is full + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename 
SpeciesType::FrameType; + + using PopulationKineticsSolver = + typename pmacc::traits::Resolve>::type>::type; + + HINLINE void operator()(pmacc::DataSpace gridSizeLocal) const + { + PopulationKineticsSolver flylite; + flylite.init(gridSizeLocal, FrameType::getName()); + } + }; + + /** Calculate FLYlite population kinetics evolving one time step + * + * @tparam T_SpeciesType type or name as boost::mpl::string of ion species */ - electronsPtr->fillAllGaps(); - - dc.releaseData( FrameType::getName() ); - dc.releaseData( DestFrameType::getName() ); - - } - -}; - -/** Call all ionization schemes of an ion species - * - * Tests if species can be ionized and calls the kernels to do that - * - * \tparam T_SpeciesType type or name as boost::mpl::string of particle species that is checked for ionization - */ -template< typename T_SpeciesType > -struct CallIonization -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - // SelectIonizer will be either the specified one or fallback: None - using SelectIonizerList = typename traits::GetIonizerList< SpeciesType >::type; - - /** Functor implementation - * - * \tparam T_CellDescription contains the number of blocks and blocksize - * that is later passed to the kernel - * \param cellDesc logical block information like dimension and cell sizes - * \param currentStep The current time step - */ - template - HINLINE void operator()( - T_CellDescription cellDesc, - const uint32_t currentStep - ) const - { - DataConnector &dc = Environment<>::get().DataConnector(); - - // only if an ionizer has been specified, this is executed - using hasIonizers = typename HasFlag< FrameType, ionizers<> >::type; - if (hasIonizers::value) + template + struct CallPopulationKinetics { - meta::ForEach< SelectIonizerList, CallIonizationScheme< SpeciesType, bmpl::_1 > > particleIonization; - particleIonization( cellDesc, currentStep ); - } - } + 
using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; -}; + using FrameType = typename SpeciesType::FrameType; -#if( PMACC_CUDA_ENABLED == 1 ) + using PopulationKineticsSolver = + typename pmacc::traits::Resolve>::type>::type; -/** Handles the bremsstrahlung effect for electrons on ions. - * - * @tparam T_ElectronSpecies type or name as boost::mpl::string of electron particle species - */ -template -struct CallBremsstrahlung -{ - using ElectronSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_ElectronSpecies - >; - using ElectronFrameType = typename ElectronSpecies::FrameType; - - using IonSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - typename pmacc::particles::traits::ResolveAliasFromSpecies< - ElectronSpecies, - bremsstrahlungIons<> - >::type - >; - using PhotonSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - typename pmacc::particles::traits::ResolveAliasFromSpecies< - ElectronSpecies, - bremsstrahlungPhotons<> - >::type - >; - using PhotonFrameType = typename PhotonSpecies::FrameType; - using BremsstrahlungFunctor = bremsstrahlung::Bremsstrahlung< - IonSpecies, - ElectronSpecies, - PhotonSpecies - >; - - /** Functor implementation - * - * \tparam T_CellDescription contains the number of blocks and blocksize - * that is later passed to the kernel - * \param cellDesc logical block information like dimension and cell sizes - * \param currentStep the current time step - */ - template - HINLINE void operator()( - T_CellDescription cellDesc, - const uint32_t currentStep, - const ScaledSpectrumMap& scaledSpectrumMap, - const bremsstrahlung::GetPhotonAngle& photonAngle - ) const - { - DataConnector &dc = Environment<>::get().DataConnector(); + HINLINE void operator()(uint32_t currentStep) const + { + PopulationKineticsSolver flylite{}; + flylite.template update(FrameType::getName(), currentStep); + } + }; - /* alias for pointer on source species */ - auto electronSpeciesPtr = 
dc.get< ElectronSpecies >( ElectronFrameType::getName(), true ); - /* alias for pointer on destination species */ - auto photonSpeciesPtr = dc.get< PhotonSpecies >( PhotonFrameType::getName(), true ); + /** push a species + * + * push is only triggered for species with a pusher + * + * @tparam T_SpeciesType type or name as boost::mpl::string of particle species that is checked + */ + template + struct PushSpecies + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + template + HINLINE void operator()(const uint32_t currentStep, const EventTask& eventInt, T_EventList& updateEvent) + const + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto species = dc.get(FrameType::getName(), true); + + __startTransaction(eventInt); + species->update(currentStep); + dc.releaseData(FrameType::getName()); + EventTask ev = __endTransaction(); + updateEvent.push_back(ev); + } + }; + + /** Communicate a species + * + * communication is only triggered for species with a pusher + * + * @tparam T_SpeciesType type or name as boost::mpl::string of particle species that is checked + */ + template + struct CommunicateSpecies + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; - const float_X targetZ = GetAtomicNumbers::type::numberOfProtons; + template + HINLINE void operator()(T_EventList& updateEventList, T_EventList& commEventList) const + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto species = dc.get(FrameType::getName(), true); - using namespace bremsstrahlung; - BremsstrahlungFunctor bremsstrahlungFunctor( - scaledSpectrumMap.at(targetZ).getScaledSpectrumFunctor(), - scaledSpectrumMap.at(targetZ).getStoppingPowerFunctor(), - photonAngle.getPhotonAngleFunctor(), - currentStep); + EventTask updateEvent(*(updateEventList.begin())); - creation::createParticlesFromSpecies(*electronSpeciesPtr, 
*photonSpeciesPtr, bremsstrahlungFunctor, cellDesc); + updateEventList.pop_front(); + commEventList.push_back(communication::asyncCommunication(*species, updateEvent)); - dc.releaseData( ElectronFrameType::getName() ); - dc.releaseData( PhotonFrameType::getName() ); - } + dc.releaseData(FrameType::getName()); + } + }; -}; + /** update momentum, move and communicate all species */ + struct PushAllSpecies + { + /** push and communicate all species + * + * @param currentStep current simulation step + * @param pushEvent[out] grouped event that marks the end of the species push + * @param commEvent[out] grouped event that marks the end of the species communication + */ + HINLINE void operator()( + const uint32_t currentStep, + const EventTask& eventInt, + EventTask& pushEvent, + EventTask& commEvent) const + { + using EventList = std::list; + EventList updateEventList; + EventList commEventList; + + /* push all species */ + using VectorSpeciesWithPusher = + typename pmacc::particles::traits::FilterByFlag>::type; + meta::ForEach> pushSpecies; + pushSpecies(currentStep, eventInt, updateEventList); + + /* join all push events */ + for(typename EventList::iterator iter = updateEventList.begin(); iter != updateEventList.end(); ++iter) + { + pushEvent += *iter; + } + + /* call communication for all species */ + meta::ForEach> communicateSpecies; + communicateSpecies(updateEventList, commEventList); + + /* join all communication events */ + for(typename EventList::iterator iter = commEventList.begin(); iter != commEventList.end(); ++iter) + { + commEvent += *iter; + } + } + }; + + /** Call an ionization method upon an ion species + * + * \tparam T_SpeciesType type or name as boost::mpl::string of particle species that is going to be ionized + * with ionization scheme T_SelectIonizer + */ + template + struct CallIonizationScheme + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using SelectIonizer = T_SelectIonizer; + using FrameType = typename 
SpeciesType::FrameType; + + /* define the type of the species to be created + * from inside the ionization model specialization + */ + using DestSpecies = typename SelectIonizer::DestSpecies; + using DestFrameType = typename DestSpecies::FrameType; + + /** Functor implementation + * + * \tparam T_CellDescription contains the number of blocks and blocksize + * that is later passed to the kernel + * \param cellDesc logical block information like dimension and cell sizes + * \param currentStep The current time step + */ + template + HINLINE void operator()(T_CellDescription cellDesc, const uint32_t currentStep) const + { + DataConnector& dc = Environment<>::get().DataConnector(); + + // alias for pointer on source species + auto srcSpeciesPtr = dc.get(FrameType::getName(), true); + // alias for pointer on destination species + auto electronsPtr = dc.get(DestFrameType::getName(), true); + + SelectIonizer selectIonizer(currentStep); + + creation::createParticlesFromSpecies(*srcSpeciesPtr, *electronsPtr, selectIonizer, cellDesc); + + /* fill the gaps in the created species' particle frames to ensure that only + * the last frame is not completely filled but every other before is full + */ + electronsPtr->fillAllGaps(); + + dc.releaseData(FrameType::getName()); + dc.releaseData(DestFrameType::getName()); + } + }; + + /** Call all ionization schemes of an ion species + * + * Tests if species can be ionized and calls the kernels to do that + * + * \tparam T_SpeciesType type or name as boost::mpl::string of particle species that is checked for ionization + */ + template + struct CallIonization + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + // SelectIonizer will be either the specified one or fallback: None + using SelectIonizerList = typename traits::GetIonizerList::type; + + /** Functor implementation + * + * \tparam T_CellDescription contains the number of blocks and blocksize + * that is later 
passed to the kernel + * \param cellDesc logical block information like dimension and cell sizes + * \param currentStep The current time step + */ + template + HINLINE void operator()(T_CellDescription cellDesc, const uint32_t currentStep) const + { + DataConnector& dc = Environment<>::get().DataConnector(); + + // only if an ionizer has been specified, this is executed + using hasIonizers = typename HasFlag>::type; + if(hasIonizers::value) + { + meta::ForEach> particleIonization; + particleIonization(cellDesc, currentStep); + } + } + }; + +#if(PMACC_CUDA_ENABLED == 1) + + /** Handles the bremsstrahlung effect for electrons on ions. + * + * @tparam T_ElectronSpecies type or name as boost::mpl::string of electron particle species + */ + template + struct CallBremsstrahlung + { + using ElectronSpecies = pmacc::particles::meta::FindByNameOrType_t; + using ElectronFrameType = typename ElectronSpecies::FrameType; + + using IonSpecies = pmacc::particles::meta::FindByNameOrType_t< + VectorAllSpecies, + typename pmacc::particles::traits::ResolveAliasFromSpecies>:: + type>; + using PhotonSpecies = pmacc::particles::meta::FindByNameOrType_t< + VectorAllSpecies, + typename pmacc::particles::traits::ResolveAliasFromSpecies>:: + type>; + using PhotonFrameType = typename PhotonSpecies::FrameType; + using BremsstrahlungFunctor = bremsstrahlung::Bremsstrahlung; + + /** Functor implementation + * + * \tparam T_CellDescription contains the number of blocks and blocksize + * that is later passed to the kernel + * \param cellDesc logical block information like dimension and cell sizes + * \param currentStep the current time step + */ + template + HINLINE void operator()( + T_CellDescription cellDesc, + const uint32_t currentStep, + const ScaledSpectrumMap& scaledSpectrumMap, + const bremsstrahlung::GetPhotonAngle& photonAngle) const + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /* alias for pointer on source species */ + auto electronSpeciesPtr = 
dc.get(ElectronFrameType::getName(), true); + /* alias for pointer on destination species */ + auto photonSpeciesPtr = dc.get(PhotonFrameType::getName(), true); + + const float_X targetZ = GetAtomicNumbers::type::numberOfProtons; + + using namespace bremsstrahlung; + BremsstrahlungFunctor bremsstrahlungFunctor( + scaledSpectrumMap.at(targetZ).getScaledSpectrumFunctor(), + scaledSpectrumMap.at(targetZ).getStoppingPowerFunctor(), + photonAngle.getPhotonAngleFunctor(), + currentStep); + + creation::createParticlesFromSpecies( + *electronSpeciesPtr, + *photonSpeciesPtr, + bremsstrahlungFunctor, + cellDesc); + + dc.releaseData(ElectronFrameType::getName()); + dc.releaseData(PhotonFrameType::getName()); + } + }; #endif -/** Handles the synchrotron radiation emission of photons from electrons - * - * @tparam T_ElectronSpecies type or name as boost::mpl::string of electron particle species - */ -template -struct CallSynchrotronPhotons -{ - using ElectronSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_ElectronSpecies - >; - using ElectronFrameType = typename ElectronSpecies::FrameType; - - /* SelectedPhotonCreator will be either PhotonCreator or fallback: CreatorBase */ - using SelectedPhotonCreator = typename traits::GetPhotonCreator< ElectronSpecies >::type; - using PhotonSpecies = typename SelectedPhotonCreator::PhotonSpecies; - using PhotonFrameType = typename PhotonSpecies::FrameType; - - /** Functor implementation - * - * \tparam T_CellDescription contains the number of blocks and blocksize - * that is later passed to the kernel - * \param cellDesc logical block information like dimension and cell sizes - * \param currentStep The current time step - * \param synchrotronFunctions synchrotron functions wrapper object - */ - template - HINLINE void operator()( - T_CellDescription cellDesc, - const uint32_t currentStep, - const synchrotronPhotons::SynchrotronFunctions& synchrotronFunctions - ) const - { - DataConnector &dc = 
Environment<>::get().DataConnector(); - - /* alias for pointer on source species */ - auto electronSpeciesPtr = dc.get< ElectronSpecies >( ElectronFrameType::getName(), true ); - /* alias for pointer on destination species */ - auto photonSpeciesPtr = dc.get< PhotonSpecies >( PhotonFrameType::getName(), true ); - - using namespace synchrotronPhotons; - SelectedPhotonCreator photonCreator( - synchrotronFunctions.getCursor(SynchrotronFunctions::first), - synchrotronFunctions.getCursor(SynchrotronFunctions::second)); - - creation::createParticlesFromSpecies(*electronSpeciesPtr, *photonSpeciesPtr, photonCreator, cellDesc); - - dc.releaseData( ElectronFrameType::getName() ); - dc.releaseData( PhotonFrameType::getName() ); - } - -}; - -} // namespace particles + /** Handles the synchrotron radiation emission of photons from electrons + * + * @tparam T_ElectronSpecies type or name as boost::mpl::string of electron particle species + */ + template + struct CallSynchrotronPhotons + { + using ElectronSpecies = pmacc::particles::meta::FindByNameOrType_t; + using ElectronFrameType = typename ElectronSpecies::FrameType; + + /* SelectedPhotonCreator will be either PhotonCreator or fallback: CreatorBase */ + using SelectedPhotonCreator = typename traits::GetPhotonCreator::type; + using PhotonSpecies = typename SelectedPhotonCreator::PhotonSpecies; + using PhotonFrameType = typename PhotonSpecies::FrameType; + + /** Functor implementation + * + * \tparam T_CellDescription contains the number of blocks and blocksize + * that is later passed to the kernel + * \param cellDesc logical block information like dimension and cell sizes + * \param currentStep The current time step + * \param synchrotronFunctions synchrotron functions wrapper object + */ + template + HINLINE void operator()( + T_CellDescription cellDesc, + const uint32_t currentStep, + const synchrotronPhotons::SynchrotronFunctions& synchrotronFunctions) const + { + DataConnector& dc = Environment<>::get().DataConnector(); 
+ + /* alias for pointer on source species */ + auto electronSpeciesPtr = dc.get(ElectronFrameType::getName(), true); + /* alias for pointer on destination species */ + auto photonSpeciesPtr = dc.get(PhotonFrameType::getName(), true); + + using namespace synchrotronPhotons; + SelectedPhotonCreator photonCreator( + synchrotronFunctions.getCursor(SynchrotronFunctions::first), + synchrotronFunctions.getCursor(SynchrotronFunctions::second)); + + creation::createParticlesFromSpecies(*electronSpeciesPtr, *photonSpeciesPtr, photonCreator, cellDesc); + + dc.releaseData(ElectronFrameType::getName()); + dc.releaseData(PhotonFrameType::getName()); + } + }; + + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ParticlesInit.kernel b/include/picongpu/particles/ParticlesInit.kernel index a6673db7cc..0893ef933e 100644 --- a/include/picongpu/particles/ParticlesInit.kernel +++ b/include/picongpu/particles/ParticlesInit.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -37,35 +37,29 @@ namespace picongpu { - - /** returns the particle density. + /** Return physical particle density value for the given cell + * + * That is, the number of real (physical, not macro-) particles, + * in PIC units of volume**-3. + * Takes into account base density and density ratio of the species. + * The resulting density is assumed constant inside a cell. * - * That means: - * The REAL number of particles density in units of volume**-3, - * normed to UNIT_LENGHT**3 - * That is NOT the species' macro particle density. 
+ * @tparam T_Species particle species type + * @tparam T_DensityProfile density functor type, + * follows densityProfiles::IProfile concept * - * @param offset The gpu offset (left top front cell in 3D) - * @param cellIdx the current cell on this gpu - * @return a float_X which stands for the real number of particles per volume + * @param totalCellOffset total offset from the start of the global + * simulation area, including all slides [in cells] */ - template< - typename T_Species, - typename T_DensityProfile - > - DINLINE float_X calcRealDensity( - T_DensityProfile & densityFunctor, - DataSpace< simDim > const & totalGpuCellIdx - ) + template + DINLINE float_X calcRealDensity(T_DensityProfile& densityFunctor, DataSpace const& totalCellOffset) { PMACC_CASSERT_MSG( Please_deselect_densityProfileInitMethod_for_your_species_or_set_BASE_DENSITY_to_a_value_greater_than_0, - BASE_DENSITY > float_X( 0.0 ) - ); - - float_X const densityRatioOfSpecies = traits::GetDensityRatio< T_Species >::type::getValue( ); + BASE_DENSITY > float_X(0.0)); - float_X const value = densityFunctor( totalGpuCellIdx ) * BASE_DENSITY * densityRatioOfSpecies; + float_X const densityRatioOfSpecies = traits::GetDensityRatio::type::getValue(); + float_X const value = densityFunctor(totalCellOffset) * BASE_DENSITY * densityRatioOfSpecies; return value; } @@ -74,10 +68,7 @@ namespace picongpu * @tparam T_numWorkers number of workers * @tparam T_Species picongpu::Particles, species type which is initialized */ - template< - uint32_t T_numWorkers, - typename T_Species - > + template struct KernelFillGridWithParticles { /** fill supercell grid with particles @@ -103,270 +94,163 @@ namespace picongpu typename T_PositionFunctor, typename T_ParBox, typename T_Mapping, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, T_DensityProfile densityFunctor, T_PositionFunctor positionFunctor, - DataSpace< simDim > const 
totalGpuCellOffset, + DataSpace const totalGpuCellOffset, T_ParBox pb, - T_Mapping mapper - ) const + T_Mapping mapper) const { using namespace mappings::threads; - constexpr uint32_t frameSize = pmacc::math::CT::volume< SuperCellSize >::type::value; - PMACC_CONSTEXPR_CAPTURE uint32_t cellsPerSupercell = pmacc::math::CT::volume< SuperCellSize >::type::value; + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + PMACC_CONSTEXPR_CAPTURE uint32_t cellsPerSupercell = pmacc::math::CT::volume::type::value; constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; using FramePtr = typename T_ParBox::FramePtr; using FrameType = typename T_ParBox::FrameType; using ParticleType = typename FrameType::ParticleType; - DataSpace< simDim > const superCells( mapper.getGridSuperCells( ) ); - - PMACC_SMEM( - acc, - frame, - FramePtr - ); - PMACC_SMEM( - acc, - finished, - int - ); - - DataSpace< simDim > const superCellIdx( - mapper.getSuperCellIndex( DataSpace( blockIdx ) ) - ); + DataSpace const superCells(mapper.getGridSuperCells()); + + PMACC_SMEM(acc, frame, FramePtr); + PMACC_SMEM(acc, finished, int); + + DataSpace const superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); /* offset of the superCell relative to the local domain [in supercells] (without guarding supercells) */ - DataSpace< simDim > const localSuperCellOffset( - superCellIdx - mapper.getGuardingSuperCells() - ); + DataSpace const localSuperCellOffset(superCellIdx - mapper.getGuardingSuperCells()); - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; + using ParticleDomCfg = IdxConfig; - using SuperCellDomCfg = IdxConfig< - cellsPerSupercell, - numWorkers - >; + using SuperCellDomCfg = IdxConfig; - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + ForEachIdx forEachParticle(workerIdx); /* number of particles to create for each cell (virtual worker) */ - 
memory::CtxArray< - uint32_t, - SuperCellDomCfg - > - numParsPerCellCtx( 0 ); + memory::CtxArray numParsPerCellCtx(0); /* create for each virtual thread a position functor instance */ memory::CtxArray< - decltype( - positionFunctor( - acc, - alpaka::core::declval< DataSpace< simDim > const >( ), - /* cellsPerSupercell is used because each virtual worker - * is creating **exactly one** functor - */ - alpaka::core::declval< WorkerCfg< cellsPerSupercell > const >( ) - ) - ), - SuperCellDomCfg - > - positionFunctorCtx{ }; + decltype(positionFunctor( + acc, + alpaka::core::declval const>(), + /* cellsPerSupercell is used because each virtual worker + * is creating **exactly one** functor + */ + alpaka::core::declval const>())), + SuperCellDomCfg> + positionFunctorCtx{}; - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; + ForEachIdx> onlyMaster{workerIdx}; /* reset shared memory flag if a virtual worker needs to create a particle */ - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - finished = 1; - } - ); - - __syncthreads(); + onlyMaster([&](uint32_t const, uint32_t const) { finished = 1; }); - // initialize the position functor for each cell in the supercell - ForEachIdx< - IdxConfig< - cellsPerSupercell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - /* cell index within the superCell */ - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - - /* cell offset to the begin of the simulation */ - DataSpace< simDim > const totalCellOffset = - totalGpuCellOffset + - localSuperCellOffset * SuperCellSize::toRT() + - cellIdx; - float_X const realDensity = calcRealDensity< T_Species >( - densityFunctor, - totalCellOffset - ); - - /** @bug volatile is required for CUDA 9.2 and sm_60 else the compiler will - * optimize out `if(realParticlesPerCell > 0.0_X)` later on. 
- */ - volatile float_X const realParticlesPerCell = realDensity * CELL_VOLUME; + cupla::__syncthreads(acc); - // create an independent position functor for each cell in the supercell - positionFunctorCtx[ idx ] = positionFunctor( + // initialize the position functor for each cell in the supercell + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const idx) { + /* cell index within the superCell */ + DataSpace const cellIdx = DataSpaceOperations::template map(linearIdx); + + /* cell offset to the begin of the simulation */ + DataSpace const totalCellOffset + = totalGpuCellOffset + localSuperCellOffset * SuperCellSize::toRT() + cellIdx; + float_X const realDensity = calcRealDensity(densityFunctor, totalCellOffset); + + /** @bug volatile is required for CUDA 9.2 and sm_60 else the compiler will + * optimize out `if(realParticlesPerCell > 0.0_X)` later on. + */ + volatile float_X const realParticlesPerCell = realDensity * CELL_VOLUME; + + // create an independent position functor for each cell in the supercell + positionFunctorCtx[idx] + = positionFunctor(acc, localSuperCellOffset, WorkerCfg{linearIdx}); + + if(realParticlesPerCell > 0.0_X) + numParsPerCellCtx[idx] + = positionFunctorCtx[idx].template numberOfMacroParticles(realParticlesPerCell); + + if(numParsPerCellCtx[idx] > 0) + nvidia::atomicAllExch( acc, - localSuperCellOffset, - WorkerCfg< cellsPerSupercell >{ linearIdx } - ); + &finished, + 0, + ::alpaka::hierarchy::Threads{}); // one or more cells have particles to create - if(realParticlesPerCell > 0.0_X) - numParsPerCellCtx[ idx ] = - positionFunctorCtx[ idx ].template numberOfMacroParticles< ParticleType >( realParticlesPerCell ); + return numParsPerCellCtx[idx]; + }); - if( numParsPerCellCtx[ idx ] > 0 ) - nvidia::atomicAllExch( - acc, - &finished, - 0, - ::alpaka::hierarchy::Threads{} - ); //one or more cells have particles to create + cupla::__syncthreads(acc); - return numParsPerCellCtx[ idx ]; - } - ); - - __syncthreads(); - - if( 
finished == 1 ) + if(finished == 1) return; // if there is no particle which has to be created - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getEmptyFrame(); - pb.setAsLastFrame( - acc, - frame, - superCellIdx - ); - } - ); + onlyMaster([&](uint32_t const, uint32_t const) { + frame = pb.getEmptyFrame(acc); + pb.setAsLastFrame(acc, frame, superCellIdx); + }); // distribute the particles within the cell do { // wait that master updates the current used frame - __syncthreads(); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - finished = 1; - } - ); + onlyMaster([&](uint32_t const, uint32_t const) { finished = 1; }); - __syncthreads(); + cupla::__syncthreads(acc); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(numParsPerCellCtx[idx] > 0u) { - if( numParsPerCellCtx[ idx ] > 0u ) + auto particle = frame[linearIdx]; + + /** we now initialize all attributes of the new particle to their default values + * some attributes, such as the position, localCellIdx, weighting or the + * multiMask (\see AttrToIgnore) of the particle will be set individually + * in the following lines since they are already known at this point. + */ { - auto particle = frame[ linearIdx ]; - - /** we now initialize all attributes of the new particle to their default values - * some attributes, such as the position, localCellIdx, weighting or the - * multiMask (\see AttrToIgnore) of the particle will be set individually - * in the following lines since they are already known at this point. 
- */ - { - using ParticleAttrList = typename FrameType::ValueTypeSeq; - using AttrToIgnore = bmpl::vector4< - position<>, - multiMask, - localCellIdx, - weighting - >; - using ParticleCleanedAttrList = typename ResolveAndRemoveFromSeq< - ParticleAttrList, - AttrToIgnore - >::type; - - meta::ForEach< - ParticleCleanedAttrList, - SetAttributeToDefault< bmpl::_1 > - > setToDefault; - setToDefault( particle ); - } - particle[ multiMask_ ] = 1; - particle[ localCellIdx_ ] = linearIdx; - // initialize position and weighting - positionFunctorCtx[ idx ]( acc, particle ); - - numParsPerCellCtx[ idx ]--; - if( numParsPerCellCtx[ idx ] > 0 ) - nvidia::atomicAllExch( - acc, - &finished, - 0, - ::alpaka::hierarchy::Threads{} - ); // one or more cells have particles to create + using ParticleAttrList = typename FrameType::ValueTypeSeq; + using AttrToIgnore = bmpl::vector4, multiMask, localCellIdx, weighting>; + using ParticleCleanedAttrList = + typename ResolveAndRemoveFromSeq::type; + + meta::ForEach> setToDefault; + setToDefault(particle); } + particle[multiMask_] = 1; + particle[localCellIdx_] = linearIdx; + // initialize position and weighting + positionFunctorCtx[idx](acc, particle); + + numParsPerCellCtx[idx]--; + if(numParsPerCellCtx[idx] > 0) + nvidia::atomicAllExch( + acc, + &finished, + 0, + ::alpaka::hierarchy::Threads{}); // one or more cells have particles to create } - ); + }); - __syncthreads(); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) + onlyMaster([&](uint32_t const, uint32_t const) { + if(finished == 0) { - if( finished == 0 ) - { - frame = pb.getEmptyFrame(); - pb.setAsLastFrame( - acc, - frame, - superCellIdx - ); - } + frame = pb.getEmptyFrame(acc); + pb.setAsLastFrame(acc, frame, superCellIdx); } - ); - } - while( finished == 0 ); + }); + } while(finished == 0); } }; diff --git a/include/picongpu/particles/access/Cell2Particle.hpp b/include/picongpu/particles/access/Cell2Particle.hpp index 
c655dba172..48a8c62b61 100644 --- a/include/picongpu/particles/access/Cell2Particle.hpp +++ b/include/picongpu/particles/access/Cell2Particle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -28,34 +28,41 @@ namespace picongpu { -namespace particleAccess -{ - -#define TEMPLATE_ARGS(Z, N, _) typename Arg ## N -#define NORMAL_ARGS(Z, N, _) Arg ## N arg ## N + namespace particleAccess + { +#define TEMPLATE_ARGS(Z, N, _) typename Arg##N +#define NORMAL_ARGS(Z, N, _) Arg##N arg##N -#define CELL2PARTICLE_OPERATOR(Z, N, _) \ - template \ - DINLINE void operator()(T_Acc const & acc, TParticlesBox pb, const uint32_t workerIdx, const CellIndex& cellIndex, Functor functor, T_Filter filter BOOST_PP_ENUM_TRAILING(N, NORMAL_ARGS, _)); \ +#define CELL2PARTICLE_OPERATOR(Z, N, _) \ + template< \ + typename T_Acc, \ + typename TParticlesBox, \ + typename CellIndex, \ + typename Functor, \ + typename T_Filter BOOST_PP_ENUM_TRAILING(N, TEMPLATE_ARGS, _)> \ + DINLINE void operator()( \ + T_Acc const& acc, \ + TParticlesBox pb, \ + const uint32_t workerIdx, \ + const CellIndex& cellIndex, \ + Functor functor, \ + T_Filter filter BOOST_PP_ENUM_TRAILING(N, NORMAL_ARGS, _)); -template< - typename SuperCellSize, - uint32_t T_numWorkers -> -struct Cell2Particle -{ - using result_type = void; - static constexpr uint32_t numWorkers = T_numWorkers; + template + struct Cell2Particle + { + using result_type = void; + static constexpr uint32_t numWorkers = T_numWorkers; - BOOST_PP_REPEAT(5, CELL2PARTICLE_OPERATOR, _) -}; + BOOST_PP_REPEAT(5, CELL2PARTICLE_OPERATOR, _) + }; #undef CELL2PARTICLE_OPERATOR #undef TEMPLATE_ARGS #undef NORMAL_ARGS -} // namespace particleAccess + } // namespace particleAccess } // namespace picongpu #include "Cell2Particle.tpp" diff --git a/include/picongpu/particles/access/Cell2Particle.tpp b/include/picongpu/particles/access/Cell2Particle.tpp index 
d9195d9d66..cfb5657910 100644 --- a/include/picongpu/particles/access/Cell2Particle.tpp +++ b/include/picongpu/particles/access/Cell2Particle.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -31,107 +31,78 @@ namespace picongpu { -namespace particleAccess -{ - -#define TEMPLATE_ARGS(Z, N, _) typename Arg ## N -#define NORMAL_ARGS(Z, N, _) Arg ## N arg ## N -#define ARGS(Z, N, _) arg ## N + namespace particleAccess + { +#define TEMPLATE_ARGS(Z, N, _) typename Arg##N +#define NORMAL_ARGS(Z, N, _) Arg##N arg##N +#define ARGS(Z, N, _) arg##N -#define CELL2PARTICLE_OPERATOR(Z, N, _) \ -template \ -template \ -DINLINE void Cell2Particle::operator() \ -(T_Acc const & acc, TParticlesBox pb, const uint32_t workerIdx, const CellIndex& cellIndex, Functor functor, T_Filter filter \ -BOOST_PP_ENUM_TRAILING(N, NORMAL_ARGS, _)) \ -{ \ - using namespace mappings::threads; \ - constexpr uint32_t numWorkers = T_numWorkers; \ - constexpr lcellId_t maxParticlesInFrame = pmacc::math::CT::volume< typename TParticlesBox::FrameType::SuperCellSize >::type::value; \ - CellIndex superCellIdx = cellIndex / (CellIndex)SuperCellSize::toRT(); \ - \ - using FramePtr = typename TParticlesBox::FramePtr; \ - using Frame = typename TParticlesBox::FrameType; \ - PMACC_SMEM( acc, frame, FramePtr ); \ - PMACC_SMEM( acc, particlesInSuperCell, uint16_t ); \ - ForEachIdx< \ - IdxConfig< \ - 1, \ - numWorkers \ - > \ - > onlyMaster{ workerIdx }; \ - \ - onlyMaster( \ - [&]( \ - uint32_t const, \ - uint32_t const \ - ) \ - { \ - frame = pb.getLastFrame(superCellIdx); \ - particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame(); \ - } \ - ); \ - __syncthreads(); \ - \ - if (!frame.isValid()) return; /* leave kernel if we have no frames*/ \ - \ - auto accFilter = filter( \ - acc, \ - superCellIdx - GuardSize::toRT(), \ - mappings::threads::WorkerCfg< numWorkers >{ workerIdx } \ - ); \ - \ 
- while (frame.isValid()) \ - { \ - using ParticleDomCfg = IdxConfig< \ - maxParticlesInFrame, \ - numWorkers \ - >; \ - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); \ - forEachParticle( \ - [&]( \ - uint32_t const linearThreadIdx, \ - uint32_t const \ - ) \ - { \ - if (linearThreadIdx < particlesInSuperCell) \ - { \ - if( \ - accFilter( \ - acc, \ - frame[ linearThreadIdx ] \ - ) \ - ) \ - functor( \ - acc, \ - frame, linearThreadIdx \ - BOOST_PP_ENUM_TRAILING(N, ARGS, _) \ - ); \ - } \ - } \ - ); \ - __syncthreads(); \ - onlyMaster( \ - [&]( \ - uint32_t const, \ - uint32_t const \ - ) \ - { \ - frame = pb.getPreviousFrame(frame); \ - particlesInSuperCell = pmacc::math::CT::volume::type::value; \ - } \ - ); \ - __syncthreads(); \ - } \ -} +#define CELL2PARTICLE_OPERATOR(Z, N, _) \ + template \ + template< \ + typename T_Acc, \ + typename TParticlesBox, \ + typename CellIndex, \ + typename Functor, \ + typename T_Filter BOOST_PP_ENUM_TRAILING(N, TEMPLATE_ARGS, _)> \ + DINLINE void Cell2Particle::operator()( \ + T_Acc const& acc, \ + TParticlesBox pb, \ + const uint32_t workerIdx, \ + const CellIndex& cellIndex, \ + Functor functor, \ + T_Filter filter BOOST_PP_ENUM_TRAILING(N, NORMAL_ARGS, _)) \ + { \ + using namespace mappings::threads; \ + constexpr uint32_t numWorkers = T_numWorkers; \ + constexpr lcellId_t maxParticlesInFrame \ + = pmacc::math::CT::volume::type::value; \ + CellIndex superCellIdx = cellIndex / (CellIndex) SuperCellSize::toRT(); \ + \ + using FramePtr = typename TParticlesBox::FramePtr; \ + using Frame = typename TParticlesBox::FrameType; \ + PMACC_SMEM(acc, frame, FramePtr); \ + PMACC_SMEM(acc, particlesInSuperCell, uint16_t); \ + ForEachIdx> onlyMaster{workerIdx}; \ + \ + onlyMaster([&](uint32_t const, uint32_t const) { \ + frame = pb.getLastFrame(superCellIdx); \ + particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame(); \ + }); \ + cupla::__syncthreads(acc); \ + \ + if(!frame.isValid()) \ + return; /* leave 
kernel if we have no frames*/ \ + \ + auto accFilter \ + = filter(acc, superCellIdx - GuardSize::toRT(), mappings::threads::WorkerCfg{workerIdx}); \ + \ + while(frame.isValid()) \ + { \ + using ParticleDomCfg = IdxConfig; \ + ForEachIdx forEachParticle(workerIdx); \ + forEachParticle([&](uint32_t const linearThreadIdx, uint32_t const) { \ + if(linearThreadIdx < particlesInSuperCell) \ + { \ + if(accFilter(acc, frame[linearThreadIdx])) \ + functor(acc, frame, linearThreadIdx BOOST_PP_ENUM_TRAILING(N, ARGS, _)); \ + } \ + }); \ + cupla::__syncthreads(acc); \ + onlyMaster([&](uint32_t const, uint32_t const) { \ + frame = pb.getPreviousFrame(frame); \ + particlesInSuperCell = pmacc::math::CT::volume::type::value; \ + }); \ + cupla::__syncthreads(acc); \ + } \ + } -BOOST_PP_REPEAT(5, CELL2PARTICLE_OPERATOR, _) + BOOST_PP_REPEAT(5, CELL2PARTICLE_OPERATOR, _) #undef CELL2PARTICLE_OPERATOR #undef TEMPLATE_ARGS #undef NORMAL_ARGS #undef ARGS -} // namespace particleAccess + } // namespace particleAccess } // namespace picongpu diff --git a/include/picongpu/particles/boundary/CallPluginsAndDeleteParticles.hpp b/include/picongpu/particles/boundary/CallPluginsAndDeleteParticles.hpp index 141756e61d..882cb1e1dd 100644 --- a/include/picongpu/particles/boundary/CallPluginsAndDeleteParticles.hpp +++ b/include/picongpu/particles/boundary/CallPluginsAndDeleteParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -27,50 +27,40 @@ namespace picongpu { -namespace particles -{ -namespace boundary -{ - /** - * Guard handler policy calling all registered plugins when particles - * leave the global simulation volume. This class serves as policy for - * the `ParticleDescription` template class. - * - * For each plugin the method `IPlugin::onParticleLeave()` is called. - * After that the guard particles are deleted. 
- */ - struct CallPluginsAndDeleteParticles + namespace particles { - template< class T_Particles > - void - handleOutgoing( - T_Particles & particles, - int32_t const direction - ) const + namespace boundary { - using Plugins = std::list; - Plugins plugins = Environment<>::get().PluginConnector().getAllPlugins(); - - for( Plugins::iterator iter = plugins.begin(); iter != plugins.end(); iter++ ) + /** + * Guard handler policy calling all registered plugins when particles + * leave the global simulation volume. This class serves as policy for + * the `ParticleDescription` template class. + * + * For each plugin the method `IPlugin::onParticleLeave()` is called. + * After that the guard particles are deleted. + */ + struct CallPluginsAndDeleteParticles { - ( *iter )->onParticleLeave( - T_Particles::FrameType::getName(), - direction - ); - } + template + void handleOutgoing(T_Particles& particles, int32_t const direction) const + { + using Plugins = std::list; + Plugins plugins = Environment<>::get().PluginConnector().getAllPlugins(); + + for(Plugins::iterator iter = plugins.begin(); iter != plugins.end(); iter++) + { + (*iter)->onParticleLeave(T_Particles::FrameType::getName(), direction); + } - particles.deleteGuardParticles( direction ); - } + particles.deleteGuardParticles(direction); + } - template< class T_Particles > - void - handleIncoming( - T_Particles &, - int32_t const - ) const - {} - }; + template + void handleIncoming(T_Particles&, int32_t const) const + { + } + }; -} // namespace particles -} // namespace boundary + } // namespace boundary + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp b/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp index 053beb4ff4..47e76bdc1e 100644 --- a/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp +++ b/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* 
Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -33,155 +33,146 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -/** Handling of the Bremsstrahlung effect. - * - * Here the screened Bethe-Heitler cross section is used. See e.g.: - * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." - * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. - * - * The numerics separates the energy spectrum into two parts. In the low-energy part - * photon emission is neglected and a drag force is applied to the electrons. In the high-energy part - * photons are created in addition to the drag force. - * - * Electron deflection is treated as screened Rutherford scattering, see e.g. Jackson, chap. 13.5 - * - * The photon emission angle is taken from the Lorentz-boosted dipole radiation formula, - * see e.g. Jackson, chap. 15.2 - * - * \tparam T_ElectronSpecies - * \tparam T_PhotonSpecies - */ -template -struct Bremsstrahlung -{ - using IonSpecies = T_IonSpecies; - using ElectronSpecies = T_ElectronSpecies; - using PhotonSpecies = T_PhotonSpecies; - - using FrameType = typename ElectronSpecies::FrameType; - - /* specify field to particle interpolation scheme */ - using Field2ParticleInterpolation = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; - - /* margins around the supercell for the interpolation of the field on the cells */ - using LowerMargin = typename GetMargin::LowerMargin; - using UpperMargin = typename GetMargin::UpperMargin; - - /* relevant area of a block */ - using BlockArea = SuperCellDescription< - typename MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - BlockArea BlockDescription; - - using TVec = MappingDesc::SuperCellSize; - - using ValueTypeIonDensity = FieldTmp::ValueType; - -private: - /* global memory ion density field device databoxes */ - PMACC_ALIGN(ionDensityBox, FieldTmp::DataBoxType); - /* shared memory ion density 
device databoxes */ - PMACC_ALIGN(cachedIonDensity, DataBox >); - - PMACC_ALIGN(scaledSpectrumFunctor, ScaledSpectrum::LookupTableFunctor); - PMACC_ALIGN(stoppingPowerFunctor, ScaledSpectrum::LookupTableFunctor); - PMACC_ALIGN(getPhotonAngleFunctor, GetPhotonAngle::GetPhotonAngleFunctor); - - PMACC_ALIGN(photonMom, float3_X); - - /* random number generator */ - using RNGFactory = pmacc::random::RNGProvider; - using Distribution = pmacc::random::distributions::Uniform; - using RandomGen = typename RNGFactory::GetRandomType::type; - RandomGen randomGen; - -public: - /* host constructor initializing member */ - HINLINE Bremsstrahlung( - const ScaledSpectrum::LookupTableFunctor& scaledSpectrumFunctor, - const ScaledSpectrum::LookupTableFunctor& stoppingPowerFunctor, - const GetPhotonAngle::GetPhotonAngleFunctor& getPhotonAngleFunctor, - const uint32_t currentStep); - - /** Initialization function on device - * - * \brief Cache ion density field on device - * and initialize possible prerequisites, like e.g. random number generator. - * - * This function will be called inline on the device which must happen BEFORE threads diverge - * during loop execution. The reason for this is the `__syncthreads()` call which is necessary after - * initializing the ion density field in shared memory. 
- */ - template< typename T_Acc > - DINLINE void init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset - ); - - /** cache fields used by this functor - * - * @warning this is a collective method and calls synchronize - * - * @tparam T_Acc alpaka accelerator type - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * - * @param acc alpaka accelerator - * @param blockCell relative offset (in cells) to the local domain plus the guarding cells - * @param workerCfg configuration of the worker - */ - template< - typename T_Acc , - typename T_WorkerCfg - > - DINLINE void collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg - ); - - /** Rotates a vector to a given polar angle and a random azimuthal angle. - * - * @param vec vector to be rotated - * @param theta polar angle - * @return rotated vector - */ - template< typename T_Acc > - DINLINE float3_X scatterByTheta(const T_Acc& acc, const float3_X vec, const float_X theta); - - /** Return the number of target particles to be created from each source particle. - * - * Called for each frame of the source species. - * - * @param sourceFrame Frame of the source species - * @param localIdx Index of the source particle within frame - * @return number of particle to be created from each source particle - */ - template< typename T_Acc > - DINLINE unsigned int numNewParticles(const T_Acc& acc, FrameType& sourceFrame, int localIdx); - - /** Functor implementation. - * - * Called once for each single particle creation. 
- * - * \tparam Electron type of electron which creates the photon - * \tparam Photon type of photon that is created - */ - template - DINLINE void operator()(const T_Acc& acc, Electron& electron, Photon& photon); -}; - -} // namespace bremsstrahlung -} // namespace particles + namespace particles + { + namespace bremsstrahlung + { + /** Handling of the Bremsstrahlung effect. + * + * Here the screened Bethe-Heitler cross section is used. See e.g.: + * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." + * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. + * + * The numerics separates the energy spectrum into two parts. In the low-energy part + * photon emission is neglected and a drag force is applied to the electrons. In the high-energy part + * photons are created in addition to the drag force. + * + * Electron deflection is treated as screened Rutherford scattering, see e.g. Jackson, chap. 13.5 + * + * The photon emission angle is taken from the Lorentz-boosted dipole radiation formula, + * see e.g. Jackson, chap. 
15.2 + * + * \tparam T_ElectronSpecies + * \tparam T_PhotonSpecies + */ + template + struct Bremsstrahlung + { + using IonSpecies = T_IonSpecies; + using ElectronSpecies = T_ElectronSpecies; + using PhotonSpecies = T_PhotonSpecies; + + using FrameType = typename ElectronSpecies::FrameType; + + /* specify field to particle interpolation scheme */ + using Field2ParticleInterpolation = + typename pmacc::traits::Resolve>::type>::type; + + /* margins around the supercell for the interpolation of the field on the cells */ + using LowerMargin = typename GetMargin::LowerMargin; + using UpperMargin = typename GetMargin::UpperMargin; + + /* relevant area of a block */ + using BlockArea = SuperCellDescription; + + BlockArea BlockDescription; + + using TVec = MappingDesc::SuperCellSize; + + using ValueTypeIonDensity = FieldTmp::ValueType; + + private: + /* global memory ion density field device databoxes */ + PMACC_ALIGN(ionDensityBox, FieldTmp::DataBoxType); + /* shared memory ion density device databoxes */ + PMACC_ALIGN( + cachedIonDensity, + DataBox>); + + PMACC_ALIGN(scaledSpectrumFunctor, ScaledSpectrum::LookupTableFunctor); + PMACC_ALIGN(stoppingPowerFunctor, ScaledSpectrum::LookupTableFunctor); + PMACC_ALIGN(getPhotonAngleFunctor, GetPhotonAngle::GetPhotonAngleFunctor); + + PMACC_ALIGN(photonMom, float3_X); + + /* random number generator */ + using RNGFactory = pmacc::random::RNGProvider; + using Distribution = pmacc::random::distributions::Uniform; + using RandomGen = typename RNGFactory::GetRandomType::type; + RandomGen randomGen; + + public: + /* host constructor initializing member */ + HINLINE Bremsstrahlung( + const ScaledSpectrum::LookupTableFunctor& scaledSpectrumFunctor, + const ScaledSpectrum::LookupTableFunctor& stoppingPowerFunctor, + const GetPhotonAngle::GetPhotonAngleFunctor& getPhotonAngleFunctor, + const uint32_t currentStep); + + /** Initialization function on device + * + * \brief Cache ion density field on device + * and initialize possible 
prerequisites, like e.g. random number generator. + * + * This function will be called inline on the device which must happen BEFORE threads diverge + * during loop execution. The reason for this is the `cupla::__syncthreads( acc )` call which is + * necessary after initializing the ion density field in shared memory. + */ + template + DINLINE void init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset); + + /** cache fields used by this functor + * + * @warning this is a collective method and calls synchronize + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * + * @param acc alpaka accelerator + * @param blockCell relative offset (in cells) to the local domain plus the guarding cells + * @param workerCfg configuration of the worker + */ + template + DINLINE void collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const T_WorkerCfg& workerCfg); + + /** Rotates a vector to a given polar angle and a random azimuthal angle. + * + * @param vec vector to be rotated + * @param theta polar angle + * @return rotated vector + */ + template + DINLINE float3_X scatterByTheta(const T_Acc& acc, const float3_X vec, const float_X theta); + + /** Return the number of target particles to be created from each source particle. + * + * Called for each frame of the source species. + * + * @param sourceFrame Frame of the source species + * @param localIdx Index of the source particle within frame + * @return number of particle to be created from each source particle + */ + template + DINLINE unsigned int numNewParticles(const T_Acc& acc, FrameType& sourceFrame, int localIdx); + + /** Functor implementation. + * + * Called once for each single particle creation. 
+ * + * \tparam Electron type of electron which creates the photon + * \tparam Photon type of photon that is created + */ + template + DINLINE void operator()(const T_Acc& acc, Electron& electron, Photon& photon); + }; + + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp b/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp index 6c472071fa..64584f9654 100644 --- a/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp +++ b/include/picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -30,7 +30,6 @@ #include "picongpu/particles/traits/GetAtomicNumbers.hpp" #include -#include #include #include #include @@ -39,278 +38,244 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -template -Bremsstrahlung::Bremsstrahlung( - const ScaledSpectrum::LookupTableFunctor& scaledSpectrumFunctor, - const ScaledSpectrum::LookupTableFunctor& stoppingPowerFunctor, - const GetPhotonAngle::GetPhotonAngleFunctor& getPhotonAngleFunctor, - const uint32_t currentStep) - : scaledSpectrumFunctor(scaledSpectrumFunctor), - stoppingPowerFunctor(stoppingPowerFunctor), - getPhotonAngleFunctor(getPhotonAngleFunctor), - photonMom(float3_X::create(0)), - randomGen(RNGFactory::createRandom()) -{ - DataConnector &dc = Environment<>::get().DataConnector(); - - /* initialize pointers on host-side tmp-field databoxes */ - auto fieldIonDensity = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - /* reset values to zero */ - fieldIonDensity->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); - - /* load species without copying the particle data to the host */ - auto ionSpecies = dc.get< T_IonSpecies >( T_IonSpecies::FrameType::getName(), true ); - - /* compute ion density */ - using DensitySolver = typename 
particleToGrid::CreateFieldTmpOperation< - T_IonSpecies, - particleToGrid::derivedAttributes::Density - >::type::Solver; - fieldIonDensity->template computeValue< CORE + BORDER, DensitySolver >(*ionSpecies, currentStep); - dc.releaseData(T_IonSpecies::FrameType::getName()); - - /* initialize device-side tmp-field databoxes */ - this->ionDensityBox = fieldIonDensity->getDeviceDataBox(); -} - -template< - typename T_IonSpecies, - typename T_ElectronSpecies, - typename T_PhotonSpecies -> -template< - typename T_Acc, - typename T_WorkerCfg -> -DINLINE void Bremsstrahlung::collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg -) -{ - /* caching of ion density field */ - cachedIonDensity = CachedBox::create< - 0, - ValueTypeIonDensity - >( - acc, - BlockArea() - ); - - /* instance of nvidia assignment operator */ - nvidia::functors::Assign assign; - /* copy fields from global to shared */ - const auto fieldIonDensityBlock = ionDensityBox.shift(blockCell); - - ThreadCollective< - BlockArea, - T_WorkerCfg::numWorkers - > collective( workerCfg.getWorkerIdx( ) ); - collective( - acc, - assign, - cachedIonDensity, - fieldIonDensityBlock - ); - - /* wait for shared memory to be initialized */ - __syncthreads(); -} - -template< - typename T_IonSpecies, - typename T_ElectronSpecies, - typename T_PhotonSpecies -> -template< typename T_Acc > -DINLINE -void Bremsstrahlung::init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset -) -{ - /* initialize random number generator with the local cell index in the simulation */ - this->randomGen.init(localCellOffset); -} - - -template -template -DINLINE -float3_X Bremsstrahlung::scatterByTheta - (const T_Acc & acc, const float3_X vec, const float_X theta) -{ - using namespace pmacc::algorithms; - - float_X sinTheta, cosTheta; - math::sincos(theta, sinTheta, cosTheta); - - const float_X phi = -math::Pi::value + math::Pi::doubleValue 
* this->randomGen(acc); - float_X sinPhi, cosPhi; - math::sincos(phi, sinPhi, cosPhi); - - const float3_X vecUp(0.0, 0.0, 1.0); - float3_X vecOrtho1 = math::cross(vecUp, vec); - const float_X vecOrtho1Abs = math::abs(vecOrtho1); - - float3_X vecOrtho1_norm; - if(vecOrtho1Abs == float_X(0.0)) - vecOrtho1_norm = float3_X(1.0, 0.0, 0.0); - else - vecOrtho1_norm = vecOrtho1 / vecOrtho1Abs; - const float3_X vecOrtho2 = math::cross(vecOrtho1_norm, vec); - vecOrtho1 = vecOrtho1_norm * math::abs(vec); - - return vec * cosTheta + - vecOrtho1 * (sinTheta * cosPhi) + - vecOrtho2 * (sinTheta * sinPhi); -} - -template -template -DINLINE -unsigned int Bremsstrahlung::numNewParticles( - const T_Acc& acc, - FrameType& sourceFrame, - int localIdx -) -{ - using namespace pmacc::algorithms; - - auto particle = sourceFrame[localIdx]; - - /* particle position, used for field-to-particle interpolation */ - const floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; - /* multi-dim coordinate of the local cell inside the super cell */ - const DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - /* interpolation of fieldTmp */ - const picongpu::traits::FieldPosition fieldTmpPos; - const ValueTypeIonDensity ionDensity_norm = Field2ParticleInterpolation() - (cachedIonDensity.shift(localCell).toCursor(), pos, fieldTmpPos()); - - /* TODO: obtain the ion density from the molare ion density in order to avoid the rescaling. 
- * So this should be: ionDensity = ionMolDensity / UNIT_AMOUNT_SUBSTANCE */ - const float_X ionDensity = ionDensity_norm.x() * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE; - - const float_X weighting = particle[weighting_]; - - const float_X c = SPEED_OF_LIGHT; - float3_X mom = particle[momentum_] / weighting; - const float_X momAbs = math::abs(mom); - float3_X mom_norm = mom / momAbs; - - const float_X mass = frame::getMass(); - const float_X Ekin = (Gamma<>()(mom, mass) - float_X(1.0)) * mass * c*c; - if(Ekin < electron::MIN_ENERGY) - return 0; - - /* electron deflection due to Rutherford scattering without modifying the electron - energy based on radiation emission */ - const float_X zMin = float_X(1.0) / (math::Pi::value * math::Pi::value); - const float_X zMax = float_X(1.0) / (electron::MIN_THETA*electron::MIN_THETA); - const float_X z = zMin + this->randomGen(acc) * (zMax - zMin); - const float_X theta = math::rsqrt(z); - const float_X targetZ = GetAtomicNumbers::type::numberOfProtons; - const float_X rutherfordCoeff = float_X(2.0) * ELECTRON_CHARGE*ELECTRON_CHARGE / - (float_X(4.0) * math::Pi::value * EPS0) * targetZ / Ekin; - const float_X scaledDeflectionDCS = math::Pi::value * (zMax - zMin) * rutherfordCoeff*rutherfordCoeff; - const float_X deflectionProb = ionDensity * c * DELTA_T * scaledDeflectionDCS; - - if(this->randomGen(acc) < deflectionProb) - { - mom = this->scatterByTheta(acc, mom, theta); - mom_norm = mom / momAbs; - } - - /* non-radiative Bremsstrahlung */ - const float_X kappaCutoff = math::min(photon::SOFT_PHOTONS_CUTOFF / Ekin, float_X(1.0)); - const float_X stoppingPower = ionDensity * c * this->stoppingPowerFunctor(Ekin, kappaCutoff); - const float_X newEkin = math::max(Ekin - stoppingPower * DELTA_T, float_X(0.0)); - const float_X newEkin_norm = newEkin / (mass * c*c); - /* This is based on: (p / mc)^2 = (E_kin / mc^2)^2 + 2 * (E_kin / mc^2) */ - const float_X newMomAbs = mass * c * math::sqrt(newEkin_norm*newEkin_norm + 
float_X(2.0) * newEkin_norm); - const float_X deltaMom = newMomAbs - momAbs; - particle[momentum_] = (mom + deltaMom * mom_norm) * weighting; - - /* photon emission */ - const float_X delta = this->randomGen(acc); - const float_X kappa = math::pow(kappaCutoff, delta); - const float_X scalingFactor = -math::log(kappaCutoff); - const float_X emissionProb = photon::WEIGHTING_RATIO * scalingFactor * ionDensity * c * DELTA_T * this->scaledSpectrumFunctor(Ekin, kappa); - - // raise a warning if the emission probability is too high. - if(picLog::log_level & picLog::CRITICAL::lvl) + namespace particles { - if(emissionProb > float_X(photon::SINGLE_EMISSION_PROB_LIMIT)) + namespace bremsstrahlung { - const float_X Ekin_SI = Ekin * UNIT_ENERGY; - printf("[Bremsstrahlung] warning: emission probability is too high: \ + template + Bremsstrahlung::Bremsstrahlung( + const ScaledSpectrum::LookupTableFunctor& scaledSpectrumFunctor, + const ScaledSpectrum::LookupTableFunctor& stoppingPowerFunctor, + const GetPhotonAngle::GetPhotonAngleFunctor& getPhotonAngleFunctor, + const uint32_t currentStep) + : scaledSpectrumFunctor(scaledSpectrumFunctor) + , stoppingPowerFunctor(stoppingPowerFunctor) + , getPhotonAngleFunctor(getPhotonAngleFunctor) + , photonMom(float3_X::create(0)) + , randomGen(RNGFactory::createRandom()) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /* initialize pointers on host-side tmp-field databoxes */ + auto fieldIonDensity = dc.get(FieldTmp::getUniqueId(0), true); + /* reset values to zero */ + fieldIonDensity->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + + /* load species without copying the particle data to the host */ + auto ionSpecies = dc.get(T_IonSpecies::FrameType::getName(), true); + + /* compute ion density */ + using DensitySolver = typename particleToGrid:: + CreateFieldTmpOperation::type::Solver; + fieldIonDensity->template computeValue(*ionSpecies, currentStep); + 
dc.releaseData(T_IonSpecies::FrameType::getName()); + + /* initialize device-side tmp-field databoxes */ + this->ionDensityBox = fieldIonDensity->getDeviceDataBox(); + } + + template + template + DINLINE void Bremsstrahlung::collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const T_WorkerCfg& workerCfg) + { + /* caching of ion density field */ + cachedIonDensity = CachedBox::create<0, ValueTypeIonDensity>(acc, BlockArea()); + + /* instance of nvidia assignment operator */ + nvidia::functors::Assign assign; + /* copy fields from global to shared */ + const auto fieldIonDensityBlock = ionDensityBox.shift(blockCell); + + ThreadCollective collective(workerCfg.getWorkerIdx()); + collective(acc, assign, cachedIonDensity, fieldIonDensityBlock); + + /* wait for shared memory to be initialized */ + cupla::__syncthreads(acc); + } + + template + template + DINLINE void Bremsstrahlung::init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset) + { + /* initialize random number generator with the local cell index in the simulation */ + this->randomGen.init(localCellOffset); + } + + + template + template + DINLINE float3_X Bremsstrahlung::scatterByTheta( + const T_Acc& acc, + const float3_X vec, + const float_X theta) + { + using namespace pmacc::algorithms; + + float_X sinTheta, cosTheta; + pmacc::math::sincos(theta, sinTheta, cosTheta); + + const float_X phi + = -pmacc::math::Pi::value + pmacc::math::Pi::doubleValue * this->randomGen(acc); + float_X sinPhi, cosPhi; + pmacc::math::sincos(phi, sinPhi, cosPhi); + + const float3_X vecUp(0.0, 0.0, 1.0); + float3_X vecOrtho1 = pmacc::math::cross(vecUp, vec); + const float_X vecOrtho1Abs = math::abs(vecOrtho1); + + float3_X vecOrtho1_norm; + if(vecOrtho1Abs == float_X(0.0)) + vecOrtho1_norm = float3_X(1.0, 0.0, 0.0); + else + vecOrtho1_norm = vecOrtho1 / vecOrtho1Abs; + const float3_X vecOrtho2 = pmacc::math::cross(vecOrtho1_norm, vec); + vecOrtho1 = 
vecOrtho1_norm * math::abs(vec); + + return vec * cosTheta + vecOrtho1 * (sinTheta * cosPhi) + vecOrtho2 * (sinTheta * sinPhi); + } + + template + template + DINLINE unsigned int Bremsstrahlung::numNewParticles( + const T_Acc& acc, + FrameType& sourceFrame, + int localIdx) + { + using namespace pmacc::algorithms; + + auto particle = sourceFrame[localIdx]; + + /* particle position, used for field-to-particle interpolation */ + const floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; + /* multi-dim coordinate of the local cell inside the super cell */ + const DataSpace localCell( + DataSpaceOperations::template map(particleCellIdx)); + /* interpolation of fieldTmp */ + const picongpu::traits::FieldPosition fieldTmpPos; + const ValueTypeIonDensity ionDensity_norm + = Field2ParticleInterpolation()(cachedIonDensity.shift(localCell).toCursor(), pos, fieldTmpPos()); + + /* TODO: obtain the ion density from the molare ion density in order to avoid the rescaling. 
+ * So this should be: ionDensity = ionMolDensity / UNIT_AMOUNT_SUBSTANCE */ + const float_X ionDensity = ionDensity_norm.x() * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE; + + const float_X weighting = particle[weighting_]; + + const float_X c = SPEED_OF_LIGHT; + float3_X mom = particle[momentum_] / weighting; + const float_X momAbs = math::abs(mom); + float3_X mom_norm = mom / momAbs; + + const float_X mass = frame::getMass(); + const float_X Ekin = (Gamma<>()(mom, mass) - float_X(1.0)) * mass * c * c; + if(Ekin < electron::MIN_ENERGY) + return 0; + + /* electron deflection due to Rutherford scattering without modifying the electron + energy based on radiation emission */ + const float_X zMin + = float_X(1.0) / (pmacc::math::Pi::value * pmacc::math::Pi::value); + const float_X zMax = float_X(1.0) / (electron::MIN_THETA * electron::MIN_THETA); + const float_X z = zMin + this->randomGen(acc) * (zMax - zMin); + const float_X theta = math::rsqrt(z); + const float_X targetZ = GetAtomicNumbers::type::numberOfProtons; + const float_X rutherfordCoeff = float_X(2.0) * ELECTRON_CHARGE * ELECTRON_CHARGE + / (float_X(4.0) * pmacc::math::Pi::value * EPS0) * targetZ / Ekin; + const float_X scaledDeflectionDCS + = pmacc::math::Pi::value * (zMax - zMin) * rutherfordCoeff * rutherfordCoeff; + const float_X deflectionProb = ionDensity * c * DELTA_T * scaledDeflectionDCS; + + if(this->randomGen(acc) < deflectionProb) + { + mom = this->scatterByTheta(acc, mom, theta); + mom_norm = mom / momAbs; + } + + /* non-radiative Bremsstrahlung */ + const float_X kappaCutoff = math::min(photon::SOFT_PHOTONS_CUTOFF / Ekin, float_X(1.0)); + const float_X stoppingPower = ionDensity * c * this->stoppingPowerFunctor(Ekin, kappaCutoff); + const float_X newEkin = math::max(Ekin - stoppingPower * DELTA_T, float_X(0.0)); + const float_X newEkin_norm = newEkin / (mass * c * c); + /* This is based on: (p / mc)^2 = (E_kin / mc^2)^2 + 2 * (E_kin / mc^2) */ + const float_X newMomAbs + = mass * c * 
math::sqrt(newEkin_norm * newEkin_norm + float_X(2.0) * newEkin_norm); + const float_X deltaMom = newMomAbs - momAbs; + particle[momentum_] = (mom + deltaMom * mom_norm) * weighting; + + /* photon emission */ + const float_X delta = this->randomGen(acc); + const float_X kappa = math::pow(kappaCutoff, delta); + const float_X scalingFactor = -math::log(kappaCutoff); + const float_X emissionProb = photon::WEIGHTING_RATIO * scalingFactor * ionDensity * c * DELTA_T + * this->scaledSpectrumFunctor(Ekin, kappa); + + // raise a warning if the emission probability is too high. + if(picLog::log_level & picLog::CRITICAL::lvl) + { + if(emissionProb > float_X(photon::SINGLE_EMISSION_PROB_LIMIT)) + { + const float_X Ekin_SI = Ekin * UNIT_ENERGY; + printf( + "[Bremsstrahlung] warning: emission probability is too high: \ p = %g, at Ekin = %g keV, kappa = %g, ion density = %g m^-3\n", - emissionProb, - Ekin_SI * UNITCONV_Joule_to_keV, - kappa, - ionDensity / (UNIT_LENGTH*UNIT_LENGTH*UNIT_LENGTH)); - } - } - - if(this->randomGen(acc) < emissionProb) - { - const float_X photonEnergy = kappa * Ekin; - this->photonMom = mom_norm * weighting / photon::WEIGHTING_RATIO * photonEnergy / c; - return 1; - } - - return 0; -} - - -template -template -DINLINE -void Bremsstrahlung::operator()( - const T_Acc& acc, - Electron& electron, - Photon& photon -) -{ - auto destPhoton = - pmacc::particles::operations::deselect< - boost::mpl::vector< - multiMask, - momentum, - weighting - > - >(photon); - - namespace parOp = pmacc::particles::operations; - parOp::assign( destPhoton, parOp::deselect(electron) ); - - const float3_X elMom = electron[momentum_]; - const float_X weighting = electron[weighting_] / photon::WEIGHTING_RATIO; - electron[momentum_] = elMom - this->photonMom; // ultra relativistic limit in terms of energy - - /* photon emission angle */ - const float_X mass = frame::getMass(); - const float_X gamma = Gamma<>()(elMom / weighting, mass); - - const float_X theta = 
this->getPhotonAngleFunctor(this->randomGen(acc), gamma); - - const float3_X scatteredPhotonMom = this->scatterByTheta(acc, this->photonMom, theta); - - photon[multiMask_] = 1; - photon[momentum_] = scatteredPhotonMom; - photon[weighting_] = weighting; -} - - -} // namespace bremsstrahlung -} // namespace particles + emissionProb, + Ekin_SI * UNITCONV_Joule_to_keV, + kappa, + ionDensity / (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH)); + } + } + + if(this->randomGen(acc) < emissionProb) + { + const float_X photonEnergy = kappa * Ekin; + this->photonMom = mom_norm * weighting / photon::WEIGHTING_RATIO * photonEnergy / c; + return 1; + } + + return 0; + } + + + template + template + DINLINE void Bremsstrahlung::operator()( + const T_Acc& acc, + Electron& electron, + Photon& photon) + { + auto destPhoton + = pmacc::particles::operations::deselect>( + photon); + + namespace parOp = pmacc::particles::operations; + parOp::assign(destPhoton, parOp::deselect(electron)); + + const float3_X elMom = electron[momentum_]; + const float_X weighting = electron[weighting_] / photon::WEIGHTING_RATIO; + electron[momentum_] = elMom - this->photonMom; // ultra relativistic limit in terms of energy + + /* photon emission angle */ + const float_X mass = frame::getMass(); + const float_X gamma = Gamma<>()(elMom / weighting, mass); + + const float_X theta = this->getPhotonAngleFunctor(this->randomGen(acc), gamma); + + const float3_X scatteredPhotonMom = this->scatterByTheta(acc, this->photonMom, theta); + + photon[multiMask_] = 1; + photon[momentum_] = scatteredPhotonMom; + photon[weighting_] = weighting; + } + + + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp b/include/picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp index b51daba6f2..527abe9a09 100644 --- a/include/picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp +++ 
b/include/picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -27,14 +27,14 @@ #include #include #include -#if( BOOST_VERSION == 106400 ) - /* `array_wrapper.hpp` must be included before `integrate.hpp` to avoid - * the error - * `boost/numeric/ublas/matrix.hpp(5977): error: namespace "boost::serialization" has no member "make_array"` - * in boost 1.64.0 - * see boost issue https://svn.boost.org/trac/boost/ticket/12516 - */ -# include +#if(BOOST_VERSION == 106400) +/* `array_wrapper.hpp` must be included before `integrate.hpp` to avoid + * the error + * `boost/numeric/ublas/matrix.hpp(5977): error: namespace "boost::serialization" has no member "make_array"` + * in boost 1.64.0 + * see boost issue https://svn.boost.org/trac/boost/ticket/12516 + */ +# include #endif #include #include @@ -43,230 +43,227 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -namespace detail -{ - -/** Functor mapping `delta` to the photon emission polar angle `theta`, - * where delta is a uniformly distributed random number between zero and one. - */ -struct GetPhotonAngleFunctor -{ - using LinInterpCursor = typename ::pmacc::result_of::Functor< - ::pmacc::cursor::tools::LinearInterp, - ::pmacc::cursor::BufferCursor - >::type; - - using type = float_X; - - LinInterpCursor linInterpCursor; - float_X lnMinGamma; - float_X lnMaxGamma; - - /** constructor - * - * @param linInterpCursor lookup table for the photon emission angle. - */ - HDINLINE GetPhotonAngleFunctor(LinInterpCursor linInterpCursor) - : linInterpCursor(linInterpCursor) - { - this->lnMinGamma = math::log(photon::MIN_GAMMA); - this->lnMaxGamma = math::log(photon::MAX_GAMMA); - } - - /** Return the polar emission angle of the photon. - * - * @param delta uniformly distributed random number between zero and one. 
- * @param gamma relativistic factor of the incident electron. - */ - HDINLINE float_X operator()(const float_X delta, const float_X gamma) const + namespace particles { - const float_X deltaLookupPos = delta * static_cast(photon::NUM_SAMPLES_DELTA - 1); - - const float_X lnGamma = algorithms::math::log(gamma); - const float_X gammaLookupPos = - (lnGamma - this->lnMinGamma) / - (this->lnMaxGamma - this->lnMinGamma) * - static_cast(photon::NUM_SAMPLES_GAMMA - 1); - - if (picLog::log_level & picLog::CRITICAL::lvl) + namespace bremsstrahlung { - if(gamma > photon::MAX_GAMMA) + namespace detail { - printf("[Bremsstrahlung] error lookup table: gamma = %g is out of range.\n", - gamma); - } - } - - return this->linInterpCursor[float2_X(deltaLookupPos, gammaLookupPos)]; - } -}; - -} // namespace detail - -/** Creates and holds the lookup table for the photon emission angle. - */ -struct GetPhotonAngle -{ - using GetPhotonAngleFunctor = detail::GetPhotonAngleFunctor; - -private: - - using MyBuf = boost::shared_ptr >; - MyBuf dBufTheta; - - /** probability density at polar angle theta. - * It's the ultrarelativistic limit of the dipole radiation formula, see e.g. Jackson, chap. 15.2 - */ - struct Probability - { - const float_64 gamma2; - Probability(const float_64 gamma) : gamma2(gamma*gamma) {} - - template - void operator()(const T_State &p, T_State &dpdtheta, const float_64 theta) const - { - const float_64 theta2 = theta*theta; - const float_64 denom = float_64(1.0) + gamma2 * theta2; - - dpdtheta[0] = float_64(3.0) * theta * gamma2 * (float_64(1.0) + gamma2*gamma2 * theta2*theta2) / - (denom*denom*denom*denom); - } - }; - - /** Return the absolute deviation of a delta, computed from a given theta, and a reference delta. - * - * Delta is the angular emission probability (normalized to one) integrated from zero to theta, - * where theta is the angle between the photon momentum and the final electron momentum. 
- */ - struct AimForDelta - { - const float_64 targetDelta; - const float_64 gamma; - - /** constructor - * - * @param targetDelta reference delta - * @param gamma relativistic factor - */ - AimForDelta(const float_64 targetDelta, const float_64 gamma) : - targetDelta(targetDelta), gamma(gamma) {} - - float_64 delta(const float_64 theta, const float_64 gamma) const - { - namespace odeint = boost::numeric::odeint; - - using state_type = boost::array; - - state_type integral_result = {0.0}; - const float_64 lowerLimit = 0.0; - const float_64 upperLimit = theta; - const float_64 stepwidth = (upperLimit - lowerLimit) / float_64(1000.0); - Probability integrand(gamma); - odeint::integrate(integrand, integral_result, lowerLimit, upperLimit, stepwidth); - - return integral_result[0]; - } - - float_64 operator()(const float_64 theta) const - { - return math::abs(this->delta(theta, this->gamma) - this->targetDelta); - } - }; - - /** Return the maximal theta which corresponds to the maximal delta and a given gamma - * - * @param gamma relativistic factor - */ - float_64 maxTheta(const float_64 gamma) const - { - AimForDelta aimForDelta(photon::MAX_DELTA, gamma); - - std::pair minimum; - - minimum = boost::math::tools::brent_find_minima( - aimForDelta, - 0.0, - pmacc::algorithms::math::Pi::value, - std::numeric_limits::digits); - - return minimum.first; - } - - /** computes the polar emission angle theta. 
- * - * @param delta uniformly distributed random number within [0, 1] or (0, 1) - * @param gamma relativistic factor - * @param maxTheta maximal theta - */ - float_64 theta(const float_64 delta, const float_64 gamma, const float_64 maxTheta) const - { - AimForDelta aimForDelta(delta, gamma); - const float_64 minTheta = 0.0; - std::pair minimum; - - minimum = boost::math::tools::brent_find_minima( - aimForDelta, - minTheta, - maxTheta, - std::numeric_limits::digits); - - return minimum.first; - } - -public: - - /** Generate lookup table - */ - void init() - { - // there is a margin of one cell to make the linear interpolation valid for border cells. - this->dBufTheta = MyBuf(new pmacc::container::DeviceBuffer( - photon::NUM_SAMPLES_DELTA + 1, - photon::NUM_SAMPLES_GAMMA + 1)); - - pmacc::container::HostBuffer hBufTheta(this->dBufTheta->size()); - hBufTheta.assign(float_X(0.0)); - auto curTheta = hBufTheta.origin(); - - const float_64 lnMinGamma = math::log(photon::MIN_GAMMA); - const float_64 lnMaxGamma = math::log(photon::MAX_GAMMA); - - for(uint32_t gammaIdx = 0; gammaIdx < photon::NUM_SAMPLES_GAMMA; gammaIdx++) - { - const float_64 lnGamma_norm = static_cast(gammaIdx) / - static_cast(photon::NUM_SAMPLES_GAMMA - 1); - const float_64 gamma = math::exp(lnMinGamma + (lnMaxGamma - lnMinGamma) * lnGamma_norm); - const float_64 maxTheta = this->maxTheta(gamma); - - for(uint32_t deltaIdx = 0; deltaIdx < photon::NUM_SAMPLES_DELTA; deltaIdx++) + /** Functor mapping `delta` to the photon emission polar angle `theta`, + * where delta is a uniformly distributed random number between zero and one. + */ + struct GetPhotonAngleFunctor + { + using LinInterpCursor = typename ::pmacc::result_of::Functor< + ::pmacc::cursor::tools::LinearInterp, + ::pmacc::cursor::BufferCursor>::type; + + using type = float_X; + + LinInterpCursor linInterpCursor; + float_X lnMinGamma; + float_X lnMaxGamma; + + /** constructor + * + * @param linInterpCursor lookup table for the photon emission angle. 
+ */ + HDINLINE GetPhotonAngleFunctor(LinInterpCursor linInterpCursor) : linInterpCursor(linInterpCursor) + { + this->lnMinGamma = math::log(photon::MIN_GAMMA); + this->lnMaxGamma = math::log(photon::MAX_GAMMA); + } + + /** Return the polar emission angle of the photon. + * + * @param delta uniformly distributed random number between zero and one. + * @param gamma relativistic factor of the incident electron. + */ + HDINLINE float_X operator()(const float_X delta, const float_X gamma) const + { + const float_X deltaLookupPos = delta * static_cast(photon::NUM_SAMPLES_DELTA - 1); + + const float_X lnGamma = math::log(gamma); + const float_X gammaLookupPos = (lnGamma - this->lnMinGamma) + / (this->lnMaxGamma - this->lnMinGamma) + * static_cast(photon::NUM_SAMPLES_GAMMA - 1); + + if(picLog::log_level & picLog::CRITICAL::lvl) + { + if(gamma > photon::MAX_GAMMA) + { + printf("[Bremsstrahlung] error lookup table: gamma = %g is out of range.\n", gamma); + } + } + + return this->linInterpCursor[float2_X(deltaLookupPos, gammaLookupPos)]; + } + }; + + } // namespace detail + + /** Creates and holds the lookup table for the photon emission angle. 
+ */ + struct GetPhotonAngle { - const float_64 delta = photon::MAX_DELTA * static_cast(deltaIdx) / - static_cast(photon::NUM_SAMPLES_DELTA - 1); - - *curTheta(deltaIdx, gammaIdx) = static_cast(this->theta(delta, gamma, maxTheta)); - } - } - - *this->dBufTheta = hBufTheta; - } - - /** Return a functor mapping `delta` to the photon emission polar angle `theta`, - * where delta is a uniformly distributed random number within [0, 1] or (0, 1) - */ - GetPhotonAngleFunctor getPhotonAngleFunctor() const - { - GetPhotonAngleFunctor::LinInterpCursor linInterpCursor = - pmacc::cursor::tools::LinearInterp()(this->dBufTheta->origin()); - - return GetPhotonAngleFunctor(linInterpCursor); - } -}; - -} // namespace bremsstrahlung -} // namespace particles + using GetPhotonAngleFunctor = detail::GetPhotonAngleFunctor; + + private: + using MyBuf = boost::shared_ptr>; + MyBuf dBufTheta; + + /** probability density at polar angle theta. + * It's the ultrarelativistic limit of the dipole radiation formula, see e.g. Jackson, chap. 15.2 + */ + struct Probability + { + const float_64 gamma2; + Probability(const float_64 gamma) : gamma2(gamma * gamma) + { + } + + template + void operator()(const T_State& p, T_State& dpdtheta, const float_64 theta) const + { + const float_64 theta2 = theta * theta; + const float_64 denom = float_64(1.0) + gamma2 * theta2; + + dpdtheta[0] = float_64(3.0) * theta * gamma2 + * (float_64(1.0) + gamma2 * gamma2 * theta2 * theta2) / (denom * denom * denom * denom); + } + }; + + /** Return the absolute deviation of a delta, computed from a given theta, and a reference delta. + * + * Delta is the angular emission probability (normalized to one) integrated from zero to theta, + * where theta is the angle between the photon momentum and the final electron momentum. 
+ */ + struct AimForDelta + { + const float_64 targetDelta; + const float_64 gamma; + + /** constructor + * + * @param targetDelta reference delta + * @param gamma relativistic factor + */ + AimForDelta(const float_64 targetDelta, const float_64 gamma) + : targetDelta(targetDelta) + , gamma(gamma) + { + } + + float_64 delta(const float_64 theta, const float_64 gamma) const + { + namespace odeint = boost::numeric::odeint; + + using state_type = boost::array; + + state_type integral_result = {0.0}; + const float_64 lowerLimit = 0.0; + const float_64 upperLimit = theta; + const float_64 stepwidth = (upperLimit - lowerLimit) / float_64(1000.0); + Probability integrand(gamma); + odeint::integrate(integrand, integral_result, lowerLimit, upperLimit, stepwidth); + + return integral_result[0]; + } + + float_64 operator()(const float_64 theta) const + { + return math::abs(this->delta(theta, this->gamma) - this->targetDelta); + } + }; + + /** Return the maximal theta which corresponds to the maximal delta and a given gamma + * + * @param gamma relativistic factor + */ + float_64 maxTheta(const float_64 gamma) const + { + AimForDelta aimForDelta(photon::MAX_DELTA, gamma); + + std::pair minimum; + + minimum = boost::math::tools::brent_find_minima( + aimForDelta, + 0.0, + pmacc::math::Pi::value, + std::numeric_limits::digits); + + return minimum.first; + } + + /** computes the polar emission angle theta. 
+ * + * @param delta uniformly distributed random number within [0, 1] or (0, 1) + * @param gamma relativistic factor + * @param maxTheta maximal theta + */ + float_64 theta(const float_64 delta, const float_64 gamma, const float_64 maxTheta) const + { + AimForDelta aimForDelta(delta, gamma); + const float_64 minTheta = 0.0; + std::pair minimum; + + minimum = boost::math::tools::brent_find_minima( + aimForDelta, + minTheta, + maxTheta, + std::numeric_limits::digits); + + return minimum.first; + } + + public: + /** Generate lookup table + */ + void init() + { + // there is a margin of one cell to make the linear interpolation valid for border cells. + this->dBufTheta = MyBuf(new pmacc::container::DeviceBuffer( + photon::NUM_SAMPLES_DELTA + 1, + photon::NUM_SAMPLES_GAMMA + 1)); + + pmacc::container::HostBuffer hBufTheta(this->dBufTheta->size()); + hBufTheta.assign(float_X(0.0)); + auto curTheta = hBufTheta.origin(); + + const float_64 lnMinGamma = math::log(photon::MIN_GAMMA); + const float_64 lnMaxGamma = math::log(photon::MAX_GAMMA); + + for(uint32_t gammaIdx = 0; gammaIdx < photon::NUM_SAMPLES_GAMMA; gammaIdx++) + { + const float_64 lnGamma_norm + = static_cast(gammaIdx) / static_cast(photon::NUM_SAMPLES_GAMMA - 1); + const float_64 gamma = math::exp(lnMinGamma + (lnMaxGamma - lnMinGamma) * lnGamma_norm); + const float_64 maxTheta = this->maxTheta(gamma); + + for(uint32_t deltaIdx = 0; deltaIdx < photon::NUM_SAMPLES_DELTA; deltaIdx++) + { + const float_64 delta = photon::MAX_DELTA * static_cast(deltaIdx) + / static_cast(photon::NUM_SAMPLES_DELTA - 1); + + *curTheta(deltaIdx, gammaIdx) = static_cast(this->theta(delta, gamma, maxTheta)); + } + } + + *this->dBufTheta = hBufTheta; + } + + /** Return a functor mapping `delta` to the photon emission polar angle `theta`, + * where delta is a uniformly distributed random number within [0, 1] or (0, 1) + */ + GetPhotonAngleFunctor getPhotonAngleFunctor() const + { + GetPhotonAngleFunctor::LinInterpCursor linInterpCursor + 
= pmacc::cursor::tools::LinearInterp()(this->dBufTheta->origin()); + + return GetPhotonAngleFunctor(linInterpCursor); + } + }; + + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp b/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp index 6ca09c1a1e..83c3c739bb 100644 --- a/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp +++ b/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -30,14 +30,14 @@ #include #include -#if( BOOST_VERSION == 106400 ) - /* `array_wrapper.hpp` must be included before `integrate.hpp` to avoid - * the error - * `boost/numeric/ublas/matrix.hpp(5977): error: namespace "boost::serialization" has no member "make_array"` - * in boost 1.64.0 - * see boost issue https://svn.boost.org/trac/boost/ticket/12516 - */ -# include +#if(BOOST_VERSION == 106400) +/* `array_wrapper.hpp` must be included before `integrate.hpp` to avoid + * the error + * `boost/numeric/ublas/matrix.hpp(5977): error: namespace "boost::serialization" has no member "make_array"` + * in boost 1.64.0 + * see boost issue https://svn.boost.org/trac/boost/ticket/12516 + */ +# include #endif #include #include @@ -45,156 +45,154 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -namespace detail -{ - -/** Functor for the scaled differential cross section (dcs) which - * equals to the electron energy loss times the cross section per unit energy. 
- */ -struct LookupTableFunctor -{ - using LinInterpCursor = typename ::pmacc::result_of::Functor< - ::pmacc::cursor::tools::LinearInterp, - ::pmacc::cursor::BufferCursor - >::type; - - using type = float_X; - - LinInterpCursor linInterpCursor; - float_X lnEMin; - float_X lnEMax; - - /** constructor - * - * @param linInterpCursor - */ - HDINLINE LookupTableFunctor(LinInterpCursor linInterpCursor); - /** scaled differential cross section - * - * @param Ekin kinetic energy of the incident electron - * @param kappa energy loss normalized to Ekin - */ - HDINLINE float_X operator()(const float_X Ekin, const float_X kappa) const; -}; - -} // namespace detail - - -/** Generates and holds the lookup tables for the scaled differential cross section - * and the stopping power. - * - * scaled differential cross section = electron energy loss times cross section per unit energy - * - * stopping power = energy loss per unit length - * - * The lookup tables are generated from the screened Bethe-Heitler cross section. See e.g.: - * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." - * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. - */ -struct ScaledSpectrum -{ -public: - using LookupTableFunctor = detail::LookupTableFunctor; -private: - - using MyBuf = boost::shared_ptr >; - MyBuf dBufScaledSpectrum; - MyBuf dBufStoppingPower; - - /** differential cross section: cross section per unit energy - * - * This is the screened Bethe-Heitler cross section. See e.g.: - * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." - * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. 
- * - * @param Ekin kinetic electron energy - * @param kappa energy loss normalized to Ekin - * @param targetZ atomic number of the target material - */ - HINLINE float_64 dcs(const float_64 Ekin, const float_64 kappa, const float_64 targetZ) const; - - /** differential cross section times energy loss - */ - struct StoppingPowerIntegrand + namespace particles { - const float_64 Ekin; - const float_64 targetZ; - const ScaledSpectrum& scaledSpectrum; - - StoppingPowerIntegrand(const float_64 Ekin, const ScaledSpectrum& scaledSpectrum, const float_64 targetZ) : - Ekin(Ekin), scaledSpectrum(scaledSpectrum), targetZ(targetZ) {} - - template - void operator()(const T_State &x, T_State &dxdW, T_W W) const - { - dxdW[0] = this->scaledSpectrum.dcs(this->Ekin, W / this->Ekin, this->targetZ) * W; - } - }; - -public: - - /** Generate lookup tables - * - * @param targetZ atomic number of the target material - */ - HINLINE void init(const float_64 targetZ); - - /** Return a functor representing the scaled differential cross section - * - * scaled differential cross section = electron energy loss times cross section per unit energy - */ - HINLINE LookupTableFunctor getScaledSpectrumFunctor() const; - - /** Return a functor representing the stopping power - * - * stopping power = energy loss per unit length - */ - HINLINE LookupTableFunctor getStoppingPowerFunctor() const; -}; - - -/** Creates a `ScaledSpectrum` instance for a given electron species - * and stores it in a map object. - * - * This functor is called from MySimulation::init() to generate lookup tables. 
- * - * @tparam T_ElectronSpecies type or name as boost::mpl::string of the electron species - */ -template -struct FillScaledSpectrumMap -{ - using ElectronSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_ElectronSpecies - >; - - using IonSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - typename pmacc::particles::traits::ResolveAliasFromSpecies< - ElectronSpecies, - bremsstrahlungIons<> - >::type - >; - - template - void operator()(T_Map& map) const - { - const float_X targetZ = GetAtomicNumbers::type::numberOfProtons; - - if(map.count(targetZ) == 0) + namespace bremsstrahlung { - ScaledSpectrum scaledSpectrum; - scaledSpectrum.init(static_cast(targetZ)); - map[targetZ] = scaledSpectrum; - } - } -}; - -} // namespace bremsstrahlung -} // namespace particles + namespace detail + { + /** Functor for the scaled differential cross section (dcs) which + * equals to the electron energy loss times the cross section per unit energy. + */ + struct LookupTableFunctor + { + using LinInterpCursor = typename ::pmacc::result_of::Functor< + ::pmacc::cursor::tools::LinearInterp, + ::pmacc::cursor::BufferCursor>::type; + + using type = float_X; + + LinInterpCursor linInterpCursor; + float_X lnEMin; + float_X lnEMax; + + /** constructor + * + * @param linInterpCursor + */ + HDINLINE LookupTableFunctor(LinInterpCursor linInterpCursor); + /** scaled differential cross section + * + * @param Ekin kinetic energy of the incident electron + * @param kappa energy loss normalized to Ekin + */ + HDINLINE float_X operator()(const float_X Ekin, const float_X kappa) const; + }; + + } // namespace detail + + + /** Generates and holds the lookup tables for the scaled differential cross section + * and the stopping power. 
+ * + * scaled differential cross section = electron energy loss times cross section per unit energy + * + * stopping power = energy loss per unit length + * + * The lookup tables are generated from the screened Bethe-Heitler cross section. See e.g.: + * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." + * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. + */ + struct ScaledSpectrum + { + public: + using LookupTableFunctor = detail::LookupTableFunctor; + + private: + using MyBuf = boost::shared_ptr>; + MyBuf dBufScaledSpectrum; + MyBuf dBufStoppingPower; + + /** differential cross section: cross section per unit energy + * + * This is the screened Bethe-Heitler cross section. See e.g.: + * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." + * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. + * + * @param Ekin kinetic electron energy + * @param kappa energy loss normalized to Ekin + * @param targetZ atomic number of the target material + */ + HINLINE float_64 dcs(const float_64 Ekin, const float_64 kappa, const float_64 targetZ) const; + + /** differential cross section times energy loss + */ + struct StoppingPowerIntegrand + { + const float_64 Ekin; + const float_64 targetZ; + const ScaledSpectrum& scaledSpectrum; + + StoppingPowerIntegrand( + const float_64 Ekin, + const ScaledSpectrum& scaledSpectrum, + const float_64 targetZ) + : Ekin(Ekin) + , scaledSpectrum(scaledSpectrum) + , targetZ(targetZ) + { + } + + template + void operator()(const T_State& x, T_State& dxdW, T_W W) const + { + dxdW[0] = this->scaledSpectrum.dcs(this->Ekin, W / this->Ekin, this->targetZ) * W; + } + }; + + public: + /** Generate lookup tables + * + * @param targetZ atomic number of the target material + */ + HINLINE void init(const float_64 targetZ); + + /** Return a functor representing the scaled differential cross section + * + * scaled differential cross section = electron energy loss times cross 
section per unit energy + */ + HINLINE LookupTableFunctor getScaledSpectrumFunctor() const; + + /** Return a functor representing the stopping power + * + * stopping power = energy loss per unit length + */ + HINLINE LookupTableFunctor getStoppingPowerFunctor() const; + }; + + + /** Creates a `ScaledSpectrum` instance for a given electron species + * and stores it in a map object. + * + * This functor is called from Simulation::init() to generate lookup tables. + * + * @tparam T_ElectronSpecies type or name as boost::mpl::string of the electron species + */ + template + struct FillScaledSpectrumMap + { + using ElectronSpecies + = pmacc::particles::meta::FindByNameOrType_t; + + using IonSpecies = pmacc::particles::meta::FindByNameOrType_t< + VectorAllSpecies, + typename pmacc::particles::traits::ResolveAliasFromSpecies>:: + type>; + + template + void operator()(T_Map& map) const + { + const float_X targetZ = GetAtomicNumbers::type::numberOfProtons; + + if(map.count(targetZ) == 0) + { + ScaledSpectrum scaledSpectrum; + scaledSpectrum.init(static_cast(targetZ)); + map[targetZ] = scaledSpectrum; + } + } + }; + + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.tpp b/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.tpp index 884c719d01..01a0d3a749 100644 --- a/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.tpp +++ b/include/picongpu/particles/bremsstrahlung/ScaledSpectrum.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. 
* @@ -23,205 +23,207 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -namespace detail -{ - - -/** constructor - * - * @param linInterpCursor - */ -HDINLINE LookupTableFunctor::LookupTableFunctor(LinInterpCursor linInterpCursor) - : linInterpCursor(linInterpCursor) -{ - float_X const lnEMinTmp( electron::MIN_ENERGY ); - float_X const lnEMaxTmp( electron::MAX_ENERGY ); - this->lnEMin = math::log( lnEMinTmp ); - this->lnEMax = math::log( lnEMaxTmp); -} - -/** scaled differential cross section - * - * @param Ekin kinetic energy of the incident electron - * @param kappa energy loss normalized to Ekin - */ -HDINLINE float_X LookupTableFunctor::operator()(const float_X Ekin, const float_X kappa) const -{ - const float_X lnE = math::log(Ekin); - - const float_X binE = (lnE - this->lnEMin) / (this->lnEMax - this->lnEMin) * static_cast(electron::NUM_SAMPLES_EKIN - 1); - // in the low-energy limit Bremsstrahlung is not taken into account - if(binE < float_X(0.0)) - return float_X(0.0); - const float_X binKappa = kappa * static_cast(electron::NUM_SAMPLES_KAPPA - 1); - - if (picLog::log_level & picLog::CRITICAL::lvl) + namespace particles { - if(Ekin < electron::MIN_ENERGY || Ekin > electron::MAX_ENERGY) + namespace bremsstrahlung { - const float_64 Ekin_SI = Ekin * UNIT_ENERGY; - printf("[Bremsstrahlung] error lookup table: Ekin=%g MeV is out of range.\n", - float_X(Ekin_SI * UNITCONV_Joule_to_keV * float_X(1.0e-3))); - } - if(kappa < float_X(0.0) || kappa > float_X(1.0)) - printf("[Bremsstrahlung] error lookup table: kappa=%f is out of range.\n", - kappa); - } - - return this->linInterpCursor[float2_X(binE, binKappa)]; -} - - -} // namespace detail - - - -/** differential cross section: cross section per unit energy - * - * This is the screened Bethe-Heitler cross section. See e.g.: - * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." - * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. 
- * - * @param Ekin kinetic electron energy - * @param kappa energy loss normalized to Ekin - */ -float_64 ScaledSpectrum::dcs(const float_64 Ekin, const float_64 kappa, const float_64 targetZ) const -{ - constexpr float_64 pi = pmacc::algorithms::math::Pi::value; - constexpr float_64 bohrRadius = pi * 4.0 * EPS0 * HBAR * HBAR / - (float_64(ELECTRON_MASS) * ELECTRON_CHARGE * ELECTRON_CHARGE); - constexpr float_64 classicalElRadius = float_64(ELECTRON_CHARGE*ELECTRON_CHARGE) / (pi * 4.0 * EPS0 * ELECTRON_MASS * SPEED_OF_LIGHT*SPEED_OF_LIGHT); - constexpr float_64 fineStructureConstant = float_64(ELECTRON_CHARGE*ELECTRON_CHARGE) / (pi * 4.0 * EPS0 * HBAR * SPEED_OF_LIGHT); - - constexpr float_64 c = SPEED_OF_LIGHT; - constexpr float_64 c2 = c*c; - constexpr float_64 m_e = ELECTRON_MASS; - constexpr float_64 r_e = classicalElRadius; - constexpr float_64 alpha = fineStructureConstant; - - const float_64 W = kappa * Ekin; - const float_64 eps = W / (Ekin + m_e * c2); - const float_64 R = math::pow(targetZ, float_64(-1.0/3.0)) * bohrRadius; - const float_64 gamma = Ekin / (m_e * c2) + float_64(1.0); - const float_64 b = R * m_e * c / HBAR / (float_64(2.0) * gamma) * eps / (float_64(1.0) - eps); - - const float_64 phi_1 = float_64(4.0) * math::log(R * m_e * c / HBAR) + float_64(2.0) - float_64(2.0) * math::log(float_64(1.0) + b*b) - - float_64(4.0) * b * math::atan(float_64(1.0) / b); - const float_64 phi_2 = float_64(4.0) * math::log(R * m_e * c / HBAR) + float_64(7.0) / float_64(3.0) - float_64(2.0) * math::log(float_64(1.0) + b*b) - - float_64(6.0) * b * math::atan(float_64(1.0) / b) - - b*b * (float_64(4.0) - float_64(4.0) * b * math::atan(float_64(1.0) / b) - float_64(3.0) * math::log(float_64(1.0) + float_64(1.0) / (b*b))); - - return r_e*r_e * alpha * targetZ*targetZ / W * (eps*eps * phi_1 + float_64(4.0) / float_64(3.0) * (float_64(1.0) - eps) * phi_2); -} - - - -void ScaledSpectrum::init(const float_64 targetZ) -{ - namespace odeint = boost::numeric::odeint; - - 
// there is a margin of one cell to make the linear interpolation valid for border cells. - this->dBufScaledSpectrum = MyBuf( - new pmacc::container::DeviceBuffer( - electron::NUM_SAMPLES_EKIN + 1, - electron::NUM_SAMPLES_KAPPA + 1)); - this->dBufStoppingPower = MyBuf( - new pmacc::container::DeviceBuffer( - electron::NUM_SAMPLES_EKIN + 1, - electron::NUM_SAMPLES_KAPPA + 1)); - - pmacc::container::HostBuffer hBufScaledSpectrum(this->dBufScaledSpectrum->size()); - pmacc::container::HostBuffer hBufStoppingPower(this->dBufStoppingPower->size()); - hBufScaledSpectrum.assign(float_X(0.0)); - hBufStoppingPower.assign(float_X(0.0)); - - auto curScaledSpectrum = hBufScaledSpectrum.origin(); - auto curStoppingPower = hBufStoppingPower.origin(); - - const float_64 lnEMin = math::log(electron::MIN_ENERGY); - const float_64 lnEMax = math::log(electron::MAX_ENERGY); - - using state_type = boost::array; - - for(uint32_t EkinIdx = 0; EkinIdx < electron::NUM_SAMPLES_EKIN; EkinIdx++) - { - for(uint32_t kappaIdx = 0; kappaIdx < electron::NUM_SAMPLES_KAPPA; kappaIdx++) - { - float_64 kappa = static_cast(kappaIdx) / - static_cast(electron::NUM_SAMPLES_KAPPA - 1); - if(kappa == 0.0) - kappa = electron::MIN_KAPPA; - - const float_64 lnE_norm = static_cast(EkinIdx) / - static_cast(electron::NUM_SAMPLES_EKIN - 1); - const float_64 Ekin = math::exp(lnEMin + (lnEMax - lnEMin) * lnE_norm); - - *curScaledSpectrum(EkinIdx, kappaIdx) = Ekin * kappa * static_cast(this->dcs(Ekin, kappa, targetZ)); - - state_type integral_result = {0.0}; - const float_64 lowerLimit = electron::MIN_KAPPA * Ekin; - const float_64 upperLimit = kappa * Ekin; - const float_64 stepwidth = upperLimit / electron::NUM_STEPS_STOPPING_POWER_INTERGRAL; - StoppingPowerIntegrand integrand(Ekin, *this, targetZ); - odeint::integrate(integrand, integral_result, lowerLimit, upperLimit, stepwidth); - *curStoppingPower(EkinIdx, kappaIdx) = static_cast(integral_result[0]); - - // check for nans - if(*curScaledSpectrum(EkinIdx, 
kappaIdx) != *curScaledSpectrum(EkinIdx, kappaIdx)) + namespace detail { - const float_64 Ekin_SI = Ekin * UNIT_ENERGY; - const float_64 Ekin_MeV = Ekin_SI * UNITCONV_Joule_to_keV / 1.0e3; - std::stringstream errMsg; - errMsg << "[Bremsstrahlung] lookup table (scaled spectrum) has NaN-entry at Ekin = " - << Ekin_MeV << " MeV, kappa = " << kappa << std::endl; - throw std::runtime_error(errMsg.str().c_str()); - } - if(*curStoppingPower(EkinIdx, kappaIdx) != *curStoppingPower(EkinIdx, kappaIdx)) + /** constructor + * + * @param linInterpCursor + */ + HDINLINE LookupTableFunctor::LookupTableFunctor(LinInterpCursor linInterpCursor) + : linInterpCursor(linInterpCursor) + { + float_X const lnEMinTmp(electron::MIN_ENERGY); + float_X const lnEMaxTmp(electron::MAX_ENERGY); + this->lnEMin = math::log(lnEMinTmp); + this->lnEMax = math::log(lnEMaxTmp); + } + + /** scaled differential cross section + * + * @param Ekin kinetic energy of the incident electron + * @param kappa energy loss normalized to Ekin + */ + HDINLINE float_X LookupTableFunctor::operator()(const float_X Ekin, const float_X kappa) const + { + const float_X lnE = math::log(Ekin); + + const float_X binE = (lnE - this->lnEMin) / (this->lnEMax - this->lnEMin) + * static_cast(electron::NUM_SAMPLES_EKIN - 1); + // in the low-energy limit Bremsstrahlung is not taken into account + if(binE < float_X(0.0)) + return float_X(0.0); + const float_X binKappa = kappa * static_cast(electron::NUM_SAMPLES_KAPPA - 1); + + if(picLog::log_level & picLog::CRITICAL::lvl) + { + if(Ekin < electron::MIN_ENERGY || Ekin > electron::MAX_ENERGY) + { + const float_64 Ekin_SI = Ekin * UNIT_ENERGY; + printf( + "[Bremsstrahlung] error lookup table: Ekin=%g MeV is out of range.\n", + float_X(Ekin_SI * UNITCONV_Joule_to_keV * float_X(1.0e-3))); + } + if(kappa < float_X(0.0) || kappa > float_X(1.0)) + printf("[Bremsstrahlung] error lookup table: kappa=%f is out of range.\n", kappa); + } + + return this->linInterpCursor[float2_X(binE, binKappa)]; + 
} + + + } // namespace detail + + + /** differential cross section: cross section per unit energy + * + * This is the screened Bethe-Heitler cross section. See e.g.: + * Salvat, F., et al. "Monte Carlo simulation of bremsstrahlung emission by electrons." + * Radiation Physics and Chemistry 75.10 (2006): 1201-1219. + * + * @param Ekin kinetic electron energy + * @param kappa energy loss normalized to Ekin + */ + float_64 ScaledSpectrum::dcs(const float_64 Ekin, const float_64 kappa, const float_64 targetZ) const { - const float_64 Ekin_SI = Ekin * UNIT_ENERGY; - const float_64 Ekin_MeV = Ekin_SI * UNITCONV_Joule_to_keV / 1.0e3; - std::stringstream errMsg; - errMsg << "[Bremsstrahlung] lookup table (stopping power) has NaN-entry at Ekin = " - << Ekin_MeV << " MeV, kappa = " << kappa << std::endl; - throw std::runtime_error(errMsg.str().c_str()); + constexpr float_64 pi = pmacc::math::Pi::value; + constexpr float_64 bohrRadius + = pi * 4.0 * EPS0 * HBAR * HBAR / (float_64(ELECTRON_MASS) * ELECTRON_CHARGE * ELECTRON_CHARGE); + constexpr float_64 classicalElRadius = float_64(ELECTRON_CHARGE * ELECTRON_CHARGE) + / (pi * 4.0 * EPS0 * ELECTRON_MASS * SPEED_OF_LIGHT * SPEED_OF_LIGHT); + constexpr float_64 fineStructureConstant + = float_64(ELECTRON_CHARGE * ELECTRON_CHARGE) / (pi * 4.0 * EPS0 * HBAR * SPEED_OF_LIGHT); + + constexpr float_64 c = SPEED_OF_LIGHT; + constexpr float_64 c2 = c * c; + constexpr float_64 m_e = ELECTRON_MASS; + constexpr float_64 r_e = classicalElRadius; + constexpr float_64 alpha = fineStructureConstant; + + const float_64 W = kappa * Ekin; + const float_64 eps = W / (Ekin + m_e * c2); + const float_64 R = math::pow(targetZ, float_64(-1.0 / 3.0)) * bohrRadius; + const float_64 gamma = Ekin / (m_e * c2) + float_64(1.0); + const float_64 b = R * m_e * c / HBAR / (float_64(2.0) * gamma) * eps / (float_64(1.0) - eps); + + const float_64 phi_1 = float_64(4.0) * math::log(R * m_e * c / HBAR) + float_64(2.0) + - float_64(2.0) * math::log(float_64(1.0) + b 
* b) + - float_64(4.0) * b * math::atan(float_64(1.0) / b); + const float_64 phi_2 = float_64(4.0) * math::log(R * m_e * c / HBAR) + float_64(7.0) / float_64(3.0) + - float_64(2.0) * math::log(float_64(1.0) + b * b) + - float_64(6.0) * b * math::atan(float_64(1.0) / b) + - b * b + * (float_64(4.0) - float_64(4.0) * b * math::atan(float_64(1.0) / b) + - float_64(3.0) * math::log(float_64(1.0) + float_64(1.0) / (b * b))); + + return r_e * r_e * alpha * targetZ * targetZ / W + * (eps * eps * phi_1 + float_64(4.0) / float_64(3.0) * (float_64(1.0) - eps) * phi_2); } - } - } - *this->dBufScaledSpectrum = hBufScaledSpectrum; - *this->dBufStoppingPower = hBufStoppingPower; -} -/** Return a functor representing the scaled differential cross section - * - * scaled differential cross section = electron energy loss times cross section per unit energy - */ -detail::LookupTableFunctor ScaledSpectrum::getScaledSpectrumFunctor() const -{ - LookupTableFunctor::LinInterpCursor linInterpCursor = - pmacc::cursor::tools::LinearInterp()(this->dBufScaledSpectrum->origin()); + void ScaledSpectrum::init(const float_64 targetZ) + { + namespace odeint = boost::numeric::odeint; + + // there is a margin of one cell to make the linear interpolation valid for border cells. 
+ this->dBufScaledSpectrum = MyBuf(new pmacc::container::DeviceBuffer( + electron::NUM_SAMPLES_EKIN + 1, + electron::NUM_SAMPLES_KAPPA + 1)); + this->dBufStoppingPower = MyBuf(new pmacc::container::DeviceBuffer( + electron::NUM_SAMPLES_EKIN + 1, + electron::NUM_SAMPLES_KAPPA + 1)); + + pmacc::container::HostBuffer hBufScaledSpectrum(this->dBufScaledSpectrum->size()); + pmacc::container::HostBuffer hBufStoppingPower(this->dBufStoppingPower->size()); + hBufScaledSpectrum.assign(float_X(0.0)); + hBufStoppingPower.assign(float_X(0.0)); + + auto curScaledSpectrum = hBufScaledSpectrum.origin(); + auto curStoppingPower = hBufStoppingPower.origin(); + + const float_64 lnEMin = math::log(electron::MIN_ENERGY); + const float_64 lnEMax = math::log(electron::MAX_ENERGY); + + using state_type = boost::array; + + for(uint32_t EkinIdx = 0; EkinIdx < electron::NUM_SAMPLES_EKIN; EkinIdx++) + { + for(uint32_t kappaIdx = 0; kappaIdx < electron::NUM_SAMPLES_KAPPA; kappaIdx++) + { + float_64 kappa + = static_cast(kappaIdx) / static_cast(electron::NUM_SAMPLES_KAPPA - 1); + if(kappa == 0.0) + kappa = electron::MIN_KAPPA; + + const float_64 lnE_norm + = static_cast(EkinIdx) / static_cast(electron::NUM_SAMPLES_EKIN - 1); + const float_64 Ekin = math::exp(lnEMin + (lnEMax - lnEMin) * lnE_norm); + + *curScaledSpectrum(EkinIdx, kappaIdx) + = Ekin * kappa * static_cast(this->dcs(Ekin, kappa, targetZ)); + + state_type integral_result = {0.0}; + const float_64 lowerLimit = electron::MIN_KAPPA * Ekin; + const float_64 upperLimit = kappa * Ekin; + const float_64 stepwidth = upperLimit / electron::NUM_STEPS_STOPPING_POWER_INTERGRAL; + StoppingPowerIntegrand integrand(Ekin, *this, targetZ); + odeint::integrate(integrand, integral_result, lowerLimit, upperLimit, stepwidth); + *curStoppingPower(EkinIdx, kappaIdx) = static_cast(integral_result[0]); + + // check for nans + if(*curScaledSpectrum(EkinIdx, kappaIdx) != *curScaledSpectrum(EkinIdx, kappaIdx)) + { + const float_64 Ekin_SI = Ekin * 
UNIT_ENERGY; + const float_64 Ekin_MeV = Ekin_SI * UNITCONV_Joule_to_keV / 1.0e3; + std::stringstream errMsg; + errMsg << "[Bremsstrahlung] lookup table (scaled spectrum) has NaN-entry at Ekin = " + << Ekin_MeV << " MeV, kappa = " << kappa << std::endl; + throw std::runtime_error(errMsg.str().c_str()); + } + if(*curStoppingPower(EkinIdx, kappaIdx) != *curStoppingPower(EkinIdx, kappaIdx)) + { + const float_64 Ekin_SI = Ekin * UNIT_ENERGY; + const float_64 Ekin_MeV = Ekin_SI * UNITCONV_Joule_to_keV / 1.0e3; + std::stringstream errMsg; + errMsg << "[Bremsstrahlung] lookup table (stopping power) has NaN-entry at Ekin = " + << Ekin_MeV << " MeV, kappa = " << kappa << std::endl; + throw std::runtime_error(errMsg.str().c_str()); + } + } + } + + *this->dBufScaledSpectrum = hBufScaledSpectrum; + *this->dBufStoppingPower = hBufStoppingPower; + } - return LookupTableFunctor(linInterpCursor); -} + /** Return a functor representing the scaled differential cross section + * + * scaled differential cross section = electron energy loss times cross section per unit energy + */ + detail::LookupTableFunctor ScaledSpectrum::getScaledSpectrumFunctor() const + { + LookupTableFunctor::LinInterpCursor linInterpCursor + = pmacc::cursor::tools::LinearInterp()(this->dBufScaledSpectrum->origin()); -/** Return a functor representing the stopping power - * - * stopping power = energy loss per unit length - */ -detail::LookupTableFunctor ScaledSpectrum::getStoppingPowerFunctor() const -{ - LookupTableFunctor::LinInterpCursor linInterpCursor = - pmacc::cursor::tools::LinearInterp()(this->dBufStoppingPower->origin()); + return LookupTableFunctor(linInterpCursor); + } - return LookupTableFunctor(linInterpCursor); -} + /** Return a functor representing the stopping power + * + * stopping power = energy loss per unit length + */ + detail::LookupTableFunctor ScaledSpectrum::getStoppingPowerFunctor() const + { + LookupTableFunctor::LinInterpCursor linInterpCursor + = 
pmacc::cursor::tools::LinearInterp()(this->dBufStoppingPower->origin()); + + return LookupTableFunctor(linInterpCursor); + } -} // namespace bremsstrahlung -} // namespace particles + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/creation/creation.hpp b/include/picongpu/particles/creation/creation.hpp index 12d7d7c2a8..65adc80457 100644 --- a/include/picongpu/particles/creation/creation.hpp +++ b/include/picongpu/particles/creation/creation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -28,57 +28,59 @@ namespace picongpu { + namespace particles + { + namespace creation + { + /** Calls the `createParticlesKernel` kernel to create new particles. + * + * @param sourceSpecies species from which new particles are created + * @param targetSpecies species of the created particles + * @param particleCreator functor that defines the particle creation + * @param cellDesc mapping description + * + * `particleCreator` must define: `init()`, `numNewParticles()` and `operator()()` + * \see `PhotonCreator.hpp` for a further description. + */ + template< + typename T_SourceSpecies, + typename T_TargetSpecies, + typename T_ParticleCreator, + typename T_CellDescription> + void createParticlesFromSpecies( + T_SourceSpecies& sourceSpecies, + T_TargetSpecies& targetSpecies, + T_ParticleCreator particleCreator, + T_CellDescription cellDesc) + { + using SuperCellSize = typename MappingDesc::SuperCellSize; + const pmacc::math::Int coreBorderGuardSuperCells = cellDesc.getGridSuperCells(); + const pmacc::math::Int guardSuperCells = cellDesc.getGuardingSuperCells(); + const pmacc::math::Int coreBorderSuperCells = coreBorderGuardSuperCells - 2 * guardSuperCells; -namespace particles -{ - -namespace creation -{ - -/** Calls the `createParticlesKernel` kernel to create new particles. 
- * - * @param sourceSpecies species from which new particles are created - * @param targetSpecies species of the created particles - * @param particleCreator functor that defines the particle creation - * @param cellDesc mapping description - * - * `particleCreator` must define: `init()`, `numNewParticles()` and `operator()()` - * \see `PhotonCreator.hpp` for a further description. - */ -template -void createParticlesFromSpecies(T_SourceSpecies& sourceSpecies, - T_TargetSpecies& targetSpecies, - T_ParticleCreator particleCreator, - T_CellDescription cellDesc) -{ - using SuperCellSize = typename MappingDesc::SuperCellSize; - const pmacc::math::Int coreBorderGuardSuperCells = cellDesc.getGridSuperCells(); - const pmacc::math::Int guardSuperCells = cellDesc.getGuardingSuperCells(); - const pmacc::math::Int coreBorderSuperCells = coreBorderGuardSuperCells - 2 * guardSuperCells; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; - /* Functor holding the actual generic particle creation kernel */ - auto createParticlesKernel = make_CreateParticlesKernel< numWorkers >( - sourceSpecies.getDeviceParticlesBox(), - targetSpecies.getDeviceParticlesBox(), - particleCreator, - guardSuperCells); + /* Functor holding the actual generic particle creation kernel */ + auto createParticlesKernel = make_CreateParticlesKernel( + sourceSpecies.getDeviceParticlesBox(), + targetSpecies.getDeviceParticlesBox(), + particleCreator, + guardSuperCells); - /* This zone represents the core+border area with guard offset in unit of cells */ - const zone::SphericZone zone( - static_cast >(coreBorderSuperCells * SuperCellSize::toRT()), - guardSuperCells * SuperCellSize::toRT()); + /* This zone represents the core+border area with guard offset in unit of cells */ + const zone::SphericZone zone( + static_cast>(coreBorderSuperCells * 
SuperCellSize::toRT()), + guardSuperCells * SuperCellSize::toRT()); - algorithm::kernel::ForeachLockstep foreach; - foreach(zone, createParticlesKernel, cursor::make_MultiIndexCursor()); + algorithm::kernel::ForeachLockstep foreach; + foreach(zone, createParticlesKernel, cursor::make_MultiIndexCursor()) + ; - /* Make sure to leave no gaps in newly created frames */ - targetSpecies.fillAllGaps(); -} + /* Make sure to leave no gaps in newly created frames */ + targetSpecies.fillAllGaps(); + } -} // namespace creation -} // namespace particles + } // namespace creation + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/creation/creation.kernel b/include/picongpu/particles/creation/creation.kernel index 81e5fa8960..21224ebd43 100644 --- a/include/picongpu/particles/creation/creation.kernel +++ b/include/picongpu/particles/creation/creation.kernel @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten, Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2015-2021 Marco Garten, Axel Huebl, Heiko Burau, Rene Widera, * Richard Pausch, Felix Schmitt * * This file is part of PIConGPU. 
@@ -40,429 +40,309 @@ namespace picongpu { -namespace particles -{ -namespace creation -{ - - /** Functor with main kernel for particle creation - * - * - maps the frame dimensions and gathers the particle boxes - * - contains / calls the Creator - * - * @tparam T_numWorkers number of workers - * @tparam T_ParBoxSource container of the source species - * @tparam T_ParBoxTarget container of the target species - * @tparam T_ParticleCreator type of the particle creation functor - */ - template< - uint32_t T_numWorkers, - typename T_ParBoxSource, - typename T_ParBoxTarget, - typename T_ParticleCreator - > - struct CreateParticlesKernel + namespace particles { - using ParBoxSource = T_ParBoxSource; - using ParBoxTarget = T_ParBoxTarget; - using ParticleCreator = T_ParticleCreator; - - ParBoxSource sourceBox; - ParBoxTarget targetBox; - ParticleCreator particleCreator; - DataSpace< simDim > const guardSuperCells; - - CreateParticlesKernel( - ParBoxSource const & sourceBox, - ParBoxTarget const & targetBox, - ParticleCreator const & particleCreator, - DataSpace< simDim > const guardSuperCells - ) : - sourceBox( sourceBox ), - targetBox( targetBox ), - particleCreator( particleCreator ), - guardSuperCells( guardSuperCells ) - { } - - /** Goes over all frames and calls `ParticleCreator` - * - * @tparam T_Acc alpaka accelerator type - * - * @param blockCell n-dim. 
block offset (in cells) relative to the origin - * of the local domain plus guarding cells - */ - template< typename T_Acc > - DINLINE void operator( )( - T_Acc const & acc, - pmacc::math::Int< simDim > const & blockCell - ) + namespace creation { + /** Functor with main kernel for particle creation + * + * - maps the frame dimensions and gathers the particle boxes + * - contains / calls the Creator + * + * @tparam T_numWorkers number of workers + * @tparam T_ParBoxSource container of the source species + * @tparam T_ParBoxTarget container of the target species + * @tparam T_ParticleCreator type of the particle creation functor + */ + template< + uint32_t T_numWorkers, + typename T_ParBoxSource, + typename T_ParBoxTarget, + typename T_ParticleCreator> + struct CreateParticlesKernel + { + using ParBoxSource = T_ParBoxSource; + using ParBoxTarget = T_ParBoxTarget; + using ParticleCreator = T_ParticleCreator; + + ParBoxSource sourceBox; + ParBoxTarget targetBox; + ParticleCreator particleCreator; + DataSpace const guardSuperCells; + + CreateParticlesKernel( + ParBoxSource const& sourceBox, + ParBoxTarget const& targetBox, + ParticleCreator const& particleCreator, + DataSpace const guardSuperCells) + : sourceBox(sourceBox) + , targetBox(targetBox) + , particleCreator(particleCreator) + , guardSuperCells(guardSuperCells) + { + } - using namespace mappings::threads; + /** Goes over all frames and calls `ParticleCreator` + * + * @tparam T_Acc alpaka accelerator type + * + * @param blockCell n-dim. 
block offset (in cells) relative to the origin + * of the local domain plus guarding cells + */ + template + DINLINE void operator()(T_Acc const& acc, pmacc::math::Int const& blockCell) + { + using namespace mappings::threads; - constexpr uint32_t numWorkers = T_numWorkers; + constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */ - pmacc::math::Int< simDim > const block = blockCell / SuperCellSize::toRT( ); + /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */ + pmacc::math::Int const block = blockCell / SuperCellSize::toRT(); - // relative offset to the origin of the local domain (without any guarding cells) - pmacc::math::Int const supercellCellOffset = blockCell - this->guardSuperCells * SuperCellSize::toRT( ); + // relative offset to the origin of the local domain (without any guarding cells) + pmacc::math::Int const supercellCellOffset + = blockCell - this->guardSuperCells * SuperCellSize::toRT(); - /* "particle box" : container/iterator where the particles live in - * and where one can get the frame in a super cell from - */ - using SourceFramePtr = typename ParBoxSource::FramePtr; - using TargetFramePtr = typename ParBoxTarget::FramePtr; + /* "particle box" : container/iterator where the particles live in + * and where one can get the frame in a super cell from + */ + using SourceFramePtr = typename ParBoxSource::FramePtr; + using TargetFramePtr = typename ParBoxTarget::FramePtr; - /* for not mixing operations::assign up with the nvidia functor assign */ - namespace partOp = pmacc::particles::operations; + /* for not mixing operations::assign up with the nvidia functor assign */ + namespace partOp = pmacc::particles::operations; - constexpr lcellId_t maxParticlesInFrame = pmacc::math::CT::volume< SuperCellSize >::type::value; + constexpr 
lcellId_t maxParticlesInFrame = pmacc::math::CT::volume::type::value; - /* use two frames to allow that all virtual workers can create new particles - * even if newFrameFillLvl is not zero. - */ - using FrameArray = memory::Array< - TargetFramePtr, - 2 - >; - - PMACC_SMEM( - acc, - targetFrames, - FrameArray - ); - - // find last frame in super cell - SourceFramePtr sourceFrame( sourceBox.getLastFrame( block ) ); - - // end method if we have no frames - if( !sourceFrame.isValid( ) ) - return; - - using ParticleDomCfg = IdxConfig< - maxParticlesInFrame, - numWorkers - >; - - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); - - // initialize the collective part of the functor (e.g. field caching) - particleCreator.collectiveInit( - acc, - blockCell, - WorkerCfg< numWorkers >{ workerIdx } - ); - - memory::CtxArray< - ParticleCreator, - ParticleDomCfg - > - particleCreatorCtx{ }; - - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - // cell index within the superCell - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); - - // cell offset with respect to the local domain origin (without any guarding cells - pmacc::math::Int< simDim > const localCellIndex = supercellCellOffset + cellIdx; - - // create a copy of the functor for each virtual worker - particleCreatorCtx[ idx ] = particleCreator; - - // init particle creator functor for each virtual worker - particleCreatorCtx[ idx ].init( - acc, - blockCell, - linearIdx, - localCellIndex - ); - } - ); + /* use two frames to allow that all virtual workers can create new particles + * even if newFrameFillLvl is not zero. + */ + using FrameArray = memory::Array; - /* Declare counter in shared memory that will later tell the current fill level or - * occupation of the newly created target frames. 
- */ - PMACC_SMEM( - acc, - newFrameFillLvl, - int - ); - - ForEachIdx< - IdxConfig< - 2, - numWorkers - > - > onlyMasters{ workerIdx }; - - // Declare local variable oldFrameFillLvl for each thread - int oldFrameFillLvl; - - /* Initialize local (register) counter for each thread - * - describes how many new macro target particles should be created - */ - memory::CtxArray< - uint32_t, - ParticleDomCfg - > - numNewParticlesCtx( 0 ); - - // Master initializes the frame fill level with 0 - onlyMasters( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - if( linearIdx == 0 ) - newFrameFillLvl = 0; - targetFrames[ linearIdx ] = nullptr; - } - ); + PMACC_SMEM(acc, targetFrames, FrameArray); - __syncthreads( ); + // find last frame in super cell + SourceFramePtr sourceFrame(sourceBox.getLastFrame(block)); - /* move over source species frames and call particleCreator - * frames are worked on in backwards order to avoid asking if there is another frame - * --> performance - * Because all frames are completely filled except the last and apart from that last frame - * one wants to make sure that all threads are working and every frame is worked on. - */ - while( sourceFrame.isValid( ) ) - { + // end method if we have no frames + if(!sourceFrame.isValid()) + return; - memory::CtxArray< - bool, - ParticleDomCfg - > - isParticleCtx( - workerIdx, - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - return static_cast< bool >( sourceFrame[ linearIdx ][ multiMask_ ] ); - } - ); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - bool const isParticle = static_cast< bool >( sourceFrame[ linearIdx ][ multiMask_ ] ); - numNewParticlesCtx[ idx ] = 0u; - if( isParticle ) - /* ask the particle creator functor how many new particles to create. 
*/ - numNewParticlesCtx[ idx ] = particleCreatorCtx[ idx ].numNewParticles( - acc, - *sourceFrame, - linearIdx - ); - } - ); + using ParticleDomCfg = IdxConfig; - __syncthreads( ); + ForEachIdx forEachParticle(workerIdx); - /* always true while-loop over all particles inside source frame until each thread breaks out individually - * - * **Attention**: Speaking of 1st and 2nd frame only may seem odd. - * The question might arise what happens if more target particles are created than would fit into two frames. - * Well, multi-particle creation during a time step is accounted for. The number of new target particles is - * determined inside the outer loop over the valid frames while in the inner loop each thread can create only ONE - * new macro target particle. But the loop repeats until each thread has created all the target particles needed in the time step. - */ - while( true ) - { - /* < INIT > - * - targetParId is initialized as -1 (meaning: invalid) - * - (local) oldFrameFillLvl set equal to (shared) newFrameFillLvl for each thread - * --> each thread remembers the old "counter" - */ + // initialize the collective part of the functor (e.g. 
field caching) + particleCreator.collectiveInit(acc, blockCell, WorkerCfg{workerIdx}); - /* Declare local target particle ID - * - describes at which position in the new frame the new target particle is to be created - */ - memory::CtxArray< - int, - ParticleDomCfg - > - targetParIdCtx( -1 ); + memory::CtxArray particleCreatorCtx{}; - oldFrameFillLvl = newFrameFillLvl; + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + // cell index within the superCell + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); - __syncthreads( ); + // cell offset with respect to the local domain origin (without any guarding cells + pmacc::math::Int const localCellIndex = supercellCellOffset + cellIdx; - /* < CHECK & ADD > - * - if a thread wants to create target particles in each cycle it can do that only once - * and before that it atomically adds to the shared counter and uses the current - * value as targetParId in the new frame - */ - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( numNewParticlesCtx[ idx ] > 0u ) - targetParIdCtx[ idx ] = nvidia::atomicAllInc( - acc, - &newFrameFillLvl, - ::alpaka::hierarchy::Threads{} - ); - } - ); + // create a copy of the functor for each virtual worker + particleCreatorCtx[idx] = particleCreator; - __syncthreads( ); + // init particle creator functor for each virtual worker + particleCreatorCtx[idx].init(acc, blockCell, linearIdx, localCellIndex); + }); - /* < EXIT? > - * - if the counter hasn't changed all threads break out of the loop + /* Declare counter in shared memory that will later tell the current fill level or + * occupation of the newly created target frames. 
*/ - if( oldFrameFillLvl == newFrameFillLvl ) - break; + PMACC_SMEM(acc, newFrameFillLvl, int); - __syncthreads( ); + ForEachIdx> onlyMasters{workerIdx}; - /* < NEW FRAME > - * - if there is no frame, yet, the master will create a new target particle frame - * and attach it to the back of the frame list - */ - onlyMasters( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - uint32_t const numFramesNeeded = ( newFrameFillLvl + maxParticlesInFrame - 1u ) / maxParticlesInFrame; - if( linearIdx < numFramesNeeded && !targetFrames[ linearIdx ].isValid( ) ) - { - targetFrames[ linearIdx ] = targetBox.getEmptyFrame( ); - targetBox.setAsLastFrame( - acc, - targetFrames[ linearIdx ], - block - ); - } - } - ); - - __syncthreads( ); + // Declare local variable oldFrameFillLvl for each thread + int oldFrameFillLvl; - /* < CREATE > - * - all target particles were created - * - internal particle creation counter is decremented by 1 + /* Initialize local (register) counter for each thread + * - describes how many new macro target particles should be created */ - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) + memory::CtxArray numNewParticlesCtx(0); + + // Master initializes the frame fill level with 0 + onlyMasters([&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx == 0) + newFrameFillLvl = 0; + targetFrames[linearIdx] = nullptr; + }); + + cupla::__syncthreads(acc); + + /* move over source species frames and call particleCreator + * frames are worked on in backwards order to avoid asking if there is another frame + * --> performance + * Because all frames are completely filled except the last and apart from that last frame + * one wants to make sure that all threads are working and every frame is worked on. 
+ */ + while(sourceFrame.isValid()) + { + memory::CtxArray isParticleCtx( + workerIdx, + [&](uint32_t const linearIdx, uint32_t const) { + return static_cast(sourceFrame[linearIdx][multiMask_]); + }); + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + bool const isParticle = static_cast(sourceFrame[linearIdx][multiMask_]); + numNewParticlesCtx[idx] = 0u; + if(isParticle) + /* ask the particle creator functor how many new particles to create. */ + numNewParticlesCtx[idx] + = particleCreatorCtx[idx].numNewParticles(acc, *sourceFrame, linearIdx); + }); + + cupla::__syncthreads(acc); + + /* always true while-loop over all particles inside source frame until each thread breaks out + * individually + * + * **Attention**: Speaking of 1st and 2nd frame only may seem odd. + * The question might arise what happens if more target particles are created than would fit + * into two frames. Well, multi-particle creation during a time step is accounted for. The + * number of new target particles is determined inside the outer loop over the valid frames + * while in the inner loop each thread can create only ONE new macro target particle. But the + * loop repeats until each thread has created all the target particles needed in the time step. 
+ */ + while(true) { - uint32_t targetFrameIdx = 0; - if( targetParIdCtx[ idx ] >= maxParticlesInFrame ) - { - targetFrameIdx = 1; - targetParIdCtx[ idx ] -= maxParticlesInFrame; - } - if( 0 <= targetParIdCtx[ idx ] ) - { - // each virtual worker makes the attributes of its source particle accessible - auto sourceParticle = sourceFrame[ linearIdx ]; - // each virtual worker initializes a target particle if one should be created - auto targetParticle = targetFrames[ targetFrameIdx ][ targetParIdCtx[ idx ] ]; - - // create a target particle in the new target particle frame: - particleCreatorCtx[ idx ]( - acc, - sourceParticle, - targetParticle - ); - - numNewParticlesCtx[ idx ] -= 1; + /* < INIT > + * - targetParId is initialized as -1 (meaning: invalid) + * - (local) oldFrameFillLvl set equal to (shared) newFrameFillLvl for each thread + * --> each thread remembers the old "counter" + */ + + /* Declare local target particle ID + * - describes at which position in the new frame the new target particle is to be created + */ + memory::CtxArray targetParIdCtx(-1); + + oldFrameFillLvl = newFrameFillLvl; + + cupla::__syncthreads(acc); + + /* < CHECK & ADD > + * - if a thread wants to create target particles in each cycle it can do that only once + * and before that it atomically adds to the shared counter and uses the current + * value as targetParId in the new frame + */ + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(numNewParticlesCtx[idx] > 0u) + targetParIdCtx[idx] + = nvidia::atomicAllInc(acc, &newFrameFillLvl, ::alpaka::hierarchy::Threads{}); + }); + + cupla::__syncthreads(acc); + + /* < EXIT? 
> + * - if the counter hasn't changed all threads break out of the loop + */ + if(oldFrameFillLvl == newFrameFillLvl) + break; + + cupla::__syncthreads(acc); + + /* < NEW FRAME > + * - if there is no frame, yet, the master will create a new target particle frame + * and attach it to the back of the frame list + */ + onlyMasters([&](uint32_t const linearIdx, uint32_t const) { + uint32_t const numFramesNeeded + = (newFrameFillLvl + maxParticlesInFrame - 1u) / maxParticlesInFrame; + if(linearIdx < numFramesNeeded && !targetFrames[linearIdx].isValid()) + { + targetFrames[linearIdx] = targetBox.getEmptyFrame(acc); + targetBox.setAsLastFrame(acc, targetFrames[linearIdx], block); } - } - ); - - __syncthreads( ); + }); + + cupla::__syncthreads(acc); + + /* < CREATE > + * - all target particles were created + * - internal particle creation counter is decremented by 1 + */ + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + uint32_t targetFrameIdx = 0; + if(targetParIdCtx[idx] >= maxParticlesInFrame) + { + targetFrameIdx = 1; + targetParIdCtx[idx] -= maxParticlesInFrame; + } + if(0 <= targetParIdCtx[idx]) + { + // each virtual worker makes the attributes of its source particle accessible + auto sourceParticle = sourceFrame[linearIdx]; + // each virtual worker initializes a target particle if one should be created + auto targetParticle = targetFrames[targetFrameIdx][targetParIdCtx[idx]]; - onlyMasters( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - if( linearIdx == 0 && newFrameFillLvl >= maxParticlesInFrame ) - { - newFrameFillLvl -= maxParticlesInFrame; - // copy the not filled frame pointer to the beginning - targetFrames[ 0 ] = targetFrames[ 1 ]; - // reset second frame - targetFrames[ 1 ] = nullptr; - } - } - ); + // create a target particle in the new target particle frame: + particleCreatorCtx[idx](acc, sourceParticle, targetParticle); - __syncthreads( ); - } + numNewParticlesCtx[idx] -= 1; + } + }); + + cupla::__syncthreads(acc); + + 
onlyMasters([&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx == 0 && newFrameFillLvl >= maxParticlesInFrame) + { + newFrameFillLvl -= maxParticlesInFrame; + // copy the not filled frame pointer to the beginning + targetFrames[0] = targetFrames[1]; + // reset second frame + targetFrames[1] = nullptr; + } + }); - __syncthreads( ); + cupla::__syncthreads(acc); + } - sourceFrame = sourceBox.getPreviousFrame( sourceFrame ); + cupla::__syncthreads(acc); + sourceFrame = sourceBox.getPreviousFrame(sourceFrame); + } + } + }; + + /** Convenient function to create a `CreateParticlesKernel` instance + * + * @tparam T_numWorkers number of workers + * + * @param parBoxSource particle box of the source species + * @param parBoxTarget particle box of the target species + * @param particleCreator particle creation functor + * @param guardSuperCells number of guard cells per dimension + * @return new `CreateParticlesKernel` instance + */ + template< + uint32_t T_numWorkers, + typename T_ParBoxSource, + typename T_ParBoxTarget, + typename T_ParticleCreator> + CreateParticlesKernel + make_CreateParticlesKernel( + T_ParBoxSource const& parBoxSource, + T_ParBoxTarget const& parBoxTarget, + T_ParticleCreator const& particleCreator, + DataSpace const& guardSuperCells) + { + return CreateParticlesKernel( + parBoxSource, + parBoxTarget, + particleCreator, + guardSuperCells); } - } - }; - - /** Convenient function to create a `CreateParticlesKernel` instance - * - * @tparam T_numWorkers number of workers - * - * @param parBoxSource particle box of the source species - * @param parBoxTarget particle box of the target species - * @param particleCreator particle creation functor - * @param guardSuperCells number of guard cells per dimension - * @return new `CreateParticlesKernel` instance - */ - template< - uint32_t T_numWorkers, - typename T_ParBoxSource, - typename T_ParBoxTarget, - typename T_ParticleCreator - > - CreateParticlesKernel< - T_numWorkers, - T_ParBoxSource, - 
T_ParBoxTarget, - T_ParticleCreator - > - make_CreateParticlesKernel( - T_ParBoxSource const & parBoxSource, - T_ParBoxTarget const & parBoxTarget, - T_ParticleCreator const & particleCreator, - DataSpace< simDim > const & guardSuperCells) - { - return CreateParticlesKernel< - T_numWorkers, - T_ParBoxSource, - T_ParBoxTarget, - T_ParticleCreator - >( - parBoxSource, - parBoxTarget, - particleCreator, - guardSuperCells - ); - } - -} // namespace creation -} // namespace particles + + } // namespace creation + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/EveryNthCellImpl.def b/include/picongpu/particles/densityProfiles/EveryNthCellImpl.def index ab3578f6fa..4d5c959b7c 100644 --- a/include/picongpu/particles/densityProfiles/EveryNthCellImpl.def +++ b/include/picongpu/particles/densityProfiles/EveryNthCellImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -24,32 +24,29 @@ namespace picongpu { -namespace densityProfiles -{ - /** A density profile which only initializes each nth cell - * - * Useful to initialize probe particles or material dopings. The result is - * either 0 (no particle) or the full density. The result of this particular - * functor can be larger 1.0 with T_SkipCells::toRT().productOfComponents() - * in order to properly fulfill the density of a species via increased - * weighting. - * - * @tparam T_SkipCells The period for the number of cells to skip for each - * direction before initializing a particle. 
Signature - * of a pmacc::math::CT::UInt32 - */ - template< - typename T_SkipCells - > - struct EveryNthCellImpl + namespace densityProfiles { - // note: `sizeof(ANY_TYPE) != 0` defers the evaluation - PMACC_CASSERT_MSG_TYPE( - __Density_Profile_EveryNthCellImpl_expects_a_PMacc_math_CT_UInt32, - T_SkipCells, - false && sizeof( T_SkipCells ) != 0 - ); - }; + /** A density profile which only initializes each nth cell + * + * Useful to initialize probe particles or material dopings. The result is + * either 0 (no particle) or the full density. The result of this particular + * functor can be larger 1.0 with T_SkipCells::toRT().productOfComponents() + * in order to properly fulfill the density of a species via increased + * weighting. + * + * @tparam T_SkipCells The period for the number of cells to skip for each + * direction before initializing a particle. Signature + * of a pmacc::math::CT::UInt32 + */ + template + struct EveryNthCellImpl + { + // note: `sizeof(ANY_TYPE) != 0` defers the evaluation + PMACC_CASSERT_MSG_TYPE( + __Density_Profile_EveryNthCellImpl_expects_a_PMacc_math_CT_UInt32, + T_SkipCells, + false && sizeof(T_SkipCells) != 0); + }; -} // namespace densityProfiles + } // namespace densityProfiles } // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/EveryNthCellImpl.hpp b/include/picongpu/particles/densityProfiles/EveryNthCellImpl.hpp index d86e83d8c1..5c558414ca 100644 --- a/include/picongpu/particles/densityProfiles/EveryNthCellImpl.hpp +++ b/include/picongpu/particles/densityProfiles/EveryNthCellImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -27,64 +27,48 @@ namespace picongpu { -namespace densityProfiles -{ - template< - uint32_t ... Args - > - struct EveryNthCellImpl< - pmacc::math::CT::UInt32< - Args ... - > - > + namespace densityProfiles { - using OrgSkipCells = pmacc::math::CT::UInt32< Args ... 
>; - using SkipCells = typename pmacc::math::CT::shrinkTo< - OrgSkipCells, - simDim - >::type; - - template - struct apply + template + struct EveryNthCellImpl> { - using type = EveryNthCellImpl< OrgSkipCells >; - }; + using OrgSkipCells = pmacc::math::CT::UInt32; + using SkipCells = typename pmacc::math::CT::shrinkTo::type; - HINLINE - EveryNthCellImpl( uint32_t currentStep ) - { - } + template + struct apply + { + using type = EveryNthCellImpl; + }; - /** Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X - operator()( DataSpace< simDim > const & totalCellOffset ) - { - // modulo! - auto const isThisCellWithProbe( totalCellOffset % SkipCells::toRT() ); + HINLINE + EveryNthCellImpl(uint32_t currentStep) + { + } + + /** Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(DataSpace const& totalCellOffset) + { + // modulo! + auto const isThisCellWithProbe(totalCellOffset % SkipCells::toRT()); - // is this cell populated with a probe particle? - bool const isPopulated( - isThisCellWithProbe == DataSpace< simDim >::create( 0 ) - ); + // is this cell populated with a probe particle? 
+ bool const isPopulated(isThisCellWithProbe == DataSpace::create(0)); - /* every how many (volumentric) cells do we set a particle: - * scale up weighting accordingly */ - float_X const weightingScaling( - precisionCast< float_X >( - SkipCells::toRT().productOfComponents() - ) - ); + /* every how many (volumentric) cells do we set a particle: + * scale up weighting accordingly */ + float_X const weightingScaling(precisionCast(SkipCells::toRT().productOfComponents())); - // fill only the selected cells - float_X result( 0.0 ); - if( isPopulated ) - result = weightingScaling; + // fill only the selected cells + float_X result(0.0); + if(isPopulated) + result = weightingScaling; - return result; - } - }; -} // namespace densityProfiles + return result; + } + }; + } // namespace densityProfiles } // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/FreeFormulaImpl.def b/include/picongpu/particles/densityProfiles/FreeFormulaImpl.def index 63a7ed5569..decdfd822c 100644 --- a/include/picongpu/particles/densityProfiles/FreeFormulaImpl.def +++ b/include/picongpu/particles/densityProfiles/FreeFormulaImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct FreeFormulaImpl; -} -} + namespace densityProfiles + { + template + struct FreeFormulaImpl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/FreeFormulaImpl.hpp b/include/picongpu/particles/densityProfiles/FreeFormulaImpl.hpp index 40b7ebd112..6f1bf20022 100644 --- a/include/picongpu/particles/densityProfiles/FreeFormulaImpl.hpp +++ b/include/picongpu/particles/densityProfiles/FreeFormulaImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Richard Pausch, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Richard Pausch, Axel Huebl * * This file is part of PIConGPU. 
* @@ -26,38 +26,38 @@ namespace picongpu { -namespace densityProfiles -{ - template< typename T_UserFunctor > - struct FreeFormulaImpl : public particles::functor::User< T_UserFunctor > + namespace densityProfiles { - using UserFunctor = particles::functor::User< T_UserFunctor >; - - template< typename T_SpeciesType > - struct apply + template + struct FreeFormulaImpl : public particles::functor::User { - using type = FreeFormulaImpl< UserFunctor >; + using UserFunctor = particles::functor::User; + + template + struct apply + { + using type = FreeFormulaImpl; + }; + + HINLINE FreeFormulaImpl(uint32_t currentStep) : UserFunctor(currentStep) + { + } + + /** Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(DataSpace const& totalCellOffset) + { + float_64 const unitLength(UNIT_LENGTH); // workaround to use UNIT_LENGTH on device + float3_64 const cellSize_SI(precisionCast(cellSize) * unitLength); + // evaluate at cell center for a more accurate estimate for the cell + floatD_64 const totalCenterCellOffset + = precisionCast(totalCellOffset) + floatD_64::create(0.5); + floatD_64 const position_SI(totalCenterCellOffset * cellSize_SI.shrink()); + + return UserFunctor::operator()(position_SI, cellSize_SI); + } }; - - HINLINE FreeFormulaImpl( uint32_t currentStep ) : UserFunctor( currentStep ) - { - } - - /** Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X operator()( DataSpace< simDim > const & totalCellOffset ) - { - float_64 const unitLength( UNIT_LENGTH ); // workaround to use UNIT_LENGTH on device - float3_64 const cellSize_SI( precisionCast< float_64 >( cellSize ) * unitLength ); - floatD_64 const position_SI( precisionCast< float_64 >( totalCellOffset ) * cellSize_SI.shrink( ) ); - - return UserFunctor::operator()( - position_SI, - cellSize_SI - ); - } - }; -} // namespace densityProfiles + } 
// namespace densityProfiles } // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/FromHDF5Impl.def b/include/picongpu/particles/densityProfiles/FromHDF5Impl.def index b1b9801187..07888ee2b0 100644 --- a/include/picongpu/particles/densityProfiles/FromHDF5Impl.def +++ b/include/picongpu/particles/densityProfiles/FromHDF5Impl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct FromHDF5Impl; -} -} + namespace densityProfiles + { + template + struct FromHDF5Impl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/FromHDF5Impl.hpp b/include/picongpu/particles/densityProfiles/FromHDF5Impl.hpp index 9795c33c32..eac4936874 100644 --- a/include/picongpu/particles/densityProfiles/FromHDF5Impl.hpp +++ b/include/picongpu/particles/densityProfiles/FromHDF5Impl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Rene Widera * * This file is part of PIConGPU. 
* @@ -33,211 +33,206 @@ namespace picongpu { -namespace densityProfiles -{ - -template -struct FromHDF5Impl : public T_ParamClass -{ - using ParamClass = T_ParamClass; - - template - struct apply - { - using type = FromHDF5Impl; - }; - - HINLINE FromHDF5Impl(uint32_t currentStep) - { - const uint32_t numSlides = MovingWindow::getInstance( ).getSlideCounter( currentStep ); - auto window = MovingWindow::getInstance().getWindow(currentStep); - loadHDF5(window); - const SubGrid& subGrid = Environment::get().SubGrid(); - DataSpace localCells = subGrid.getLocalDomain( ).size; - totalGpuOffset = subGrid.getLocalDomain( ).offset; - totalGpuOffset.y( ) += numSlides * localCells.y( ); - } - - /** Calculate the normalized density from HDF5 file - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - const DataSpace localCellIdx(totalCellOffset - totalGpuOffset); - return precisionCast(deviceDataBox(localCellIdx + SuperCellSize::toRT() * GuardSize::toRT()).x()); - } - -private: - - void loadHDF5(Window &window) + namespace densityProfiles { - using namespace splash; - DataConnector &dc = Environment<>::get().DataConnector(); - - PMACC_CASSERT_MSG( - _please_allocate_at_least_one_FieldTmp_in_memory_param, - fieldTmpNumSlots > 0 - ); - auto fieldTmp = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - auto& fieldBuffer = fieldTmp->getGridBuffer(); - - deviceDataBox = fieldBuffer.getDeviceBuffer().getDataBox(); - - GridController &gc = Environment::get().GridController(); - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(0); - const uint32_t maxOpenFilesPerNode = 1; - - /* get a new ParallelDomainCollector for our MPI rank only*/ - ParallelDomainCollector pdc( - MPI_COMM_SELF, - gc.getCommunicator().getMPIInfo(), - Dimensions(1, 1, 1), - maxOpenFilesPerNode); - - try + 
template + struct FromHDF5Impl : public T_ParamClass { - /* setup ParallelDomainCollector pdc to read the density information from hdf5 */ - DataCollector::FileCreationAttr attr; - DataCollector::initFileCreationAttr(attr); - attr.fileAccType = DataCollector::FAT_READ; + using ParamClass = T_ParamClass; - pdc.open(ParamClass::filename, attr); - - /* set which part of the hdf5 file our MPI rank reads */ - DataSpace globalSlideOffset; - globalSlideOffset.y() = numSlides * localDomain.size.y(); - - Dimensions domainOffset(0, 0, 0); - for (uint32_t d = 0; d < simDim; ++d) - domainOffset[d] = localDomain.offset[d] + globalSlideOffset[d]; - - if (gc.getPosition().y() == 0) - domainOffset[1] += window.globalDimensions.offset.y(); - - DataSpace localDomainSize = localDomain.size; - Dimensions domainSize(1, 1, 1); - for (uint32_t d = 0; d < simDim; ++d) - domainSize[d] = localDomainSize[d]; - - /* clear host buffer with default value */ - fieldBuffer.getHostBuffer().setValue(float1_X(ParamClass::defaultDensity)); - - /* get dimensions and offsets (collective call) */ - Domain fileDomain = pdc.getGlobalDomain(ParamClass::iteration, ParamClass::datasetName); - Dimensions fileDomainEnd = fileDomain.getOffset() + fileDomain.getSize(); - DataSpace accessSpace; - DataSpace accessOffset; - - Dimensions fileAccessSpace(1, 1, 1); - Dimensions fileAccessOffset(0, 0, 0); - - /* For each dimension, compute how file domain and local simulation domain overlap - * and which sizes and offsets are required for loading data from the file. 
- **/ - for (uint32_t d = 0; d < simDim; ++d) + template + struct apply { - /* file domain in/in-after sim domain */ - if (fileDomain.getOffset()[d] >= domainOffset[d] && - fileDomain.getOffset()[d] <= domainOffset[d] + domainSize[d]) - { - accessSpace[d] = std::min(domainOffset[d] + domainSize[d] - fileDomain.getOffset()[d], - fileDomain.getSize()[d]); - fileAccessSpace[d] = accessSpace[d]; + using type = FromHDF5Impl; + }; - accessOffset[d] = fileDomain.getOffset()[d] - domainOffset[d]; - fileAccessOffset[d] = 0; - continue; - } + HINLINE FromHDF5Impl(uint32_t currentStep) + { + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + auto window = MovingWindow::getInstance().getWindow(currentStep); + loadHDF5(window); + const SubGrid& subGrid = Environment::get().SubGrid(); + DataSpace localCells = subGrid.getLocalDomain().size; + totalGpuOffset = subGrid.getLocalDomain().offset; + totalGpuOffset.y() += numSlides * localCells.y(); + } - /* file domain before-in sim domain */ - if (fileDomainEnd[d] >= domainOffset[d] && - fileDomainEnd[d] <= domainOffset[d] + domainSize[d]) - { - accessSpace[d] = fileDomainEnd[d] - domainOffset[d]; - fileAccessSpace[d] = accessSpace[d]; + /** Calculate the normalized density from HDF5 file + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(const DataSpace& totalCellOffset) + { + const DataSpace localCellIdx(totalCellOffset - totalGpuOffset); + return precisionCast( + deviceDataBox(localCellIdx + SuperCellSize::toRT() * GuardSize::toRT()).x()); + } - accessOffset[d] = 0; - fileAccessOffset[d] = domainOffset[d] - fileDomain.getOffset()[d]; - continue; - } + private: + void loadHDF5(Window& window) + { + using namespace splash; + DataConnector& dc = Environment<>::get().DataConnector(); - /* sim domain in file domain */ - if (domainOffset[d] >= fileDomain.getOffset()[d] && - domainOffset[d] + domainSize[d] <= fileDomainEnd[d]) - { - 
accessSpace[d] = domainSize[d]; - fileAccessSpace[d] = accessSpace[d]; + PMACC_CASSERT_MSG(_please_allocate_at_least_one_FieldTmp_in_memory_param, fieldTmpNumSlots > 0); + auto fieldTmp = dc.get(FieldTmp::getUniqueId(0), true); + auto& fieldBuffer = fieldTmp->getGridBuffer(); - accessOffset[d] = 0; - fileAccessOffset[d] = domainOffset[d] - fileDomain.getOffset()[d]; - continue; - } + deviceDataBox = fieldBuffer.getDeviceBuffer().getDataBox(); - /* file domain and sim domain do not intersect, do not load anything */ - accessSpace[d] = 0; - break; - } + GridController& gc = Environment::get().GridController(); + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(0); + const uint32_t maxOpenFilesPerNode = 1; - /* allocate temporary buffer for hdf5 data */ - using ValueType = typename FieldTmp::ValueType::type; - ValueType *tmpBfr = nullptr; + /* get a new ParallelDomainCollector for our MPI rank only*/ + ParallelDomainCollector pdc( + MPI_COMM_SELF, + gc.getCommunicator().getMPIInfo(), + Dimensions(1, 1, 1), + maxOpenFilesPerNode); - size_t accessSize = accessSpace.productOfComponents(); - if (accessSize > 0) - { - tmpBfr = new ValueType[accessSize]; - - Dimensions sizeRead(0, 0, 0); - pdc.read( - ParamClass::iteration, - fileAccessSpace, - fileAccessOffset, - ParamClass::datasetName, - sizeRead, - tmpBfr); - - if (sizeRead.getScalarSize() != accessSize) + try { - __delete(tmpBfr); - return; + /* setup ParallelDomainCollector pdc to read the density information from hdf5 */ + DataCollector::FileCreationAttr attr; + DataCollector::initFileCreationAttr(attr); + attr.fileAccType = DataCollector::FAT_READ; + + pdc.open(ParamClass::filename, attr); + + /* set which part of the hdf5 file our MPI rank reads */ + DataSpace globalSlideOffset; + globalSlideOffset.y() = numSlides * localDomain.size.y(); + + Dimensions domainOffset(0, 0, 0); + for(uint32_t d = 0; d < simDim; 
++d) + domainOffset[d] = localDomain.offset[d] + globalSlideOffset[d]; + + if(gc.getPosition().y() == 0) + domainOffset[1] += window.globalDimensions.offset.y(); + + DataSpace localDomainSize = localDomain.size; + Dimensions domainSize(1, 1, 1); + for(uint32_t d = 0; d < simDim; ++d) + domainSize[d] = localDomainSize[d]; + + /* clear host buffer with default value */ + fieldBuffer.getHostBuffer().setValue(float1_X(ParamClass::defaultDensity)); + + /* get dimensions and offsets (collective call) */ + Domain fileDomain = pdc.getGlobalDomain(ParamClass::iteration, ParamClass::datasetName); + Dimensions fileDomainEnd = fileDomain.getOffset() + fileDomain.getSize(); + DataSpace accessSpace; + DataSpace accessOffset; + + Dimensions fileAccessSpace(1, 1, 1); + Dimensions fileAccessOffset(0, 0, 0); + + /* For each dimension, compute how file domain and local simulation domain overlap + * and which sizes and offsets are required for loading data from the file. + **/ + for(uint32_t d = 0; d < simDim; ++d) + { + /* file domain in/in-after sim domain */ + if(fileDomain.getOffset()[d] >= domainOffset[d] + && fileDomain.getOffset()[d] <= domainOffset[d] + domainSize[d]) + { + accessSpace[d] = std::min( + domainOffset[d] + domainSize[d] - fileDomain.getOffset()[d], + fileDomain.getSize()[d]); + fileAccessSpace[d] = accessSpace[d]; + + accessOffset[d] = fileDomain.getOffset()[d] - domainOffset[d]; + fileAccessOffset[d] = 0; + continue; + } + + /* file domain before-in sim domain */ + if(fileDomainEnd[d] >= domainOffset[d] && fileDomainEnd[d] <= domainOffset[d] + domainSize[d]) + { + accessSpace[d] = fileDomainEnd[d] - domainOffset[d]; + fileAccessSpace[d] = accessSpace[d]; + + accessOffset[d] = 0; + fileAccessOffset[d] = domainOffset[d] - fileDomain.getOffset()[d]; + continue; + } + + /* sim domain in file domain */ + if(domainOffset[d] >= fileDomain.getOffset()[d] + && domainOffset[d] + domainSize[d] <= fileDomainEnd[d]) + { + accessSpace[d] = domainSize[d]; + fileAccessSpace[d] 
= accessSpace[d]; + + accessOffset[d] = 0; + fileAccessOffset[d] = domainOffset[d] - fileDomain.getOffset()[d]; + continue; + } + + /* file domain and sim domain do not intersect, do not load anything */ + accessSpace[d] = 0; + break; + } + + /* allocate temporary buffer for hdf5 data */ + using ValueType = typename FieldTmp::ValueType::type; + ValueType* tmpBfr = nullptr; + + size_t accessSize = accessSpace.productOfComponents(); + if(accessSize > 0) + { + tmpBfr = new ValueType[accessSize]; + + Dimensions sizeRead(0, 0, 0); + pdc.read( + ParamClass::iteration, + fileAccessSpace, + fileAccessOffset, + ParamClass::datasetName, + sizeRead, + tmpBfr); + + if(sizeRead.getScalarSize() != accessSize) + { + __delete(tmpBfr); + return; + } + + /* get the databox of the host buffer */ + auto dataBox = fieldBuffer.getHostBuffer().getDataBox(); + /* get a 1D access object to the databox */ + using D1Box = DataBoxDim1Access; + DataSpace guards = fieldBuffer.getGridLayout().getGuard(); + D1Box d1RAccess(dataBox.shift(guards + accessOffset), accessSpace); + + /* copy from temporary buffer to fieldTmp host buffer */ + for(int i = 0; i < accessSpace.productOfComponents(); ++i) + { + d1RAccess[i].x() = tmpBfr[i]; + } + + __delete(tmpBfr); + } + + pdc.close(); + + /* copy host data to the device */ + fieldBuffer.hostToDevice(); + __getTransactionEvent().waitForFinished(); } - - /* get the databox of the host buffer */ - auto dataBox = fieldBuffer.getHostBuffer().getDataBox(); - /* get a 1D access object to the databox */ - using D1Box = DataBoxDim1Access< typename FieldTmp::DataBoxType >; - DataSpace guards = fieldBuffer.getGridLayout().getGuard(); - D1Box d1RAccess(dataBox.shift(guards + accessOffset), accessSpace); - - /* copy from temporary buffer to fieldTmp host buffer */ - for (int i = 0; i < accessSpace.productOfComponents(); ++i) + catch(const DCException& e) { - d1RAccess[i].x() = tmpBfr[i]; + std::cerr << e.what() << std::endl; + return; } - __delete(tmpBfr); + return; } 
- pdc.close(); - - /* copy host data to the device */ - fieldBuffer.hostToDevice(); - __getTransactionEvent().waitForFinished(); - - } - catch (const DCException& e) - { - std::cerr << e.what() << std::endl; - return; - } - - return; - } - - PMACC_ALIGN(deviceDataBox,FieldTmp::DataBoxType); - PMACC_ALIGN(totalGpuOffset,DataSpace); -}; -} -} + PMACC_ALIGN(deviceDataBox, FieldTmp::DataBoxType); + PMACC_ALIGN(totalGpuOffset, DataSpace); + }; + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/GaussianCloudImpl.def b/include/picongpu/particles/densityProfiles/GaussianCloudImpl.def index d03178f90e..822af5e1fe 100644 --- a/include/picongpu/particles/densityProfiles/GaussianCloudImpl.def +++ b/include/picongpu/particles/densityProfiles/GaussianCloudImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct GaussianCloudImpl; -} -} + namespace densityProfiles + { + template + struct GaussianCloudImpl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/GaussianCloudImpl.hpp b/include/picongpu/particles/densityProfiles/GaussianCloudImpl.hpp index 591879015a..9bbfde77ab 100644 --- a/include/picongpu/particles/densityProfiles/GaussianCloudImpl.hpp +++ b/include/picongpu/particles/densityProfiles/GaussianCloudImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -25,54 +25,51 @@ namespace picongpu { -namespace densityProfiles -{ - -template -struct GaussianCloudImpl : public T_ParamClass -{ - using ParamClass = T_ParamClass; - - template - struct apply + namespace densityProfiles { - using type = GaussianCloudImpl; - }; + template + struct GaussianCloudImpl : public T_ParamClass + { + using ParamClass = T_ParamClass; - HINLINE GaussianCloudImpl(uint32_t currentStep) - { - } + template + struct apply + { + using type = GaussianCloudImpl; + }; - /** Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - const float_64 unit_length = UNIT_LENGTH; - const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); - const floatD_X center = precisionCast(ParamClass::center_SI / unit_length); - const floatD_X sigma = precisionCast(ParamClass::sigma_SI / unit_length); + HINLINE GaussianCloudImpl(uint32_t currentStep) + { + } + + /** Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(const DataSpace& totalCellOffset) + { + const float_64 unit_length = UNIT_LENGTH; + const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); + const floatD_X center = precisionCast(ParamClass::center_SI / unit_length); + const floatD_X sigma = precisionCast(ParamClass::sigma_SI / unit_length); - const floatD_X globalCellPos( - precisionCast(totalCellOffset) * - cellSize.shrink() - ); + const floatD_X globalCellPos(precisionCast(totalCellOffset) * cellSize.shrink()); - if (globalCellPos.y() < vacuum_y) return float_X(0.0); + if(globalCellPos.y() < vacuum_y) + return float_X(0.0); - /* for x, y, z calculate: x-x0 / sigma_x */ - const floatD_X r0overSigma = (globalCellPos - center) / sigma; - /* get lenghts of r0 over sigma */ - const float_X exponent = math::abs(r0overSigma); + /* for x, y, z calculate: 
x-x0 / sigma_x */ + const floatD_X r0overSigma = (globalCellPos - center) / sigma; + /* get lenghts of r0 over sigma */ + const float_X exponent = math::abs(r0overSigma); - /* calculate exp(factor * exponent**power) */ - const float_X power = ParamClass::gasPower; - const float_X factor = ParamClass::gasFactor; - const float_X density = math::exp(factor * math::pow(exponent, power)); + /* calculate exp(factor * exponent**power) */ + const float_X power = ParamClass::gasPower; + const float_X factor = ParamClass::gasFactor; + const float_X density = math::exp(factor * math::pow(exponent, power)); - return density; - } -}; -} -} + return density; + } + }; + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/GaussianImpl.def b/include/picongpu/particles/densityProfiles/GaussianImpl.def index 4e9f340828..eea88488e8 100644 --- a/include/picongpu/particles/densityProfiles/GaussianImpl.def +++ b/include/picongpu/particles/densityProfiles/GaussianImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct GaussianImpl; -} -} + namespace densityProfiles + { + template + struct GaussianImpl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/GaussianImpl.hpp b/include/picongpu/particles/densityProfiles/GaussianImpl.hpp index ddbdc141ef..c9c6799b4e 100644 --- a/include/picongpu/particles/densityProfiles/GaussianImpl.hpp +++ b/include/picongpu/particles/densityProfiles/GaussianImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -25,60 +25,56 @@ namespace picongpu { -namespace densityProfiles -{ - -template -struct GaussianImpl : public T_ParamClass -{ - using ParamClass = T_ParamClass; - - template - struct apply + namespace densityProfiles { - using type = GaussianImpl; - }; + template + struct GaussianImpl : public T_ParamClass + { + using ParamClass = T_ParamClass; - HINLINE GaussianImpl(uint32_t currentStep) - { - } + template + struct apply + { + using type = GaussianImpl; + }; - /** Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); - const float_X gas_center_left = ParamClass::gasCenterLeft_SI / UNIT_LENGTH; - const float_X gas_center_right = ParamClass::gasCenterRight_SI / UNIT_LENGTH; - const float_X gas_sigma_left = ParamClass::gasSigmaLeft_SI / UNIT_LENGTH; - const float_X gas_sigma_right = ParamClass::gasSigmaRight_SI / UNIT_LENGTH; + HINLINE GaussianImpl(uint32_t currentStep) + { + } - const floatD_X globalCellPos( - precisionCast(totalCellOffset) * - cellSize.shrink() - ); + /** Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(const DataSpace& totalCellOffset) + { + const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); + const float_X gas_center_left = ParamClass::gasCenterLeft_SI / UNIT_LENGTH; + const float_X gas_center_right = ParamClass::gasCenterRight_SI / UNIT_LENGTH; + const float_X gas_sigma_left = ParamClass::gasSigmaLeft_SI / UNIT_LENGTH; + const float_X gas_sigma_right = ParamClass::gasSigmaRight_SI / UNIT_LENGTH; - if (globalCellPos.y() * cellSize.y() < vacuum_y) - { - return float_X(0.0); - } + const floatD_X globalCellPos(precisionCast(totalCellOffset) * cellSize.shrink()); - float_X exponent = float_X(0.0); - if (globalCellPos.y() < 
gas_center_left) - { - exponent = math::abs((globalCellPos.y() - gas_center_left) / gas_sigma_left); - } - else if (globalCellPos.y() >= gas_center_right) - { - exponent = math::abs((globalCellPos.y() - gas_center_right) / gas_sigma_right); - } + if(globalCellPos.y() * cellSize.y() < vacuum_y) + { + return float_X(0.0); + } + + float_X exponent = float_X(0.0); + if(globalCellPos.y() < gas_center_left) + { + exponent = math::abs((globalCellPos.y() - gas_center_left) / gas_sigma_left); + } + else if(globalCellPos.y() >= gas_center_right) + { + exponent = math::abs((globalCellPos.y() - gas_center_right) / gas_sigma_right); + } - const float_X gas_power = ParamClass::gasPower; - const float_X density = math::exp(float_X(ParamClass::gasFactor) * math::pow(exponent, gas_power)); - return density; - } -}; -} -} + const float_X gas_power = ParamClass::gasPower; + const float_X density = math::exp(float_X(ParamClass::gasFactor) * math::pow(exponent, gas_power)); + return density; + } + }; + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/HomogenousImpl.def b/include/picongpu/particles/densityProfiles/HomogenousImpl.def index 4373124129..266a6d4609 100644 --- a/include/picongpu/particles/densityProfiles/HomogenousImpl.def +++ b/include/picongpu/particles/densityProfiles/HomogenousImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -22,8 +22,8 @@ namespace picongpu { -namespace densityProfiles -{ - struct HomogenousImpl; -} -} + namespace densityProfiles + { + struct HomogenousImpl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/HomogenousImpl.hpp b/include/picongpu/particles/densityProfiles/HomogenousImpl.hpp index 58d86cca33..cdfdc95db0 100644 --- a/include/picongpu/particles/densityProfiles/HomogenousImpl.hpp +++ b/include/picongpu/particles/densityProfiles/HomogenousImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -24,30 +24,29 @@ namespace picongpu { -namespace densityProfiles -{ - -struct HomogenousImpl -{ - template - struct apply + namespace densityProfiles { - using type = HomogenousImpl; - }; + struct HomogenousImpl + { + template + struct apply + { + using type = HomogenousImpl; + }; - HINLINE HomogenousImpl(uint32_t currentStep) - { - } + HINLINE HomogenousImpl(uint32_t currentStep) + { + } - /** Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - * @return float_X always 1.0 - */ - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - return float_X(1.0); - } -}; -} -} + /** Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + * @return float_X always 1.0 + */ + HDINLINE float_X operator()(const DataSpace& totalCellOffset) + { + return float_X(1.0); + } + }; + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/IProfile.def b/include/picongpu/particles/densityProfiles/IProfile.def index bca4e955fd..29c540cf27 100644 --- a/include/picongpu/particles/densityProfiles/IProfile.def +++ b/include/picongpu/particles/densityProfiles/IProfile.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene 
Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct IProfile; -} -} + namespace densityProfiles + { + template + struct IProfile; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/IProfile.hpp b/include/picongpu/particles/densityProfiles/IProfile.hpp index 3d6665eee1..865ec74969 100644 --- a/include/picongpu/particles/densityProfiles/IProfile.hpp +++ b/include/picongpu/particles/densityProfiles/IProfile.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -22,26 +22,47 @@ #include "picongpu/simulation_defines.hpp" #include "picongpu/particles/densityProfiles/IProfile.def" +#include -namespace picongpu -{ -namespace densityProfiles -{ -template -struct IProfile : private T_Base +namespace picongpu { - - using Base = T_Base; - - HINLINE IProfile(uint32_t currentStep) : Base(currentStep) + namespace densityProfiles { - } + /** Wrapper around a given density profile functor + * + * Defines density profile "concept" interface and compile-time checks that + * the given profile type is compatible to it + * + * @tparam T_Profile wrapped density profile functor type + */ + template + struct IProfile : private T_Profile + { + /** Create a profile functor for the given time iteration + * + * @param currentStep current time iteration + */ + HINLINE IProfile(uint32_t const currentStep) : T_Profile(currentStep) + { + } - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - return Base::operator()(totalCellOffset); - } -}; -} -} + /** Calculate physical particle density value for the given cell + * + * It concerns real (physical, not macro-) particles. + * The result is in units of BASE_DENSITY times PIC units of volume**-3. 
+ * + * The density is assumed constant inside a cell, so the underlying + * functor should preferably return a value in the cell center. + * + * @param totalCellOffset total offset from the start of the global + * simulation area, including all slides [in cells] + */ + HDINLINE float_X operator()(pmacc::DataSpace const& totalCellOffset) + { + return T_Profile::operator()(totalCellOffset); + } + }; + + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/LinearExponentialImpl.def b/include/picongpu/particles/densityProfiles/LinearExponentialImpl.def index 6cd80440b6..beb3baf659 100644 --- a/include/picongpu/particles/densityProfiles/LinearExponentialImpl.def +++ b/include/picongpu/particles/densityProfiles/LinearExponentialImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct LinearExponentialImpl; -} -} + namespace densityProfiles + { + template + struct LinearExponentialImpl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/LinearExponentialImpl.hpp b/include/picongpu/particles/densityProfiles/LinearExponentialImpl.hpp index a341a8367f..5a31c4a7d5 100644 --- a/include/picongpu/particles/densityProfiles/LinearExponentialImpl.hpp +++ b/include/picongpu/particles/densityProfiles/LinearExponentialImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -24,55 +24,51 @@ namespace picongpu { -namespace densityProfiles -{ - -template -struct LinearExponentialImpl : public T_ParamClass -{ - using ParamClass = T_ParamClass; - - template - struct apply + namespace densityProfiles { - using type = LinearExponentialImpl; - }; + template + struct LinearExponentialImpl : public T_ParamClass + { + using ParamClass = T_ParamClass; - HINLINE LinearExponentialImpl(uint32_t currentStep) - { + template + struct apply + { + using type = LinearExponentialImpl; + }; - } + HINLINE LinearExponentialImpl(uint32_t currentStep) + { + } - /* Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); - const float_X gas_a = ParamClass::gasA_SI * UNIT_LENGTH; - const float_X gas_d = ParamClass::gasD_SI * UNIT_LENGTH; - const float_X gas_y_max = ParamClass::gasYMax_SI / UNIT_LENGTH; + /* Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(const DataSpace& totalCellOffset) + { + const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); + const float_X gas_a = ParamClass::gasA_SI * UNIT_LENGTH; + const float_X gas_d = ParamClass::gasD_SI * UNIT_LENGTH; + const float_X gas_y_max = ParamClass::gasYMax_SI / UNIT_LENGTH; - const floatD_X globalCellPos( - precisionCast(totalCellOffset) * - cellSize.shrink() - ); - float_X density = float_X(0.0); + const floatD_X globalCellPos(precisionCast(totalCellOffset) * cellSize.shrink()); + float_X density = float_X(0.0); - if (globalCellPos.y() < vacuum_y) return density; + if(globalCellPos.y() < vacuum_y) + return density; - if (globalCellPos.y() <= gas_y_max) // linear slope - density = gas_a * globalCellPos.y() + ParamClass::gasB; - else // exponential slope - density = 
math::exp((globalCellPos.y() - gas_y_max) * gas_d); + if(globalCellPos.y() <= gas_y_max) // linear slope + density = gas_a * globalCellPos.y() + ParamClass::gasB; + else // exponential slope + density = math::exp((globalCellPos.y() - gas_y_max) * gas_d); - // avoid < 0 densities for the linear slope - if (density < float_X(0.0)) - density = float_X(0.0); + // avoid < 0 densities for the linear slope + if(density < float_X(0.0)) + density = float_X(0.0); - return density; - } -}; -} -} + return density; + } + }; + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/SphereFlanksImpl.def b/include/picongpu/particles/densityProfiles/SphereFlanksImpl.def index c9e0c0ff8e..7325dcc398 100644 --- a/include/picongpu/particles/densityProfiles/SphereFlanksImpl.def +++ b/include/picongpu/particles/densityProfiles/SphereFlanksImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,9 +22,9 @@ namespace picongpu { -namespace densityProfiles -{ - template - struct SphereFlanksImpl; -} -} + namespace densityProfiles + { + template + struct SphereFlanksImpl; + } +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/SphereFlanksImpl.hpp b/include/picongpu/particles/densityProfiles/SphereFlanksImpl.hpp index 1a1e7f41c3..ea53395ad3 100644 --- a/include/picongpu/particles/densityProfiles/SphereFlanksImpl.hpp +++ b/include/picongpu/particles/densityProfiles/SphereFlanksImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -24,61 +24,57 @@ namespace picongpu { - -namespace densityProfiles -{ - -template -struct SphereFlanksImpl : public T_ParamClass -{ - using ParamClass = T_ParamClass; - - template - struct apply + namespace densityProfiles { - using type = SphereFlanksImpl; - }; + template + struct SphereFlanksImpl : public T_ParamClass + { + using ParamClass = T_ParamClass; - HINLINE SphereFlanksImpl(uint32_t currentStep) - { - } + template + struct apply + { + using type = SphereFlanksImpl; + }; - /** Calculate the normalized density - * - * @param totalCellOffset total offset including all slides [in cells] - */ - HDINLINE float_X operator()(const DataSpace& totalCellOffset) - { - const float_64 unit_length = UNIT_LENGTH; - const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); - const floatD_X center = precisionCast(ParamClass::center_SI / unit_length); - const float_X r = ParamClass::r_SI / unit_length; - const float_X ri = ParamClass::ri_SI / unit_length; - const float_X exponent = ParamClass::exponent_SI * unit_length; + HINLINE SphereFlanksImpl(uint32_t currentStep) + { + } + + /** Calculate the normalized density + * + * @param totalCellOffset total offset including all slides [in cells] + */ + HDINLINE float_X operator()(const DataSpace& totalCellOffset) + { + const float_64 unit_length = UNIT_LENGTH; + const float_X vacuum_y = float_X(ParamClass::vacuumCellsY) * cellSize.y(); + const floatD_X center = precisionCast(ParamClass::center_SI / unit_length); + const float_X r = ParamClass::r_SI / unit_length; + const float_X ri = ParamClass::ri_SI / unit_length; + const float_X exponent = ParamClass::exponent_SI * unit_length; - const floatD_X globalCellPos( - precisionCast(totalCellOffset) * - cellSize.shrink() - ); + const floatD_X globalCellPos(precisionCast(totalCellOffset) * cellSize.shrink()); - if (globalCellPos.y() < vacuum_y) return float_X(0.0); + if(globalCellPos.y() < vacuum_y) + return float_X(0.0); - const float_X distance = 
math::abs(globalCellPos - center); + const float_X distance = math::abs(globalCellPos - center); - /* "shell": inner radius */ - if (distance < ri) - return float_X(0.0); - /* "hard core" */ - else if (distance <= r) - return float_X(1.0); + /* "shell": inner radius */ + if(distance < ri) + return float_X(0.0); + /* "hard core" */ + else if(distance <= r) + return float_X(1.0); - /* "soft exp. flanks" - * note: by definition (return, see above) the - * argument [ r - distance ] will be element of (-inf, 0) */ - else - return math::exp((r - distance) * exponent); - } -}; -} -} + /* "soft exp. flanks" + * note: by definition (return, see above) the + * argument [ r - distance ] will be element of (-inf, 0) */ + else + return math::exp((r - distance) * exponent); + } + }; + } // namespace densityProfiles +} // namespace picongpu diff --git a/include/picongpu/particles/densityProfiles/profiles.def b/include/picongpu/particles/densityProfiles/profiles.def index 1160d49b2d..f59dfe7778 100644 --- a/include/picongpu/particles/densityProfiles/profiles.def +++ b/include/picongpu/particles/densityProfiles/profiles.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Alexander Grund +/* Copyright 2014-2021 Rene Widera, Alexander Grund * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/densityProfiles/profiles.hpp b/include/picongpu/particles/densityProfiles/profiles.hpp index f71c178cab..084b5f857d 100644 --- a/include/picongpu/particles/densityProfiles/profiles.hpp +++ b/include/picongpu/particles/densityProfiles/profiles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -28,6 +28,6 @@ #include "picongpu/particles/densityProfiles/SphereFlanksImpl.hpp" #include "picongpu/particles/densityProfiles/EveryNthCellImpl.hpp" -#if( ENABLE_HDF5 == 1 ) +#if(ENABLE_HDF5 == 1) # include "picongpu/particles/densityProfiles/FromHDF5Impl.hpp" #endif diff --git a/include/picongpu/particles/filter/All.def b/include/picongpu/particles/filter/All.def index c638b2c17e..3f685026d4 100644 --- a/include/picongpu/particles/filter/All.def +++ b/include/picongpu/particles/filter/All.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,17 +22,16 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ - - /** check if a particle handle is valid - * - * the particle method `::isValidHandle()` is called. - */ - struct All; + namespace particles + { + namespace filter + { + /** check if a particle handle is valid + * + * the particle method `::isValidHandle()` is called. + */ + struct All; -} //namespace filter -} //namespace particles -} //namespace picongpu + } // namespace filter + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/filter/All.hpp b/include/picongpu/particles/filter/All.hpp index 2893a61cb6..114d5ba302 100644 --- a/include/picongpu/particles/filter/All.hpp +++ b/include/picongpu/particles/filter/All.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -24,80 +24,60 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ - -namespace acc -{ - - //! 
check the particle handle - struct All + namespace particles { - - /** check particle handle - * - * @tparam T_Particle pmacc::Particles, type of the particle - * @tparam alpaka accelerator type - * - * @param alpaka accelerator - * @param particle particle which is checked - * @return true if particle handle is valid, else false - */ - template< - typename T_Particle, - typename T_Acc - > - HDINLINE bool operator()( - T_Acc const &, - T_Particle const & particle - ) + namespace filter { - return particle.isHandleValid( ); - } - }; + namespace acc + { + //! check the particle handle + struct All + { + /** check particle handle + * + * @tparam T_Particle pmacc::Particles, type of the particle + * @tparam alpaka accelerator type + * + * @param alpaka accelerator + * @param particle particle which is checked + * @return true if particle handle is valid, else false + */ + template + HDINLINE bool operator()(T_Acc const&, T_Particle const& particle) + { + return particle.isHandleValid(); + } + }; -} // namespace acc + } // namespace acc - struct All - { - template< typename T_SpeciesType > - struct apply - { - using type = All; - }; + struct All + { + template + struct apply + { + using type = All; + }; - /** create filter for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @param offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::All - operator( )( - T_Acc const & acc, - DataSpace< simDim > const &, - T_WorkerCfg const & - ) const - { - return acc::All{ }; + /** create filter for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @param offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE 
acc::All operator()(T_Acc const& acc, DataSpace const&, T_WorkerCfg const&) const + { + return acc::All{}; + } - } - - static - HINLINE std::string - getName( ) - { - return std::string("all"); - } - }; + static HINLINE std::string getName() + { + return std::string("all"); + } + }; -} //namespace filter -} //namespace particles -} //namespace picongpu + } // namespace filter + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/filter/IUnary.def b/include/picongpu/particles/filter/IUnary.def index 4b7aac2595..9b5af034bb 100644 --- a/include/picongpu/particles/filter/IUnary.def +++ b/include/picongpu/particles/filter/IUnary.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,23 +27,17 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ - - /** interface for a unary particle filter - * - * @tparam T_UnaryFilter unary particle filter must contain `bool operator()(P && particle)` - */ - template< - typename T_UnaryFilter - > - using IUnary = pmacc::filter::Interface< - T_UnaryFilter, - 1u - >; + namespace particles + { + namespace filter + { + /** interface for a unary particle filter + * + * @tparam T_UnaryFilter unary particle filter must contain `bool operator()(P && particle)` + */ + template + using IUnary = pmacc::filter::Interface; -} // namespace filter -} // namespace particles + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/RelativeGlobalDomainPosition.def b/include/picongpu/particles/filter/RelativeGlobalDomainPosition.def index c451c90c00..9a072af569 100644 --- a/include/picongpu/particles/filter/RelativeGlobalDomainPosition.def +++ b/include/picongpu/particles/filter/RelativeGlobalDomainPosition.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -22,39 +22,39 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace param -{ - struct RelativeGlobalDomainPosition + namespace particles { - /* lowerBound is included in the range*/ - static constexpr float_X lowerBound = 0.0; - /* upperBound is excluded in the range*/ - static constexpr float_X upperBound = 1.0; - /* dimension for the filter - * x = 0; y= 1; z = 2 - */ - static constexpr uint32_t dimension = 0; + namespace filter + { + namespace param + { + struct RelativeGlobalDomainPosition + { + /* lowerBound is included in the range*/ + static constexpr float_X lowerBound = 0.0; + /* upperBound is excluded in the range*/ + static constexpr float_X upperBound = 1.0; + /* dimension for the filter + * x = 0; y= 1; z = 2 + */ + static constexpr uint32_t dimension = 0; - // name of the filter - static constexpr char const * name = "relativeGlobalDomainPosition"; - }; -} // namespace param + // name of the filter + static constexpr char const* name = "relativeGlobalDomainPosition"; + }; + } // namespace param - /** filter particle dependent on the global position - * - * Check if a particle is within a relative area in one direction of the global - * domain. - * - * @tparam T_Params picongpu::particles::filter::param::RelativeGlobalDomainPosition, - * parameter to configure the functor - */ - template< typename T_Params = param::RelativeGlobalDomainPosition > - struct RelativeGlobalDomainPosition; + /** filter particle dependent on the global position + * + * Check if a particle is within a relative area in one direction of the global + * domain. 
+ * + * @tparam T_Params picongpu::particles::filter::param::RelativeGlobalDomainPosition, + * parameter to configure the functor + */ + template + struct RelativeGlobalDomainPosition; -} //namespace filter -} //namespace particles -} //namespace picongpu + } // namespace filter + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/filter/RelativeGlobalDomainPosition.hpp b/include/picongpu/particles/filter/RelativeGlobalDomainPosition.hpp index ecea8ba1fc..544d84783a 100644 --- a/include/picongpu/particles/filter/RelativeGlobalDomainPosition.hpp +++ b/include/picongpu/particles/filter/RelativeGlobalDomainPosition.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * * This file is part of PIConGPU. @@ -29,161 +29,132 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ - -namespace acc -{ - template< typename T_Params > - struct RelativeGlobalDomainPosition + namespace particles { - using Params = T_Params; - - HDINLINE RelativeGlobalDomainPosition( - DataSpace< simDim > const & localDomainOffset, - DataSpace< simDim > const & globalDomainSize, - DataSpace< simDim > const & localSuperCellOffset - ) : - m_localDomainOffset( localDomainOffset ), - m_globalDomainSize( globalDomainSize ), - m_localSuperCellOffset( localSuperCellOffset ) + namespace filter { - } - - template< - typename T_Acc, - typename T_Particle - > - HDINLINE bool operator()( - T_Acc const &, - T_Particle const & particle - ) - { - if( particle.isHandleValid( ) ) + namespace acc { - using SuperCellSize = typename T_Particle::SuperCellSize; - /* offset of the superCell (in cells, without any guards) to the origin of the global domain */ - DataSpace< simDim > globalSuperCellOffset = m_localDomainOffset + ( - m_localSuperCellOffset * - SuperCellSize::toRT( ) - ); - return isParticleInsideRange( particle, globalSuperCellOffset); - } - return false; - } - - private: - - /** check if a particle is located in 
the user defined range - * - * @tparam T_Particle type of the particle - * @param particle particle than needs to be checked - * @param globalSuperCellOffset offset of the superCell (in cells, without any guards) - * to the origin of the global domain - */ - template< typename T_Particle > - HDINLINE bool isParticleInsideRange( T_Particle const & particle, DataSpace< simDim > const & globalSuperCellOffset ) const - { - using SuperCellSize = typename T_Particle::SuperCellSize; - - int const particleCellIdx = particle[ localCellIdx_ ]; - DataSpace< simDim > const cellInSuperCell( DataSpaceOperations< simDim >:: - template map< SuperCellSize >( particleCellIdx ) ); - DataSpace< simDim > const globalParticleOffset( - globalSuperCellOffset + - cellInSuperCell - ); - - float_X const relativePosition = float_X( globalParticleOffset[ Params::dimension ] ) / - float_X( m_globalDomainSize[ Params::dimension ] ); - - return ( Params::lowerBound <= relativePosition && - relativePosition < Params::upperBound ); - } - - DataSpace< simDim > const m_localDomainOffset; - DataSpace< simDim > const m_globalDomainSize; - DataSpace< simDim > const m_localSuperCellOffset; - }; - -} // namespace acc - - template< typename T_Params > - struct RelativeGlobalDomainPosition - { - using Params = T_Params; - - template< typename T_SpeciesType > - struct apply - { - using type = RelativeGlobalDomainPosition; - }; - - HINLINE RelativeGlobalDomainPosition( ) - { - SubGrid< simDim > const & subGrid = Environment< simDim >::get( ).SubGrid( ); - globalDomainSize = subGrid.getGlobalDomain( ).size; - localDomainOffset = subGrid.getLocalDomain( ).offset; - } - - /** create filter for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename 
T_Acc - > - HDINLINE acc::RelativeGlobalDomainPosition< Params > - operator( )( - T_Acc const & acc, - DataSpace< simDim > const & localSuperCellOffset, - T_WorkerCfg const & - ) const - { - return acc::RelativeGlobalDomainPosition< Params >( - localDomainOffset, - globalDomainSize, - localSuperCellOffset - ); - - } - - static - HINLINE std::string - getName( ) + template + struct RelativeGlobalDomainPosition + { + using Params = T_Params; + + HDINLINE RelativeGlobalDomainPosition( + DataSpace const& localDomainOffset, + DataSpace const& globalDomainSize, + DataSpace const& localSuperCellOffset) + : m_localDomainOffset(localDomainOffset) + , m_globalDomainSize(globalDomainSize) + , m_localSuperCellOffset(localSuperCellOffset) + { + } + + template + HDINLINE bool operator()(T_Acc const&, T_Particle const& particle) + { + if(particle.isHandleValid()) + { + using SuperCellSize = typename T_Particle::SuperCellSize; + /* offset of the superCell (in cells, without any guards) to the origin of the global + * domain */ + DataSpace globalSuperCellOffset + = m_localDomainOffset + (m_localSuperCellOffset * SuperCellSize::toRT()); + return isParticleInsideRange(particle, globalSuperCellOffset); + } + return false; + } + + private: + /** check if a particle is located in the user defined range + * + * @tparam T_Particle type of the particle + * @param particle particle than needs to be checked + * @param globalSuperCellOffset offset of the superCell (in cells, without any guards) + * to the origin of the global domain + */ + template + HDINLINE bool isParticleInsideRange( + T_Particle const& particle, + DataSpace const& globalSuperCellOffset) const + { + using SuperCellSize = typename T_Particle::SuperCellSize; + + int const particleCellIdx = particle[localCellIdx_]; + DataSpace const cellInSuperCell( + DataSpaceOperations::template map(particleCellIdx)); + DataSpace const globalParticleOffset(globalSuperCellOffset + cellInSuperCell); + + float_X const relativePosition = 
float_X(globalParticleOffset[Params::dimension]) + / float_X(m_globalDomainSize[Params::dimension]); + + return (Params::lowerBound <= relativePosition && relativePosition < Params::upperBound); + } + + DataSpace const m_localDomainOffset; + DataSpace const m_globalDomainSize; + DataSpace const m_localSuperCellOffset; + }; + + } // namespace acc + + template + struct RelativeGlobalDomainPosition + { + using Params = T_Params; + + template + struct apply + { + using type = RelativeGlobalDomainPosition; + }; + + HINLINE RelativeGlobalDomainPosition() + { + SubGrid const& subGrid = Environment::get().SubGrid(); + globalDomainSize = subGrid.getGlobalDomain().size; + localDomainOffset = subGrid.getLocalDomain().offset; + } + + /** create filter for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::RelativeGlobalDomainPosition operator()( + T_Acc const& acc, + DataSpace const& localSuperCellOffset, + T_WorkerCfg const&) const + { + return acc::RelativeGlobalDomainPosition( + localDomainOffset, + globalDomainSize, + localSuperCellOffset); + } + + static HINLINE std::string getName() + { + // we provide the name from the param class + return T_Params::name; + } + + DataSpace localDomainOffset; + DataSpace globalDomainSize; + }; + + } // namespace filter + + namespace traits { - // we provide the name from the param class - return T_Params::name; - } - - DataSpace< simDim > localDomainOffset; - DataSpace< simDim > globalDomainSize; - }; - -} //namespace filter - -namespace traits -{ - template< - typename T_Species, - typename T_Params - > - struct SpeciesEligibleForSolver< - T_Species, - filter::RelativeGlobalDomainPosition< T_Params > - > - { - using type = typename pmacc::traits::HasIdentifiers< - typename 
T_Species::FrameType, - MakeSeq_t< localCellIdx > - >::type; - }; -} // namespace traits -} // namespace particles + template + struct SpeciesEligibleForSolver> + { + using type = typename pmacc::traits:: + HasIdentifiers>::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/filter.def b/include/picongpu/particles/filter/filter.def index f304e066ea..dfe6820cc9 100644 --- a/include/picongpu/particles/filter/filter.def +++ b/include/picongpu/particles/filter/filter.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/filter/filter.hpp b/include/picongpu/particles/filter/filter.hpp index 47c8022daa..f80c643db3 100644 --- a/include/picongpu/particles/filter/filter.hpp +++ b/include/picongpu/particles/filter/filter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/filter/generic/Free.def b/include/picongpu/particles/filter/generic/Free.def index 4b33ffa07f..de2786d237 100644 --- a/include/picongpu/particles/filter/generic/Free.def +++ b/include/picongpu/particles/filter/generic/Free.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -22,44 +22,43 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace generic -{ - - /** call simple free user defined filter - * - * @tparam T_Functor user defined filter - * **optional**: can implement **one** host side constructor - * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` - * - * example for `particleFilters.param`: each particle with in-cell position greater than 0.5 - * @code{.cpp} - * - * struct FunctorEachParticleAboveMiddleOfTheCell - * { - * template< typename T_Particle > - * HDINLINE bool operator()( T_Particle const & particle ) - * { - * bool result = false; - * if( particle[ position_ ].y() >= float_X( 0.5 ) ) - * result = true; - * return result; - * } - * static constexpr char const * name = "eachParticleAboveMiddleOfTheCell"; - * }; - * - * using EachParticleAboveMiddleOfTheCell = generic::Free< - * FunctorEachParticleAboveMiddleOfTheCell - * >; - * @endcode - */ - template< typename T_Functor > - struct Free; + namespace particles + { + namespace filter + { + namespace generic + { + /** call simple free user defined filter + * + * @tparam T_Functor user defined filter + * **optional**: can implement **one** host side constructor + * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` + * + * example for `particleFilters.param`: each particle with in-cell position greater than 0.5 + * @code{.cpp} + * + * struct FunctorEachParticleAboveMiddleOfTheCell + * { + * template< typename T_Particle > + * HDINLINE bool operator()( T_Particle const & particle ) + * { + * bool result = false; + * if( particle[ position_ ].y() >= float_X( 0.5 ) ) + * result = true; + * return result; + * } + * static constexpr char const * name = "eachParticleAboveMiddleOfTheCell"; + * }; + * + * using EachParticleAboveMiddleOfTheCell = generic::Free< + * FunctorEachParticleAboveMiddleOfTheCell + * >; + * @endcode + */ + template + struct Free; -} // namespace generic -} // namespace filter -} // namespace particles + } // 
namespace generic + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/generic/Free.hpp b/include/picongpu/particles/filter/generic/Free.hpp index 9db8566a50..9065b1c9d8 100644 --- a/include/picongpu/particles/filter/generic/Free.hpp +++ b/include/picongpu/particles/filter/generic/Free.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -28,109 +28,89 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace generic -{ -namespace acc -{ - /** wrapper for the user filter on the accelerator - * - * @tparam T_Functor user defined filter - */ - template< typename T_Functor > - struct Free : private T_Functor - { - //! type of the user filter - using Functor = T_Functor; - - //! store user filter instance - HDINLINE Free( Functor const & filter ) : - Functor( filter ) - { - } - - /** execute the user filter - * - * @tparam T_Args type of the arguments passed to the user filter - * - * @param particle particle to use for the filtering - */ - template< - typename T_Acc, - typename T_Particle - > - HDINLINE - bool operator( )( - T_Acc const &, - T_Particle const & particle - ) - { - bool const isValid = particle.isHandleValid( ); - - return isValid && Functor::operator( )( particle ); - } - - }; -} // namespace acc - - template< typename T_Functor > - struct Free : protected functor::User< T_Functor > + namespace particles { - - using Functor = functor::User< T_Functor >; - - template< typename T_SpeciesType > - struct apply + namespace filter { - using type = Free; - }; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE Free( uint32_t currentStep ) : Functor( currentStep ) - { - } - - /** create device filter - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka 
accelerator - * @param offset (in supercells, without any guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::Free< Functor > - operator()( - T_Acc const &, - DataSpace< simDim > const &, - T_WorkerCfg const & - ) const - { - return acc::Free< Functor >( *static_cast< Functor const * >( this ) ); - } - - static - HINLINE std::string - getName( ) - { - // provide the name from the user functor - return Functor::name; - } - - }; - -} // namespace generic -} // namespace filter -} // namespace particles + namespace generic + { + namespace acc + { + /** wrapper for the user filter on the accelerator + * + * @tparam T_Functor user defined filter + */ + template + struct Free : private T_Functor + { + //! type of the user filter + using Functor = T_Functor; + + //! store user filter instance + HDINLINE Free(Functor const& filter) : Functor(filter) + { + } + + /** execute the user filter + * + * @tparam T_Args type of the arguments passed to the user filter + * + * @param particle particle to use for the filtering + */ + template + HDINLINE bool operator()(T_Acc const&, T_Particle const& particle) + { + bool const isValid = particle.isHandleValid(); + + return isValid && Functor::operator()(particle); + } + }; + } // namespace acc + + template + struct Free : protected functor::User + { + using Functor = functor::User; + + template + struct apply + { + using type = Free; + }; + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE Free(uint32_t currentStep) : Functor(currentStep) + { + } + + /** create device filter + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param offset (in supercells, without any guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + 
HDINLINE acc::Free operator()(T_Acc const&, DataSpace const&, T_WorkerCfg const&) + const + { + return acc::Free(*static_cast(this)); + } + + static HINLINE std::string getName() + { + // provide the name from the user functor + return Functor::name; + } + }; + + } // namespace generic + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/generic/FreeRng.def b/include/picongpu/particles/filter/generic/FreeRng.def index 82643f78f2..aa8165cc49 100644 --- a/include/picongpu/particles/filter/generic/FreeRng.def +++ b/include/picongpu/particles/filter/generic/FreeRng.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,52 +27,48 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace generic -{ - - /** call simple free user defined functor and provide a random number generator - * - * - * @tparam T_Functor user defined unary functor - * @tparam T_Distribution pmacc::random::distributions, random number distribution - * - * example for `particleFilters.param`: get every second particle - * (random sample of 50%) - * @code{.cpp} - * - * struct FunctorEachSecondParticle - * { - * template< typename T_Rng, typename T_Particle > - * HDINLINE bool operator()( - * T_Rng & rng, - * T_Particle const & particle - * ) - * { - * bool result = false; - * if( rng() >= float_X( 0.5 ) ) - * result = true; - * return result; - * } - * static constexpr char const * name = "eachSecondParticle"; - * }; - * - * using EachSecondParticle = generic::FreeRng< - * FunctorEachSecondParticle, - * pmacc::random::distributions::Uniform< float_X > - * >; - * @endcode - */ - template< - typename T_Functor, - typename T_Distribution - > - struct FreeRng; + namespace particles + { + namespace filter + { + namespace generic + { + /** call simple free user defined functor and provide a random number generator + * + * + * @tparam 
T_Functor user defined unary functor + * @tparam T_Distribution pmacc::random::distributions, random number distribution + * + * example for `particleFilters.param`: get every second particle + * (random sample of 50%) + * @code{.cpp} + * + * struct FunctorEachSecondParticle + * { + * template< typename T_Rng, typename T_Particle > + * HDINLINE bool operator()( + * T_Rng & rng, + * T_Particle const & particle + * ) + * { + * bool result = false; + * if( rng() >= float_X( 0.5 ) ) + * result = true; + * return result; + * } + * static constexpr char const * name = "eachSecondParticle"; + * }; + * + * using EachSecondParticle = generic::FreeRng< + * FunctorEachSecondParticle, + * pmacc::random::distributions::Uniform< float_X > + * >; + * @endcode + */ + template + struct FreeRng; -} // namespace generic -} // namespace filter -} // namespace particles + } // namespace generic + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/generic/FreeRng.hpp b/include/picongpu/particles/filter/generic/FreeRng.hpp index 2667644f29..2f5f282d80 100644 --- a/include/picongpu/particles/filter/generic/FreeRng.hpp +++ b/include/picongpu/particles/filter/generic/FreeRng.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -29,156 +29,105 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace generic -{ -namespace acc -{ - template< - typename T_Functor, - typename T_RngType - > - struct FreeRng : private T_Functor - { - - using Functor = T_Functor; - using RngType = T_RngType; - - HDINLINE FreeRng( - Functor const & functor, - RngType const & rng - ) : - T_Functor( functor ), m_rng( rng ) - { - } - - /** call user functor - * - * The random number generator is initialized with the first call. 
- * - * @tparam T_Particle type of the particle to manipulate - * @tparam T_Args type of the arguments passed to the user functor - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param particle particle which is given to the user functor - * @return void is used to enable the operator if the user functor except two arguments - */ - template< - typename T_Particle, - typename ... T_Args, - typename T_Acc - > - HDINLINE - bool operator()( - T_Acc const &, - T_Particle const & particle - ) - { - namespace nvrng = nvidia::rng; - - bool const isValid = particle.isHandleValid( ); - - return isValid && Functor::operator()( - m_rng, - particle - ); - } - - private: - - RngType m_rng; - }; -} // namespace acc - - template< - typename T_Functor, - typename T_Distribution - > - struct FreeRng : - protected functor::User< T_Functor >, - private picongpu::particles::functor::misc::Rng< - T_Distribution - > + namespace particles { - template< typename T_SpeciesType > - struct apply - { - using type = FreeRng; - }; - - using RngGenerator = picongpu::particles::functor::misc::Rng< - T_Distribution - >; - - using RngType = typename RngGenerator::RandomGen; - - using Functor = functor::User< T_Functor >; - using Distribution = T_Distribution; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE FreeRng( uint32_t currentStep ) : - Functor( currentStep ), - RngGenerator( currentStep ) - { - } - - /** create functor for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param workerCfg configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE auto - operator()( - T_Acc const & acc, - DataSpace< simDim > const & 
localSupercellOffset, - T_WorkerCfg const & workerCfg - ) const - -> acc::FreeRng< - Functor, - RngType - > - { - RngType const rng = ( *static_cast< RngGenerator const * >( this ) )( - acc, - localSupercellOffset, - workerCfg - ); - - return acc::FreeRng< - Functor, - RngType - >( - *static_cast< Functor const * >( this ), - rng - ); - } - - static - HINLINE std::string - getName( ) + namespace filter { - // we provide the name from the param class - return Functor::name; - } - }; - -} // namespace generic -} // namespace filter -} // namespace particles + namespace generic + { + namespace acc + { + template + struct FreeRng : private T_Functor + { + using Functor = T_Functor; + using RngType = T_RngType; + + HDINLINE FreeRng(Functor const& functor, RngType const& rng) : T_Functor(functor), m_rng(rng) + { + } + + /** call user functor + * + * The random number generator is initialized with the first call. + * + * @tparam T_Particle type of the particle to manipulate + * @tparam T_Args type of the arguments passed to the user functor + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param particle particle which is given to the user functor + * @return void is used to enable the operator if the user functor except two arguments + */ + template + HDINLINE bool operator()(T_Acc const&, T_Particle const& particle) + { + bool const isValid = particle.isHandleValid(); + + return isValid && Functor::operator()(m_rng, particle); + } + + private: + RngType m_rng; + }; + } // namespace acc + + template + struct FreeRng + : protected functor::User + , private picongpu::particles::functor::misc::Rng + { + template + struct apply + { + using type = FreeRng; + }; + + using RngGenerator = picongpu::particles::functor::misc::Rng; + + using RngType = typename RngGenerator::RandomGen; + + using Functor = functor::User; + using Distribution = T_Distribution; + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE 
FreeRng(uint32_t currentStep) : Functor(currentStep), RngGenerator(currentStep) + { + } + + /** create functor for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param workerCfg configuration of the worker + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const& workerCfg) const -> acc::FreeRng + { + RngType const rng + = (*static_cast(this))(acc, localSupercellOffset, workerCfg); + + return acc::FreeRng(*static_cast(this), rng); + } + + static HINLINE std::string getName() + { + // we provide the name from the param class + return Functor::name; + } + }; + + } // namespace generic + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/generic/FreeTotalCellOffset.def b/include/picongpu/particles/filter/generic/FreeTotalCellOffset.def index a51a2401ca..1bb3732aea 100644 --- a/include/picongpu/particles/filter/generic/FreeTotalCellOffset.def +++ b/include/picongpu/particles/filter/generic/FreeTotalCellOffset.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,49 +27,48 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace generic -{ - - /** call simple free user defined functor and provide the cell information - * - * The functor passes the cell offset of the particle relative to the total - * domain origin into the functor. 
- * - * @tparam T_Functor user defined unary functor - * - * example for `particleFilters.param`: each particle with a cell offset of 5 - * in X direction - * @code{.cpp} - * - * struct FunctorEachParticleInXCell5 - * { - * template< typename T_Particle > - * HDINLINE bool operator()( - * DataSpace< simDim > const & particleOffsetToTotalOrigin, - * T_Particle const & particle - * ) - * { - * bool result = false; - * if( particleOffsetToTotalOrigin.x() == 5 ) - * result = true; - * return result; - * } - * static constexpr char const * name = "eachParticleInXCell5"; - * }; - * - * using EachParticleInXCell5 = generic::FreeTotalCellOffset< - * FunctorEachParticleInXCell5 - * >; - * @endcode - */ - template< typename T_Functor > - struct FreeTotalCellOffset; + namespace particles + { + namespace filter + { + namespace generic + { + /** call simple free user defined functor and provide the cell information + * + * The functor passes the cell offset of the particle relative to the total + * domain origin into the functor. 
+ * + * @tparam T_Functor user defined unary functor + * + * example for `particleFilters.param`: each particle with a cell offset of 5 + * in X direction + * @code{.cpp} + * + * struct FunctorEachParticleInXCell5 + * { + * template< typename T_Particle > + * HDINLINE bool operator()( + * DataSpace< simDim > const & particleOffsetToTotalOrigin, + * T_Particle const & particle + * ) + * { + * bool result = false; + * if( particleOffsetToTotalOrigin.x() == 5 ) + * result = true; + * return result; + * } + * static constexpr char const * name = "eachParticleInXCell5"; + * }; + * + * using EachParticleInXCell5 = generic::FreeTotalCellOffset< + * FunctorEachParticleInXCell5 + * >; + * @endcode + */ + template + struct FreeTotalCellOffset; -} // namespace generic -} // namespace filter -} // namespace particles + } // namespace generic + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/filter/generic/FreeTotalCellOffset.hpp b/include/picongpu/particles/filter/generic/FreeTotalCellOffset.hpp index 744c3e6ee8..b0d76bdfd1 100644 --- a/include/picongpu/particles/filter/generic/FreeTotalCellOffset.hpp +++ b/include/picongpu/particles/filter/generic/FreeTotalCellOffset.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -28,138 +28,113 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ -namespace generic -{ -namespace acc -{ - template< typename T_Functor > - struct FreeTotalCellOffset : private T_Functor + namespace particles { - - using Functor = T_Functor; - - HDINLINE FreeTotalCellOffset( - Functor const & functor, - DataSpace< simDim > const & superCellToLocalOriginCellOffset - ) : - T_Functor( functor ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) + namespace filter { - } - - /** call user functor - * - * The random number generator is initialized with the first call. 
- * - * @tparam T_Particle type of the particle to manipulate - * @tparam T_Args type of the arguments passed to the user functor - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param particle particle which is given to the user functor - * @return void is used to enable the operator if the user functor except two arguments - */ - template< - typename T_Particle, - typename T_Acc - > - HDINLINE - bool operator()( - T_Acc const &, - T_Particle const & particle - ) - { - bool filterResult = false; - if( particle.isHandleValid( ) ) + namespace generic { - DataSpace< simDim > const cellInSuperCell( - DataSpaceOperations< simDim >::template map< SuperCellSize > ( particle[ localCellIdx_ ] ) - ); - filterResult = Functor::operator( )( - m_superCellToLocalOriginCellOffset + cellInSuperCell, - particle - ); - } - return filterResult; - } - - private: - - DataSpace< simDim > const m_superCellToLocalOriginCellOffset; - }; -} // namespace acc - - template< typename T_Functor > - struct FreeTotalCellOffset : - protected functor::User< T_Functor >, - private functor::misc::TotalCellOffset - { - using CellOffsetFunctor = functor::misc::TotalCellOffset; - using Functor = functor::User< T_Functor >; - - template< typename T_SpeciesType > - struct apply - { - using type = FreeTotalCellOffset; - }; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE FreeTotalCellOffset( uint32_t currentStep ) : - Functor( currentStep ), - CellOffsetFunctor( currentStep ) - { - } - - /** create functor for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param workerCfg configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE 
auto - operator()( - T_Acc const & acc, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & workerCfg - ) const - -> acc::FreeTotalCellOffset< Functor > - { - auto & cellOffsetFunctor = *static_cast< CellOffsetFunctor const * >( this ); - return acc::FreeTotalCellOffset< Functor >( - *static_cast< Functor const * >( this ), - cellOffsetFunctor( - acc, - localSupercellOffset, - workerCfg - ) - ); - } - - static - HINLINE std::string - getName( ) - { - // we provide the name from the param class - return Functor::name; - } - }; - -} // namespace generic -} // namespace filter -} // namespace particles + namespace acc + { + template + struct FreeTotalCellOffset : private T_Functor + { + using Functor = T_Functor; + + HDINLINE FreeTotalCellOffset( + Functor const& functor, + DataSpace const& superCellToLocalOriginCellOffset) + : T_Functor(functor) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** call user functor + * + * The random number generator is initialized with the first call. 
+ * + * @tparam T_Particle type of the particle to manipulate + * @tparam T_Args type of the arguments passed to the user functor + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param particle particle which is given to the user functor + * @return void is used to enable the operator if the user functor except two arguments + */ + template + HDINLINE bool operator()(T_Acc const&, T_Particle const& particle) + { + bool filterResult = false; + if(particle.isHandleValid()) + { + DataSpace const cellInSuperCell( + DataSpaceOperations::template map(particle[localCellIdx_])); + filterResult = Functor::operator()( + m_superCellToLocalOriginCellOffset + cellInSuperCell, + particle); + } + return filterResult; + } + + private: + DataSpace const m_superCellToLocalOriginCellOffset; + }; + } // namespace acc + + template + struct FreeTotalCellOffset + : protected functor::User + , private functor::misc::TotalCellOffset + { + using CellOffsetFunctor = functor::misc::TotalCellOffset; + using Functor = functor::User; + + template + struct apply + { + using type = FreeTotalCellOffset; + }; + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE FreeTotalCellOffset(uint32_t currentStep) + : Functor(currentStep) + , CellOffsetFunctor(currentStep) + { + } + + /** create functor for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param workerCfg configuration of the worker + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const& workerCfg) const -> acc::FreeTotalCellOffset + { + auto& cellOffsetFunctor = *static_cast(this); + return acc::FreeTotalCellOffset( + *static_cast(this), + 
cellOffsetFunctor(acc, localSupercellOffset, workerCfg)); + } + + static HINLINE std::string getName() + { + // we provide the name from the param class + return Functor::name; + } + }; + + } // namespace generic + } // namespace filter + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/IFlyLite.hpp b/include/picongpu/particles/flylite/IFlyLite.hpp index 7a0b9b8cae..1f54dc3164 100644 --- a/include/picongpu/particles/flylite/IFlyLite.hpp +++ b/include/picongpu/particles/flylite/IFlyLite.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * * This file is part of PIConGPU. @@ -31,57 +31,48 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ - /** Interface for a method of solving population kinetics - */ - class IFlyLite + namespace particles { - public: - /** Allocate & Initialize Memory Buffers for Algorithms - * - * @param gridSizeLocal local size of electro-magnetic fields on the cells - * @param ionSpeciesName unique name for the ion species - */ - virtual void init( - pmacc::DataSpace< simDim > const & gridSizeLocal, - std::string const & ionSpeciesName - ) = 0; - - /** Calculate Evolution of Populations for One Time Step - * - * Interface for the update of the atomic populations during the PIC - * cycle. - * - * @param ionSpeciesName unique name for the ion species - * @param currentStep the current time step of the simulation - */ - template< - typename T_IonSpecies - > - void update( - std::string const & ionSpeciesName, - uint32_t currentStep - ) + namespace flylite { - boost::ignore_unused( ionSpeciesName, currentStep ); - /* The compiler is allowed to evaluate an expression those not depends on a template parameter - * even if the class is never instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. 
- * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html - * - * A workaround is to add a template dependency to the expression. - * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + /** Interface for a method of solving population kinetics */ - PMACC_STATIC_ASSERT_MSG( - false && sizeof(T_IonSpecies) != 0, - FLYlite_the_update_method_for_ion_population_kinetics_is_not_implemented - ); - } + class IFlyLite + { + public: + /** Allocate & Initialize Memory Buffers for Algorithms + * + * @param gridSizeLocal local size of electro-magnetic fields on the cells + * @param ionSpeciesName unique name for the ion species + */ + virtual void init(pmacc::DataSpace const& gridSizeLocal, std::string const& ionSpeciesName) + = 0; - }; -} // namespace flylite -} // namespace particles + /** Calculate Evolution of Populations for One Time Step + * + * Interface for the update of the atomic populations during the PIC + * cycle. + * + * @param ionSpeciesName unique name for the ion species + * @param currentStep the current time step of the simulation + */ + template + void update(std::string const& ionSpeciesName, uint32_t currentStep) + { + boost::ignore_unused(ionSpeciesName, currentStep); + /* The compiler is allowed to evaluate an expression those not depends on a template parameter + * even if the class is never instantiated. In that case static assert is always + * evaluated (e.g. with clang), this results in an error if the condition is false. + * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. 
+ */ + PMACC_STATIC_ASSERT_MSG( + false && sizeof(T_IonSpecies) != 0, + FLYlite_the_update_method_for_ion_population_kinetics_is_not_implemented, ); + } + }; + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/NonLTE.def b/include/picongpu/particles/flylite/NonLTE.def index 11cf3c1a58..8b78f6f2ee 100644 --- a/include/picongpu/particles/flylite/NonLTE.def +++ b/include/picongpu/particles/flylite/NonLTE.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * * This file is part of PIConGPU. @@ -23,32 +23,31 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ - /** Non-LTE Steady-State - * - * Implementation of non-LTE ionization dynamics. - * @todo later on, add references on the overall model here. - * - * @todo add T_OtherIonsList for multi ion species IPD - * - * @tparam T_ElectronsList A mpl sequence of picongpu::Particles with a list - * of electron species for local density and energy - * histogram binning - * - * @tparam T_PhotonsList A mpl sequence of picongpu::Particles with a list - * of photon species for local energy histogram - * binning - */ - template< - /* typename T_OtherIonsList, */ - typename T_ElectronsList, - typename T_PhotonsList - > - class NonLTE; + namespace particles + { + namespace flylite + { + /** Non-LTE Steady-State + * + * Implementation of non-LTE ionization dynamics. + * @todo later on, add references on the overall model here. 
+ * + * @todo add T_OtherIonsList for multi ion species IPD + * + * @tparam T_ElectronsList A mpl sequence of picongpu::Particles with a list + * of electron species for local density and energy + * histogram binning + * + * @tparam T_PhotonsList A mpl sequence of picongpu::Particles with a list + * of photon species for local energy histogram + * binning + */ + template< + /* typename T_OtherIonsList, */ + typename T_ElectronsList, + typename T_PhotonsList> + class NonLTE; -} // namespace flylite -} // namespace particles + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/NonLTE.hpp b/include/picongpu/particles/flylite/NonLTE.hpp index 7550c52a19..f1b61c1aa3 100644 --- a/include/picongpu/particles/flylite/NonLTE.hpp +++ b/include/picongpu/particles/flylite/NonLTE.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -31,70 +31,51 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ - template< - //! @todo for multi ion species IPD: typename T_OtherIonsList, - typename T_ElectronsList, - typename T_PhotonsList - > - class NonLTE : public IFlyLite + namespace particles { - public: - //! @todo for multi ion species IPD: using OtherIonsList = T_OtherIonsList; - - using ElectronsList = T_ElectronsList; - using PhotonsList = T_PhotonsList; + namespace flylite + { + template< + //! @todo for multi ion species IPD: typename T_OtherIonsList, + typename T_ElectronsList, + typename T_PhotonsList> + class NonLTE : public IFlyLite + { + public: + //! 
@todo for multi ion species IPD: using OtherIonsList = T_OtherIonsList; - virtual - void - init( - pmacc::DataSpace< simDim > const & gridSizeLocal, - std::string const & ionSpeciesName - ); + using ElectronsList = T_ElectronsList; + using PhotonsList = T_PhotonsList; - /** Update atomic configurations - * - * Prepares auxiliary fields for the non-LTE atomic physics model and - * updates the configurations & charge states of an ion species. - * - * @tparam T_IonSpeciesType a picongpu::Particles class with an ion - * species - * - * @param ionSpeciesName unique name of the ion species in T_IonSpeciesType - * @param currentStep the current time step - */ - template< - typename T_IonSpeciesType - > - void - update( - std::string const & ionSpeciesName, - uint32_t currentStep - ); + virtual void init(pmacc::DataSpace const& gridSizeLocal, std::string const& ionSpeciesName); - private: - /** Calculate new values in helper fields - * - * Prepares helper fields by calculating local densities and energy - * histograms. - * - * @param ionSpeciesName unique name of the ion species in T_IonSpeciesType - * @param currentStep the current time step - */ - template< - typename T_IonSpeciesType - > - void - fillHelpers( - std::string const & ionSpeciesName, - uint32_t currentStep - ); + /** Update atomic configurations + * + * Prepares auxiliary fields for the non-LTE atomic physics model and + * updates the configurations & charge states of an ion species. + * + * @tparam T_IonSpeciesType a picongpu::Particles class with an ion + * species + * + * @param ionSpeciesName unique name of the ion species in T_IonSpeciesType + * @param currentStep the current time step + */ + template + void update(std::string const& ionSpeciesName, uint32_t currentStep); - }; + private: + /** Calculate new values in helper fields + * + * Prepares helper fields by calculating local densities and energy + * histograms. 
+ * + * @param ionSpeciesName unique name of the ion species in T_IonSpeciesType + * @param currentStep the current time step + */ + template + void fillHelpers(std::string const& ionSpeciesName, uint32_t currentStep); + }; -} // namespace flylite -} // namespace particles + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/NonLTE.tpp b/include/picongpu/particles/flylite/NonLTE.tpp index aae44341bc..0dda1cbf11 100644 --- a/include/picongpu/particles/flylite/NonLTE.tpp +++ b/include/picongpu/particles/flylite/NonLTE.tpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -27,172 +27,115 @@ #include "picongpu/particles/particleToGrid/derivedAttributes/Density.def" #include "picongpu/particles/traits/GetShape.hpp" -/* pmacc */ #include #include #include -#include #include namespace picongpu { -namespace particles -{ -namespace flylite -{ - template< - //! @todo for multi ion species IPD: typename T_OtherIonsList - - typename T_ElectronsList, - typename T_PhotonsList - > - void - NonLTE< - T_ElectronsList, - T_PhotonsList - >::init( - pmacc::DataSpace< simDim > const & gridSizeLocal, - std::string const & ionSpeciesName - ) + namespace particles { - //! GPU-local number of cells in regular resolution (like FieldE & B) - pmacc::DataSpace< simDim > m_gridSizeLocal = gridSizeLocal; - //! GPU-local number of cells in averaged (reduced) resolution - pmacc::DataSpace< simDim > m_avgGridSizeLocal = m_gridSizeLocal / picongpu::flylite::spatialAverageBox::toRT(); - - DataConnector &dc = Environment<>::get().DataConnector(); - - using pmacc::memory::makeUnique; - // once allocated for all ion species to share - if( ! dc.hasId( helperFields::LocalEnergyHistogram::getName( "electrons" ) ) ) - dc.consume( - makeUnique< helperFields::LocalEnergyHistogram >( + namespace flylite + { + template< + //! 
@todo for multi ion species IPD: typename T_OtherIonsList + + typename T_ElectronsList, + typename T_PhotonsList> + void NonLTE::init( + pmacc::DataSpace const& gridSizeLocal, + std::string const& ionSpeciesName) + { + //! GPU-local number of cells in regular resolution (like FieldE & B) + pmacc::DataSpace m_gridSizeLocal = gridSizeLocal; + //! GPU-local number of cells in averaged (reduced) resolution + pmacc::DataSpace m_avgGridSizeLocal + = m_gridSizeLocal / picongpu::flylite::spatialAverageBox::toRT(); + + DataConnector& dc = Environment<>::get().DataConnector(); + + // once allocated for all ion species to share + if(!dc.hasId(helperFields::LocalEnergyHistogram::getName("electrons"))) + dc.consume(std::make_unique("electrons", m_avgGridSizeLocal)); + + if(!dc.hasId(helperFields::LocalEnergyHistogram::getName("photons"))) + dc.consume(std::make_unique("photons", m_avgGridSizeLocal)); + + if(!dc.hasId(helperFields::LocalDensity::getName("electrons"))) + dc.consume(std::make_unique("electrons", m_avgGridSizeLocal)); + + // for each ion species + if(!dc.hasId(helperFields::LocalRateMatrix::getName(ionSpeciesName))) + dc.consume(std::make_unique(ionSpeciesName, m_avgGridSizeLocal)); + + if(!dc.hasId(helperFields::LocalDensity::getName(ionSpeciesName))) + dc.consume(std::make_unique(ionSpeciesName, m_avgGridSizeLocal)); + } + + template< + //! @todo for multi ion species IPD: typename T_OtherIonsList, + + typename T_ElectronsList, + typename T_PhotonsList> + template + void NonLTE< + //! @todo for multi ion species IPD: T_OtherIonsList, + + T_ElectronsList, + T_PhotonsList>::update(std::string const& ionSpeciesName, uint32_t currentStep) + { + using IonSpeciesType = T_IonSpeciesType; + + // calculate density fields and energy histograms + fillHelpers(ionSpeciesName, currentStep); + + //! @todo calculate rate matrix + //! @todo implicit ODE solve to evolve populations + //! @todo modify f_e of free electrons + //! @todo modify f_ph of photon field (absorb) + //! 
@todo change charges, create electrons & photons + } + + template< + //! @todo for multi ion species IPD: typename T_OtherIonsList, + + typename T_ElectronsList, + typename T_PhotonsList> + template + void NonLTE< + //! @todo for multi ion species IPD: T_OtherIonsList, + + T_ElectronsList, + T_PhotonsList>::fillHelpers(std::string const& ionSpeciesName, uint32_t currentStep) + { + using IonSpeciesType = T_IonSpeciesType; + + // calculate density fields + helperFields::FillLocalDensity> fillDensityIons{}; + fillDensityIons(currentStep, ionSpeciesName); + + helperFields::FillLocalDensity fillDensityElectrons{}; + fillDensityElectrons(currentStep, "electrons"); + + // calculate energy histograms: f(e), f(ph) + helperFields::FillLocalEnergyHistogram fillEnergyHistogramElectrons{}; + fillEnergyHistogramElectrons( + currentStep, "electrons", - m_avgGridSizeLocal - ) - ); + picongpu::flylite::electronMinEnergy, + picongpu::flylite::electronMaxEnergy); - if( ! dc.hasId( helperFields::LocalEnergyHistogram::getName( "photons" ) ) ) - dc.consume( - makeUnique< helperFields::LocalEnergyHistogram >( + helperFields::FillLocalEnergyHistogram fillEnergyHistogramPhotons{}; + fillEnergyHistogramPhotons( + currentStep, "photons", - m_avgGridSizeLocal - ) - ); + picongpu::flylite::photonMinEnergy, + picongpu::flylite::photonMaxEnergy); + } - if( ! dc.hasId( helperFields::LocalDensity::getName( "electrons" ) ) ) - dc.consume( - makeUnique< helperFields::LocalDensity >( - "electrons", - m_avgGridSizeLocal - ) - ); - - // for each ion species - if( ! dc.hasId( helperFields::LocalRateMatrix::getName( ionSpeciesName ) ) ) - dc.consume( - makeUnique< helperFields::LocalRateMatrix >( - ionSpeciesName, - m_avgGridSizeLocal - ) - ); - - if( ! dc.hasId( helperFields::LocalDensity::getName( ionSpeciesName ) ) ) - dc.consume( - makeUnique< helperFields::LocalDensity >( - ionSpeciesName, - m_avgGridSizeLocal - ) - ); - } - - template< - //! 
@todo for multi ion species IPD: typename T_OtherIonsList, - - typename T_ElectronsList, - typename T_PhotonsList - > - template< - typename T_IonSpeciesType - > - void - NonLTE< - //! @todo for multi ion species IPD: T_OtherIonsList, - - T_ElectronsList, - T_PhotonsList - >::update( - std::string const & ionSpeciesName, - uint32_t currentStep - ) - { - using IonSpeciesType = T_IonSpeciesType; - - // calculate density fields and energy histograms - fillHelpers< IonSpeciesType >( ionSpeciesName, currentStep ); - - //! @todo calculate rate matrix - //! @todo implicit ODE solve to evolve populations - //! @todo modify f_e of free electrons - //! @todo modify f_ph of photon field (absorb) - //! @todo change charges, create electrons & photons - } - - template< - //! @todo for multi ion species IPD: typename T_OtherIonsList, - - typename T_ElectronsList, - typename T_PhotonsList - > - template< - typename T_IonSpeciesType - > - void - NonLTE< - //! @todo for multi ion species IPD: T_OtherIonsList, - - T_ElectronsList, - T_PhotonsList - >::fillHelpers( - std::string const & ionSpeciesName, - uint32_t currentStep - ) - { - using IonSpeciesType = T_IonSpeciesType; - - // calculate density fields - helperFields::FillLocalDensity< MakeSeq_t< IonSpeciesType > > fillDensityIons{}; - fillDensityIons( - currentStep, - ionSpeciesName - ); - - helperFields::FillLocalDensity< T_ElectronsList > fillDensityElectrons{}; - fillDensityElectrons( - currentStep, - "electrons" - ); - - // calculate energy histograms: f(e), f(ph) - helperFields::FillLocalEnergyHistogram< T_ElectronsList > fillEnergyHistogramElectrons{}; - fillEnergyHistogramElectrons( - currentStep, - "electrons", - picongpu::flylite::electronMinEnergy, - picongpu::flylite::electronMaxEnergy - ); - - helperFields::FillLocalEnergyHistogram< T_PhotonsList > fillEnergyHistogramPhotons{}; - fillEnergyHistogramPhotons( - currentStep, - "photons", - picongpu::flylite::photonMinEnergy, - picongpu::flylite::photonMaxEnergy - ); - } 
- -} // namespace flylite -} // namespace particles + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalDensity.hpp b/include/picongpu/particles/flylite/helperFields/LocalDensity.hpp index 86fb052be5..a2e0a9e76a 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalDensity.hpp +++ b/include/picongpu/particles/flylite/helperFields/LocalDensity.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -33,78 +33,69 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ - class LocalDensity : - public ISimulationData + namespace particles { - public: - using ValueType = float_X; + namespace flylite + { + namespace helperFields + { + class LocalDensity : public ISimulationData + { + public: + using ValueType = float_X; - private: - GridBuffer< ValueType, simDim >* m_density; - std::string m_speciesGroup; + private: + GridBuffer* m_density; + std::string m_speciesGroup; - public: - /** Allocate and initialize local (number) density - * - * @param speciesGroup unique naming for the species inside this density, - * e.g. a collection of electron species or ions - * @param sizeLocal spatial size of the local density value - */ - LocalDensity( - std::string const & speciesGroup, - DataSpace< simDim > const & sizeLocal - ) : - m_density( nullptr ), - m_speciesGroup( speciesGroup ) - { - // without guards - m_density = new GridBuffer< ValueType, simDim >( sizeLocal ); - } + public: + /** Allocate and initialize local (number) density + * + * @param speciesGroup unique naming for the species inside this density, + * e.g. 
a collection of electron species or ions + * @param sizeLocal spatial size of the local density value + */ + LocalDensity(std::string const& speciesGroup, DataSpace const& sizeLocal) + : m_density(nullptr) + , m_speciesGroup(speciesGroup) + { + // without guards + m_density = new GridBuffer(sizeLocal); + } - ~LocalDensity() - { - __delete( m_density ); - } + ~LocalDensity() + { + __delete(m_density); + } - static std::string - getName( std::string const & speciesGroup ) - { - return speciesGroup + "_LocalDensity"; - } + static std::string getName(std::string const& speciesGroup) + { + return speciesGroup + "_LocalDensity"; + } - std::string - getName( ) - { - return getName( m_speciesGroup ); - } + std::string getName() + { + return getName(m_speciesGroup); + } - GridBuffer< ValueType, simDim >& - getGridBuffer( ) - { - return *m_density; - } + GridBuffer& getGridBuffer() + { + return *m_density; + } - /* implement ISimulationData members */ - void - synchronize() override - { - m_density->deviceToHost( ); - } + /* implement ISimulationData members */ + void synchronize() override + { + m_density->deviceToHost(); + } - SimulationDataId - getUniqueId() override - { - return getName(); - } - }; + SimulationDataId getUniqueId() override + { + return getName(); + } + }; -} // namespace helperFields -} // namespace flylite -} // namespace particles + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalDensity.kernel b/include/picongpu/particles/flylite/helperFields/LocalDensity.kernel index e2be05a3a4..b82dbac0c0 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalDensity.kernel +++ b/include/picongpu/particles/flylite/helperFields/LocalDensity.kernel @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl, Rene Widera +/* Copyright 2017-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. 
* @@ -34,124 +34,98 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ - /** Average a FieldTmp density to a smaller resolution - * - * Average a FieldTmp density to a smaller (per-supercell) resolution and - * add it to a local density field. - * - * @tparam T_numWorkers number of workers for lockstep execution per block, - * arbitrary for reduce since it will loop over the - * source size when necessary - */ - template< - uint32_t T_numWorkers - > - struct KernelAverageDensity + namespace particles { - /** Functor - * - * @tparam T_TmpBox pmacc::DataBox with full-resolution density - * @tparam T_LocalDensityBox pmacc::DataBox local density with less - * resolution - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param fieldTmp pmacc::DataBox with FieldTmp density scalar field - * @param localDensity pmacc::DataBox with global memory, e.g. for each - * supercell's density - */ - template< - typename T_TmpBox, - typename T_LocalDensityBox, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_TmpBox fieldTmp, - T_LocalDensityBox localDensity - ) const + namespace flylite { - using picongpu::flylite::spatialAverageBox; - using ValueType = typename T_TmpBox::ValueType; - constexpr uint32_t numWorkers = T_numWorkers; - - // cell index in the average box in reduced resolution - DataSpace< simDim > const avgBoxCell( blockIdx ); - // first cell index inside FieldTmp (originating from BORDER) for block - DataSpace< simDim > const fieldTmpBlockOriginCell = avgBoxCell * spatialAverageBox::toRT(); - // our workers per block are started 1D - uint32_t const linearThreadIdx( threadIdx.x ); - - // shift the fieldTmp to the start of average box - auto fieldTmpBlock = fieldTmp.shift( fieldTmpBlockOriginCell ); - - // shared memory for reduce - PMACC_SMEM( - acc, - shReduceBuffer, - memory::Array< - ValueType, - numWorkers - > - ); - - // re-map access indices to local average view - 
using D1Box = DataBoxDim1Access< T_TmpBox >; - D1Box d1access( - fieldTmpBlock, - spatialAverageBox::toRT() - ); - - __syncthreads(); - - uint32_t const numAvgCells = pmacc::math::CT::volume< spatialAverageBox >::type::value; - - nvidia::reduce::kernel::Reduce< - ValueType, - numAvgCells, - numWorkers - > reduce{}; - - - reduce( - acc, - mappings::threads::WorkerCfg< numWorkers >( linearThreadIdx ), - numAvgCells, - /* access inside local average view */ - d1access, - numAvgCells, - nvidia::functors::Add(), - shReduceBuffer - ); - - /* continue with master - * - * - before working with this field, multiply by - * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE - * - divide by for average by numAvgCells - * - write back to global - * - * - change those lines if you want to re-use this kernel for a vector field - */ - if( linearThreadIdx == 0 ) + namespace helperFields { - ValueType localAverageResult = shReduceBuffer[ 0 ] * - particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE / - float_X( numAvgCells ); - - localDensity( avgBoxCell ) = - static_cast< typename T_LocalDensityBox::ValueType >( localAverageResult ); - } - } - }; - -} // namespace helperFields -} // namespace flylite -} // namespace particles + /** Average a FieldTmp density to a smaller resolution + * + * Average a FieldTmp density to a smaller (per-supercell) resolution and + * add it to a local density field. + * + * @tparam T_numWorkers number of workers for lockstep execution per block, + * arbitrary for reduce since it will loop over the + * source size when necessary + */ + template + struct KernelAverageDensity + { + /** Functor + * + * @tparam T_TmpBox pmacc::DataBox with full-resolution density + * @tparam T_LocalDensityBox pmacc::DataBox local density with less + * resolution + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param fieldTmp pmacc::DataBox with FieldTmp density scalar field + * @param localDensity pmacc::DataBox with global memory, e.g. 
for each + * supercell's density + */ + template + DINLINE void operator()(T_Acc const& acc, T_TmpBox fieldTmp, T_LocalDensityBox localDensity) const + { + using picongpu::flylite::spatialAverageBox; + using ValueType = typename T_TmpBox::ValueType; + constexpr uint32_t numWorkers = T_numWorkers; + + // cell index in the average box in reduced resolution + DataSpace const avgBoxCell(cupla::blockIdx(acc)); + // first cell index inside FieldTmp (originating from BORDER) for block + DataSpace const fieldTmpBlockOriginCell = avgBoxCell * spatialAverageBox::toRT(); + // our workers per block are started 1D + uint32_t const linearThreadIdx(cupla::threadIdx(acc).x); + + // shift the fieldTmp to the start of average box + auto fieldTmpBlock = fieldTmp.shift(fieldTmpBlockOriginCell); + + // shared memory for reduce + PMACC_SMEM(acc, shReduceBuffer, memory::Array); + + // re-map access indices to local average view + using D1Box = DataBoxDim1Access; + D1Box d1access(fieldTmpBlock, spatialAverageBox::toRT()); + + cupla::__syncthreads(acc); + + uint32_t const numAvgCells = pmacc::math::CT::volume::type::value; + + nvidia::reduce::kernel::Reduce reduce{}; + + + reduce( + acc, + mappings::threads::WorkerCfg(linearThreadIdx), + numAvgCells, + /* access inside local average view */ + d1access, + numAvgCells, + nvidia::functors::Add(), + shReduceBuffer); + + /* continue with master + * + * - before working with this field, multiply by + * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE + * - divide by for average by numAvgCells + * - write back to global + * + * - change those lines if you want to re-use this kernel for a vector field + */ + if(linearThreadIdx == 0) + { + ValueType localAverageResult = shReduceBuffer[0] + * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE / float_X(numAvgCells); + + localDensity(avgBoxCell) + = static_cast(localAverageResult); + } + } + }; + + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace 
picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalDensityFunctors.hpp b/include/picongpu/particles/flylite/helperFields/LocalDensityFunctors.hpp index a8cc35d766..d7b67d6018 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalDensityFunctors.hpp +++ b/include/picongpu/particles/flylite/helperFields/LocalDensityFunctors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -35,136 +35,113 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ -namespace detail -{ - /** Average a group of species to a local density - * - * Takes a single species and fills the LocalDensity with it. - * - * @tparam T_SpeciesType a picongpu::Particles class with a particle species - */ - template< - typename T_SpeciesType - > - struct AddSingleDensity - { - using SpeciesType = T_SpeciesType; - using FrameType = typename SpeciesType::FrameType; - using ShapeType = typename GetShape< SpeciesType >::type; - - /** Functor - * - * @param currentStep the current time step - * @param fieldTmp a slot of FieldTmp to add a density to - */ - void operator()( - uint32_t currentStep, - std::shared_ptr< FieldTmp > & fieldTmp - ) - { - DataConnector &dc = Environment<>::get().DataConnector(); - - // load particle without copy particle data to host - auto speciesTmp = dc.get< SpeciesType >( FrameType::getName(), true ); - - using Density = particleToGrid::ComputeGridValuePerFrame< - ShapeType, - particleToGrid::derivedAttributes::Density - >; - fieldTmp->template computeValue< CORE + BORDER, Density >( *speciesTmp, currentStep ); - - dc.releaseData( FrameType::getName() ); - } - }; -} - /** Average a group of species to a local density - * - * Takes a list of species and fills the LocalDensity with it. - * Ideally executed for a list of electron species or an ion species. 
- * - * @tparam T_SpeciesList sequence of picongpu::Particles to create a - * local density from - */ - template< - typename T_SpeciesList - > - struct FillLocalDensity + namespace particles { - using SpeciesList = T_SpeciesList; - - /** Functor - * - * @param currentStep the current time step - * @param speciesGroup naming for the group of species in T_SpeciesList - */ - void operator()( - uint32_t currentStep, - std::string const & speciesGroup - ) + namespace flylite { - // generating a density requires at least one slot in FieldTmp - PMACC_CASSERT_MSG( - _please_allocate_at_least_one_FieldTmp_in_memory_param, - fieldTmpNumSlots > 0 - ); - - DataConnector &dc = Environment<>::get().DataConnector(); - - // load FieldTmp without copy data to host and zero it - auto fieldTmp = dc.get< FieldTmp >( - FieldTmp::getUniqueId( 0 ), - true - ); - using DensityValueType = typename FieldTmp::ValueType; - fieldTmp->getGridBuffer().getDeviceBuffer().setValue( DensityValueType::create(0.0) ); - - // add density of each species in list to FieldTmp - meta::ForEach< SpeciesList, detail::AddSingleDensity< bmpl::_1 > > addSingleDensity; - addSingleDensity( currentStep, fieldTmp ); - - /* create valid density in the BORDER region - * note: for average != supercell multiples the GUARD of fieldTmp - * also needs to be filled in the communication above - */ - EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(fieldTmpEvent); - - /* average summed density in FieldTmp down to local resolution and - * write in new field - */ - auto nlocal = dc.get< LocalDensity >( - helperFields::LocalDensity::getName( speciesGroup ), - true - ); - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - PMACC_KERNEL( helperFields::KernelAverageDensity< numWorkers >{ } ) - ( - // one block per averaged density value - 
nlocal->getGridBuffer().getGridLayout().getDataSpaceWithoutGuarding(), - numWorkers - ) - ( - // start in border (jump over GUARD area) - fieldTmp->getDeviceDataBox().shift( SuperCellSize::toRT() * GuardSize::toRT() ), - // start in border (has no GUARD area) - nlocal->getGridBuffer().getDeviceBuffer( ).getDataBox( ) - ); - - // release fields - dc.releaseData( FieldTmp::getUniqueId( 0 ) ); - dc.releaseData( helperFields::LocalDensity::getName( speciesGroup ) ); - } - }; - -} // namespace helperFields -} // namespace flylite -} // namespace particles + namespace helperFields + { + namespace detail + { + /** Average a group of species to a local density + * + * Takes a single species and fills the LocalDensity with it. + * + * @tparam T_SpeciesType a picongpu::Particles class with a particle species + */ + template + struct AddSingleDensity + { + using SpeciesType = T_SpeciesType; + using FrameType = typename SpeciesType::FrameType; + using ShapeType = typename GetShape::type; + + /** Functor + * + * @param currentStep the current time step + * @param fieldTmp a slot of FieldTmp to add a density to + */ + void operator()(uint32_t currentStep, std::shared_ptr& fieldTmp) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + // load particle without copy particle data to host + auto speciesTmp = dc.get(FrameType::getName(), true); + + using Density = particleToGrid:: + ComputeGridValuePerFrame; + fieldTmp->template computeValue(*speciesTmp, currentStep); + + dc.releaseData(FrameType::getName()); + } + }; + } // namespace detail + /** Average a group of species to a local density + * + * Takes a list of species and fills the LocalDensity with it. + * Ideally executed for a list of electron species or an ion species. 
+ * + * @tparam T_SpeciesList sequence of picongpu::Particles to create a + * local density from + */ + template + struct FillLocalDensity + { + using SpeciesList = T_SpeciesList; + + /** Functor + * + * @param currentStep the current time step + * @param speciesGroup naming for the group of species in T_SpeciesList + */ + void operator()(uint32_t currentStep, std::string const& speciesGroup) + { + // generating a density requires at least one slot in FieldTmp + PMACC_CASSERT_MSG( + _please_allocate_at_least_one_FieldTmp_in_memory_param, + fieldTmpNumSlots > 0); + + DataConnector& dc = Environment<>::get().DataConnector(); + + // load FieldTmp without copy data to host and zero it + auto fieldTmp = dc.get(FieldTmp::getUniqueId(0), true); + using DensityValueType = typename FieldTmp::ValueType; + fieldTmp->getGridBuffer().getDeviceBuffer().setValue(DensityValueType::create(0.0)); + + // add density of each species in list to FieldTmp + meta::ForEach> addSingleDensity; + addSingleDensity(currentStep, fieldTmp); + + /* create valid density in the BORDER region + * note: for average != supercell multiples the GUARD of fieldTmp + * also needs to be filled in the communication above + */ + EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(fieldTmpEvent); + + /* average summed density in FieldTmp down to local resolution and + * write in new field + */ + auto nlocal = dc.get(helperFields::LocalDensity::getName(speciesGroup), true); + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + PMACC_KERNEL(helperFields::KernelAverageDensity{}) + ( + // one block per averaged density value + nlocal->getGridBuffer().getGridLayout().getDataSpaceWithoutGuarding(), + numWorkers)( + // start in border (jump over GUARD area) + fieldTmp->getDeviceDataBox().shift(SuperCellSize::toRT() * GuardSize::toRT()), + // start in border (has no GUARD area) + 
nlocal->getGridBuffer().getDeviceBuffer().getDataBox()); + + // release fields + dc.releaseData(FieldTmp::getUniqueId(0)); + dc.releaseData(helperFields::LocalDensity::getName(speciesGroup)); + } + }; + + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.hpp b/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.hpp index bfe601bda5..9a7303e812 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.hpp +++ b/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -32,91 +32,68 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ - using namespace pmacc; - - class LocalEnergyHistogram : - public ISimulationData + namespace particles { - private: - using EnergyHistogram = - memory::Array< - float_X, - picongpu::flylite::energies - >; - GridBuffer< - EnergyHistogram, - simDim - > * m_energyHistogram; - std::string m_speciesGroup; - - public: - /** Allocate and Initialize local Energy Histogram - * - * @param speciesGroup unique naming for the species inside this histogram, - * e.g. 
a collection of electron species or photon species - * @param histSizeLocal spatial size of the local energy histogram - */ - LocalEnergyHistogram( - std::string const & speciesGroup, - DataSpace< simDim > const & histSizeLocal - ) : - m_energyHistogram( nullptr ), - m_speciesGroup( speciesGroup ) + namespace flylite { - m_energyHistogram = - new GridBuffer< - EnergyHistogram, - simDim - >( histSizeLocal ); - } + namespace helperFields + { + using namespace pmacc; - ~LocalEnergyHistogram() - { - __delete( m_energyHistogram ); - } + class LocalEnergyHistogram : public ISimulationData + { + private: + using EnergyHistogram = memory::Array; + GridBuffer* m_energyHistogram; + std::string m_speciesGroup; - static std::string - getName( std::string const & speciesGroup ) - { - return speciesGroup + "_LocalEnergyHistogram"; - } + public: + /** Allocate and Initialize local Energy Histogram + * + * @param speciesGroup unique naming for the species inside this histogram, + * e.g. a collection of electron species or photon species + * @param histSizeLocal spatial size of the local energy histogram + */ + LocalEnergyHistogram(std::string const& speciesGroup, DataSpace const& histSizeLocal) + : m_energyHistogram(nullptr) + , m_speciesGroup(speciesGroup) + { + m_energyHistogram = new GridBuffer(histSizeLocal); + } - std::string - getName( ) - { - return getName( m_speciesGroup ); - } + ~LocalEnergyHistogram() + { + __delete(m_energyHistogram); + } - GridBuffer< - EnergyHistogram, - simDim - > & - getGridBuffer( ) - { - return *m_energyHistogram; - } + static std::string getName(std::string const& speciesGroup) + { + return speciesGroup + "_LocalEnergyHistogram"; + } - /* implement ISimulationData members */ - void - synchronize() override - { - m_energyHistogram->deviceToHost( ); - } + std::string getName() + { + return getName(m_speciesGroup); + } - SimulationDataId - getUniqueId() override - { - return getName(); - } - }; + GridBuffer& getGridBuffer() + { + return 
*m_energyHistogram; + } + + /* implement ISimulationData members */ + void synchronize() override + { + m_energyHistogram->deviceToHost(); + } + + SimulationDataId getUniqueId() override + { + return getName(); + } + }; -} // namespace helperFields -} // namespace flylite -} // namespace particles + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.kernel b/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.kernel index c2b237c961..bfe8754d6d 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.kernel +++ b/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogram.kernel @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl, Rene Widera +/* Copyright 2017-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -35,253 +35,178 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ - /** Generate and add a local energy histogram - * - * Generate a (per-supercell) energy histogram and add it to global memory. - * - * @tparam T_numWorkers number of workers for lockstep execution per block, - * usually equal to the number of particles per frame - * (which is equal to the supercell size) - */ - template< uint32_t T_numWorkers > - struct KernelAddLocalEnergyHistogram + namespace particles { - /** Functor - * - * The functor is executed frame-list-wise, meaning locally per - * supercell. All particles of a supercell generate a shared memory - * histogram and write that back into global memory. Particles outside - * of the range of the histogram are ignored and not counted. - * - * @todo In case the local averging in flylite shall be larger then a - * supercell (in multiples of integers), the results need to be merged. 
- * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_LocalEnergyHistogramBox pmacc::DataBox, local energy histograms, - * e.g. for each supercell - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param pb particles of a species - * @param energyHistogramBox box with global memory for each supercell's histogram - * @param minEnergy minimum energy to account for (eV) - * @param maxEnergy maximum energy to account for (eV) - */ - template< - typename T_ParBox, - typename T_LocalEnergyHistogramBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox & pb, - T_LocalEnergyHistogramBox & energyHistogramBox, - float_X const minEnergy, - float_X const maxEnergy, - T_Mapping const mapper - ) const + namespace flylite { - using picongpu::flylite::spatialAverageBox; - constexpr uint16_t numBins = picongpu::flylite::energies; - constexpr uint32_t numWorkers = T_numWorkers; - - using namespace pmacc::mappings::threads; - using SuperCellSize = typename MappingDesc::SuperCellSize; - using FramePtr = typename T_ParBox::FramePtr; - constexpr uint32_t maxParticlesPerFrame = pmacc::math::CT::volume< SuperCellSize >::type::value; - - PMACC_SMEM( - acc, - frame, - FramePtr - ); - PMACC_SMEM( - acc, - particlesInSuperCell, - lcellId_t - ); - - // our workers per block are started 1D - uint32_t const workerIdx = threadIdx.x; - - // supercell index of current (frame-wise) supercell including GUARD - DataSpace< simDim > const superCellIdx( - mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) - ); - /* index inside local energy histogram in averaged space (has no GUARD) - * integer division: we average over multiples of supercells; - * this index selects the according local energy - * histogram in global RAM - */ - DataSpace< simDim > const localEnergyBlock = - ( superCellIdx - GuardSize::toRT() ) * - SuperCellSize::toRT() / spatialAverageBox::toRT(); - - /* shift the 
energyHistogramBox to the local spatial average box and - * get a reference on the histogram - */ - auto & localEnergyHistogram = *energyHistogramBox.shift( localEnergyBlock ); - - // shared memory for local energy histogram - PMACC_SMEM( - acc, - shLocalEnergyHistogram, - memory::Array< - float_X, - numBins - > - ); - - using MasterOnly = IdxConfig< - 1, - numWorkers - >; - - // get frame lists of this supercell - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getLastFrame( superCellIdx ); - particlesInSuperCell = pb.getSuperCell( superCellIdx ).getSizeLastFrame( ); - } - ); - - // empty the histogram to contain only zeroes - ForEachIdx< - IdxConfig< - numWorkers, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - /* set all bins to 0 */ - for( int i = linearIdx; i < numBins; i += numWorkers ) - shLocalEnergyHistogram[ i ] = float_X( 0. ); - } - ); - - __syncthreads(); - - // return if the supercell has no particles - if( !frame.isValid( ) ) - return; - - // iterate the frame list - while( frame.isValid() ) + namespace helperFields { - // move over all particles in a frame - ForEachIdx< - IdxConfig< - maxParticlesPerFrame, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + /** Generate and add a local energy histogram + * + * Generate a (per-supercell) energy histogram and add it to global memory. + * + * @tparam T_numWorkers number of workers for lockstep execution per block, + * usually equal to the number of particles per frame + * (which is equal to the supercell size) + */ + template + struct KernelAddLocalEnergyHistogram + { + /** Functor + * + * The functor is executed frame-list-wise, meaning locally per + * supercell. All particles of a supercell generate a shared memory + * histogram and write that back into global memory. Particles outside + * of the range of the histogram are ignored and not counted. 
+ * + * @todo In case the local averging in flylite shall be larger then a + * supercell (in multiples of integers), the results need to be merged. + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_LocalEnergyHistogramBox pmacc::DataBox, local energy histograms, + * e.g. for each supercell + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param pb particles of a species + * @param energyHistogramBox box with global memory for each supercell's histogram + * @param minEnergy minimum energy to account for (eV) + * @param maxEnergy maximum energy to account for (eV) + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_ParBox& pb, + T_LocalEnergyHistogramBox& energyHistogramBox, + float_X const minEnergy, + float_X const maxEnergy, + T_Mapping const mapper) const { - if( linearIdx < particlesInSuperCell ) + using picongpu::flylite::spatialAverageBox; + constexpr uint16_t numBins = picongpu::flylite::energies; + constexpr uint32_t numWorkers = T_numWorkers; + + using namespace pmacc::mappings::threads; + using SuperCellSize = typename MappingDesc::SuperCellSize; + using FramePtr = typename T_ParBox::FramePtr; + constexpr uint32_t maxParticlesPerFrame = pmacc::math::CT::volume::type::value; + + PMACC_SMEM(acc, frame, FramePtr); + PMACC_SMEM(acc, particlesInSuperCell, lcellId_t); + + // our workers per block are started 1D + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + // supercell index of current (frame-wise) supercell including GUARD + DataSpace const superCellIdx( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + /* index inside local energy histogram in averaged space (has no GUARD) + * integer division: we average over multiples of supercells; + * this index selects the according local energy + * histogram in global RAM + */ + DataSpace const localEnergyBlock + = (superCellIdx - GuardSize::toRT()) * SuperCellSize::toRT() / spatialAverageBox::toRT(); + + /* shift the 
energyHistogramBox to the local spatial average box and + * get a reference on the histogram + */ + auto& localEnergyHistogram = *energyHistogramBox.shift(localEnergyBlock); + + // shared memory for local energy histogram + PMACC_SMEM(acc, shLocalEnergyHistogram, memory::Array); + + using MasterOnly = IdxConfig<1, numWorkers>; + + // get frame lists of this supercell + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + frame = pb.getLastFrame(superCellIdx); + particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame(); + }); + + // empty the histogram to contain only zeroes + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + /* set all bins to 0 */ + for(int i = linearIdx; i < numBins; i += numWorkers) + shLocalEnergyHistogram[i] = float_X(0.); + }); + + cupla::__syncthreads(acc); + + // return if the supercell has no particles + if(!frame.isValid()) + return; + + // iterate the frame list + while(frame.isValid()) { - auto const particle = frame[ linearIdx ]; - /* kinetic Energy for Particles: E^2 = p^2*c^2 + m^2*c^4 - * = c^2 * [p^2 + m^2*c^2] - */ - float3_X const mom = particle[ momentum_ ]; - - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( - weighting, - particle - ); - - // calculate kinetic energy of the macro particle - float_X particleEnergy = KinEnergy< >( )( - mom, - mass - ); - - particleEnergy /= weighting; - - // calculate bin number - int binNumber = math::floor( - ( particleEnergy - minEnergy ) / - ( maxEnergy - minEnergy ) * static_cast< float_X >( numBins ) - ); - - /* all entries larger than maxEnergy or smaller - * than minEnergy are ignored - */ - if( binNumber >= 0 and binNumber < numBins ) - { - // artifical norm for reduce - float_X const normedWeighting = weighting / - float_X( particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE ); - - atomicAdd( - &( shLocalEnergyHistogram[ binNumber ] ), - normedWeighting, - ::alpaka::hierarchy::Threads{} - ); - 
} + // move over all particles in a frame + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx < particlesInSuperCell) + { + auto const particle = frame[linearIdx]; + /* kinetic Energy for Particles: E^2 = p^2*c^2 + m^2*c^4 + * = c^2 * [p^2 + m^2*c^2] + */ + float3_X const mom = particle[momentum_]; + + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + + // calculate kinetic energy of the macro particle + float_X particleEnergy = KinEnergy<>()(mom, mass); + + particleEnergy /= weighting; + + // calculate bin number + int binNumber = math::floor( + (particleEnergy - minEnergy) / (maxEnergy - minEnergy) + * static_cast(numBins)); + + /* all entries larger than maxEnergy or smaller + * than minEnergy are ignored + */ + if(binNumber >= 0 and binNumber < numBins) + { + // artifical norm for reduce + float_X const normedWeighting + = weighting / float_X(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); + + cupla::atomicAdd( + acc, + &(shLocalEnergyHistogram[binNumber]), + normedWeighting, + ::alpaka::hierarchy::Threads{}); + } + } + }); + + cupla::__syncthreads(acc); + + // go to next frame + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + frame = pb.getPreviousFrame(frame); + particlesInSuperCell = maxParticlesPerFrame; + }); + cupla::__syncthreads(acc); } - } - ); - - __syncthreads(); - // go to next frame - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getPreviousFrame( frame ); - particlesInSuperCell = maxParticlesPerFrame; + // write histogram back to global memory (add) + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + for(int i = linearIdx; i < numBins; i += numWorkers) + cupla::atomicAdd( + acc, + &(localEnergyHistogram[i]), + shLocalEnergyHistogram[i], + ::alpaka::hierarchy::Blocks{}); + }); } - ); - __syncthreads(); - } - - // write histogram back to global 
memory (add) - ForEachIdx< - IdxConfig< - numWorkers, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - for( int i = linearIdx; i < numBins; i += numWorkers ) - atomicAdd( - &( localEnergyHistogram[ i ] ), - shLocalEnergyHistogram[ i ], - ::alpaka::hierarchy::Blocks{} - ); - } - ); - } - }; + }; -} // namespace helperFields -} // namespace flylite -} // namespace particles + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogramFunctors.hpp b/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogramFunctors.hpp index 08fe303aee..d5c55d6dc6 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogramFunctors.hpp +++ b/include/picongpu/particles/flylite/helperFields/LocalEnergyHistogramFunctors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -34,135 +34,122 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ -namespace detail -{ - /** Takes a single species and adds it to a LocalEnergyHistogram - * - * @tparam T_SpeciesType a picongpu::Particles class with a particle species - */ - template< - typename T_SpeciesType - > - struct AddSingleEnergyHistogram - { - using SpeciesType = T_SpeciesType; - using FrameType = typename SpeciesType::FrameType; - - /** Functor - * - * @param currentStep the current time step - * @param eneHistLocal the GridBuffer for local energy histograms - * @param minEnergy minimum energy to account for (eV) - * @param maxEnergy maximum energy to account for (eV) - */ - void operator()( - uint32_t currentStep, - std::shared_ptr< LocalEnergyHistogram > & eneHistLocal, - float_X const minEnergy, - float_X const maxEnergy - ) - { - DataConnector &dc = Environment<>::get().DataConnector(); - - // load particle without copy particle data to host - auto speciesTmp = dc.get< SpeciesType >( FrameType::getName(), true ); - - // mapper to access species in CORE & BORDER only - MappingDesc cellDescription( - speciesTmp->getParticlesBuffer().getSuperCellsLayout().getDataSpace() * SuperCellSize::toRT(), - GuardSize::toRT() - ); - AreaMapping< - CORE + BORDER, - MappingDesc - > mapper( cellDescription ); - - // add energy histogram on top of existing data - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - PMACC_KERNEL( helperFields::KernelAddLocalEnergyHistogram< numWorkers >{ } ) - ( - // one block per local energy histogram - mapper.getGridDim(), - numWorkers - ) - ( - // start in border (jump over GUARD area) - speciesTmp->getDeviceParticlesBox(), - // start in border (has no GUARD area) - eneHistLocal->getGridBuffer().getDeviceBuffer( ).getDataBox( ), - minEnergy, - maxEnergy, - mapper - ); - - dc.releaseData( FrameType::getName() ); - } - }; -} - /** Add a 
group of species to a local energy histogram - * - * Takes a list of species and fills the LocalEnergyHistogram with it. - * Ideally executed for a list of electron species or an photon species. - * - * @tparam T_SpeciesList sequence of picongpu::Particles to create a - * local energy histogram from - */ - template< - typename T_SpeciesList - > - struct FillLocalEnergyHistogram + namespace particles { - using SpeciesList = T_SpeciesList; - - /** Functor - * - * @param currentStep the current time step - * @param speciesGroup naming for the group of species in T_SpeciesList - * @param minEnergy minimum energy to account for (eV) - * @param maxEnergy maximum energy to account for (eV) - */ - void operator()( - uint32_t currentStep, - std::string const & speciesGroup, - float_X const minEnergy, - float_X const maxEnergy - ) + namespace flylite { - DataConnector &dc = Environment<>::get().DataConnector(); - - /* load local energy histogram field without copy data to host and - * zero it - */ - auto eneHistLocal = dc.get< LocalEnergyHistogram >( - helperFields::LocalEnergyHistogram::getName( speciesGroup ), - true - ); - - // reset local energy histograms - eneHistLocal->getGridBuffer().getDeviceBuffer().setValue( float_X( 0.0 ) ); - - // add local energy histogram of each species in list - meta::ForEach< SpeciesList, detail::AddSingleEnergyHistogram< bmpl::_1 > > addSingleEnergyHistogram; - addSingleEnergyHistogram( currentStep, eneHistLocal, minEnergy, maxEnergy ); - - /* note: for average != supercell the BORDER region would need to be - * build up via communication accordingly - */ - - // release fields - dc.releaseData( helperFields::LocalEnergyHistogram::getName( speciesGroup ) ); - } - }; - -} // namespace helperFields -} // namespace flylite -} // namespace particles + namespace helperFields + { + namespace detail + { + /** Takes a single species and adds it to a LocalEnergyHistogram + * + * @tparam T_SpeciesType a picongpu::Particles class with a particle 
species + */ + template + struct AddSingleEnergyHistogram + { + using SpeciesType = T_SpeciesType; + using FrameType = typename SpeciesType::FrameType; + + /** Functor + * + * @param currentStep the current time step + * @param eneHistLocal the GridBuffer for local energy histograms + * @param minEnergy minimum energy to account for (eV) + * @param maxEnergy maximum energy to account for (eV) + */ + void operator()( + uint32_t currentStep, + std::shared_ptr& eneHistLocal, + float_X const minEnergy, + float_X const maxEnergy) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + // load particle without copy particle data to host + auto speciesTmp = dc.get(FrameType::getName(), true); + + // mapper to access species in CORE & BORDER only + MappingDesc cellDescription( + speciesTmp->getParticlesBuffer().getSuperCellsLayout().getDataSpace() + * SuperCellSize::toRT(), + GuardSize::toRT()); + AreaMapping mapper(cellDescription); + + // add energy histogram on top of existing data + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< + pmacc::math::CT::volume::type::value>::value; + PMACC_KERNEL(helperFields::KernelAddLocalEnergyHistogram{}) + ( + // one block per local energy histogram + mapper.getGridDim(), + numWorkers)( + // start in border (jump over GUARD area) + speciesTmp->getDeviceParticlesBox(), + // start in border (has no GUARD area) + eneHistLocal->getGridBuffer().getDeviceBuffer().getDataBox(), + minEnergy, + maxEnergy, + mapper); + + dc.releaseData(FrameType::getName()); + } + }; + } // namespace detail + /** Add a group of species to a local energy histogram + * + * Takes a list of species and fills the LocalEnergyHistogram with it. + * Ideally executed for a list of electron species or an photon species. 
+ * + * @tparam T_SpeciesList sequence of picongpu::Particles to create a + * local energy histogram from + */ + template + struct FillLocalEnergyHistogram + { + using SpeciesList = T_SpeciesList; + + /** Functor + * + * @param currentStep the current time step + * @param speciesGroup naming for the group of species in T_SpeciesList + * @param minEnergy minimum energy to account for (eV) + * @param maxEnergy maximum energy to account for (eV) + */ + void operator()( + uint32_t currentStep, + std::string const& speciesGroup, + float_X const minEnergy, + float_X const maxEnergy) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /* load local energy histogram field without copy data to host and + * zero it + */ + auto eneHistLocal = dc.get( + helperFields::LocalEnergyHistogram::getName(speciesGroup), + true); + + // reset local energy histograms + eneHistLocal->getGridBuffer().getDeviceBuffer().setValue(float_X(0.0)); + + // add local energy histogram of each species in list + meta::ForEach> + addSingleEnergyHistogram; + addSingleEnergyHistogram(currentStep, eneHistLocal, minEnergy, maxEnergy); + + /* note: for average != supercell the BORDER region would need to be + * build up via communication accordingly + */ + + // release fields + dc.releaseData(helperFields::LocalEnergyHistogram::getName(speciesGroup)); + } + }; + + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/helperFields/LocalRateMatrix.hpp b/include/picongpu/particles/flylite/helperFields/LocalRateMatrix.hpp index d2baff549b..e1bb20098c 100644 --- a/include/picongpu/particles/flylite/helperFields/LocalRateMatrix.hpp +++ b/include/picongpu/particles/flylite/helperFields/LocalRateMatrix.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -32,86 +32,71 @@ namespace picongpu { -namespace particles -{ -namespace flylite -{ -namespace helperFields -{ - using namespace pmacc; - - class LocalRateMatrix : - public ISimulationData + namespace particles { - private: - /** A[iz, numpop, numpop] */ - using RateMatrix = memory::Array< - memory::Array< - memory::Array< - float_X, - picongpu::flylite::populations - >, - picongpu::flylite::populations - >, - picongpu::flylite::ionizationStates - >; - GridBuffer< RateMatrix, simDim >* m_rateMatrix; - std::string m_speciesName; - - public: - /** Allocate and initialize local rate matrix for ion state transitions - * - * @param histSizeLocal spatial size of the local energy histogram - */ - LocalRateMatrix( - std::string const & ionSpeciesName, - DataSpace< simDim > const & histSizeLocal - ) : - m_rateMatrix( nullptr ), - m_speciesName( ionSpeciesName ) + namespace flylite { - m_rateMatrix = - new GridBuffer< RateMatrix, simDim >( histSizeLocal ); - } + namespace helperFields + { + using namespace pmacc; - ~LocalRateMatrix() - { - __delete( m_rateMatrix ); - } + class LocalRateMatrix : public ISimulationData + { + private: + /** A[iz, numpop, numpop] */ + using RateMatrix = memory::Array< + memory::Array< + memory::Array, + picongpu::flylite::populations>, + picongpu::flylite::ionizationStates>; + GridBuffer* m_rateMatrix; + std::string m_speciesName; - static std::string - getName( std::string const & speciesGroup ) - { - return speciesGroup + "_RateMatrix"; - } + public: + /** Allocate and initialize local rate matrix for ion state transitions + * + * @param histSizeLocal spatial size of the local energy histogram + */ + LocalRateMatrix(std::string const& ionSpeciesName, DataSpace const& histSizeLocal) + : m_rateMatrix(nullptr) + , m_speciesName(ionSpeciesName) + { + m_rateMatrix = new GridBuffer(histSizeLocal); + } - std::string - getName( ) - { - return getName( m_speciesName ); - } + ~LocalRateMatrix() + { + __delete(m_rateMatrix); + } - GridBuffer< 
RateMatrix, simDim >& - getGridBuffer( ) - { - return *m_rateMatrix; - } + static std::string getName(std::string const& speciesGroup) + { + return speciesGroup + "_RateMatrix"; + } - /* implement ISimulationData members */ - void - synchronize() override - { - m_rateMatrix->deviceToHost( ); - } + std::string getName() + { + return getName(m_speciesName); + } - SimulationDataId - getUniqueId() override - { - return getName(); - } - }; + GridBuffer& getGridBuffer() + { + return *m_rateMatrix; + } + + /* implement ISimulationData members */ + void synchronize() override + { + m_rateMatrix->deviceToHost(); + } + + SimulationDataId getUniqueId() override + { + return getName(); + } + }; -} // namespace helperFields -} // namespace flylite -} // namespace particles + } // namespace helperFields + } // namespace flylite + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/flylite/types/Superconfig.hpp b/include/picongpu/particles/flylite/types/Superconfig.hpp index 5c99c1f906..e6925f3053 100644 --- a/include/picongpu/particles/flylite/types/Superconfig.hpp +++ b/include/picongpu/particles/flylite/types/Superconfig.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -25,34 +25,28 @@ namespace picongpu { -namespace flylite -{ -namespace types -{ - /** Ion Superconfiguration - * - * This is the attribute type for an ion's screened hydrogenic - * superconfiguration. - * - * See for details on screened hydrogenic levels: - * H.-K. Chung, S.H. Hansen, H.A. Scott. - * *Generalized Collisional Radiative Model Using* - * *Screened Hydrogenic Levels*, - * in Modern Methods in Collisional-Radiative Modeling of Plasmas, - * edited by Y. Ralchenko (Springer, 2016) pp.51-79 - * - * @tparam T_Type the float type to use, e.g. 
float_64 - * @tparam T_populations the number of populations to store for each ion, - * range: [0, 255] - */ - template< - typename T_Type, - uint8_t T_populations - > - using Superconfig = pmacc::math::Vector< - T_Type, - T_populations - >; -} // namespace types -} // namespace flylite + namespace flylite + { + namespace types + { + /** Ion Superconfiguration + * + * This is the attribute type for an ion's screened hydrogenic + * superconfiguration. + * + * See for details on screened hydrogenic levels: + * H.-K. Chung, S.H. Hansen, H.A. Scott. + * *Generalized Collisional Radiative Model Using* + * *Screened Hydrogenic Levels*, + * in Modern Methods in Collisional-Radiative Modeling of Plasmas, + * edited by Y. Ralchenko (Springer, 2016) pp.51-79 + * + * @tparam T_Type the float type to use, e.g. float_64 + * @tparam T_populations the number of populations to store for each ion, + * range: [0, 255] + */ + template + using Superconfig = pmacc::math::Vector; + } // namespace types + } // namespace flylite } // namespace picongpu diff --git a/include/picongpu/particles/functor/User.def b/include/picongpu/particles/functor/User.def index 077f0112da..ba98aa1904 100644 --- a/include/picongpu/particles/functor/User.def +++ b/include/picongpu/particles/functor/User.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -22,20 +22,19 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ - - /** call simple free user defined functor - * - * @tparam T_Functor user defined functor - * **optional**: can implement **one** host side constructor - * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` - */ - template< typename T_Functor > - struct User; + namespace particles + { + namespace functor + { + /** call simple free user defined functor + * + * @tparam T_Functor user defined functor + * **optional**: can implement **one** host side constructor + * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` + */ + template + struct User; -} // namespace functor -} // namespace particles + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/functor/User.hpp b/include/picongpu/particles/functor/User.hpp index eb9644eb86..3bc05b9902 100644 --- a/include/picongpu/particles/functor/User.hpp +++ b/include/picongpu/particles/functor/User.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -27,58 +27,48 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ - template< typename T_Functor > - struct User : public T_Functor + namespace particles { - - using Functor = T_Functor; - - /** constructor - * - * This constructor is only compiled if the user functor has - * a host side constructor with one (uint32_t) argument. 
- * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param currentStep current simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = Functor > - HINLINE User( - uint32_t currentStep, - typename std::enable_if< - std::is_constructible< - DeferFunctor, - uint32_t - >::value - >::type* = 0 - ) : Functor( currentStep ) + namespace functor { - } + template + struct User : public T_Functor + { + using Functor = T_Functor; - /** constructor - * - * This constructor is only compiled if the user functor has a default constructor. - * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param current simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = Functor > - HINLINE User( - uint32_t, - typename std::enable_if< - std::is_constructible< DeferFunctor >::value - >::type* = 0 - ) : Functor( ) - { - } - }; -} // namespace functor -} // namespace particles + /** constructor + * + * This constructor is only compiled if the user functor has + * a host side constructor with one (uint32_t) argument. + * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param currentStep current simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE User( + uint32_t currentStep, + typename std::enable_if::value>::type* = 0) + : Functor(currentStep) + { + } + + /** constructor + * + * This constructor is only compiled if the user functor has a default constructor. 
+ * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param current simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE User(uint32_t, typename std::enable_if::value>::type* = 0) + : Functor() + { + } + }; + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/functor/functor.def b/include/picongpu/particles/functor/functor.def index 3677d5efab..827631ecc9 100644 --- a/include/picongpu/particles/functor/functor.def +++ b/include/picongpu/particles/functor/functor.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Axel Huebl +/* Copyright 2014-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/functor/functor.hpp b/include/picongpu/particles/functor/functor.hpp index bb183e1cbe..ccdd2c2d7a 100644 --- a/include/picongpu/particles/functor/functor.hpp +++ b/include/picongpu/particles/functor/functor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Axel Huebl +/* Copyright 2014-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/functor/misc/Rng.def b/include/picongpu/particles/functor/misc/Rng.def index 707cf2b175..6e47b82e7f 100644 --- a/include/picongpu/particles/functor/misc/Rng.def +++ b/include/picongpu/particles/functor/misc/Rng.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -27,23 +27,20 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ -namespace misc -{ - - /** provide a random number generator - * - * @tparam T_Distribution pmacc::random::distributions, random number distribution - */ - template< - typename T_Distribution - > - struct Rng; + namespace particles + { + namespace functor + { + namespace misc + { + /** provide a random number generator + * + * @tparam T_Distribution pmacc::random::distributions, random number distribution + */ + template + struct Rng; -} // namespace misc -} // namespace functor -} // namespace particles + } // namespace misc + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/functor/misc/Rng.hpp b/include/picongpu/particles/functor/misc/Rng.hpp index c141fb1ddc..b820036137 100644 --- a/include/picongpu/particles/functor/misc/Rng.hpp +++ b/include/picongpu/particles/functor/misc/Rng.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Alexander Grund +/* Copyright 2015-2021 Rene Widera, Alexander Grund * * This file is part of PIConGPU. 
* @@ -22,8 +22,6 @@ #include "picongpu/simulation_defines.hpp" #include "picongpu/particles/functor/misc/RngWrapper.hpp" -#include -#include #include #include #include @@ -36,81 +34,61 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ -namespace misc -{ - /** call simple free user defined functor and provide a random number generator - * - * @tparam T_Distribution random number distribution - */ - template< - typename T_Distribution - > - struct Rng + namespace particles { - using Distribution = T_Distribution; - using RNGFactory = pmacc::random::RNGProvider< - simDim, - random::Generator - >; - using RngHandle = typename RNGFactory::Handle; - using RandomGen = RngWrapper< - cupla::Acc, - typename RngHandle::GetRandomType< Distribution >::type - >; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE Rng( uint32_t currentStep ) : rngHandle( RNGFactory::createHandle() ) + namespace functor { - } + namespace misc + { + /** call simple free user defined functor and provide a random number generator + * + * @tparam T_Distribution random number distribution + */ + template + struct Rng + { + using Distribution = T_Distribution; + using RNGFactory = pmacc::random::RNGProvider; + using RngHandle = typename RNGFactory::Handle; + using RandomGen = RngWrapper::type>; + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE Rng(uint32_t currentStep) : rngHandle(RNGFactory::createHandle()) + { + } - /** create functor a random number generator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param workerCfg configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE - RandomGen - operator()( - T_Acc const 
& acc, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & workerCfg - ) const - { - namespace nvrng = nvidia::rng; - RngHandle tmp( rngHandle ); - tmp.init( - localSupercellOffset * SuperCellSize::toRT() + - DataSpaceOperations< simDim >::template map< SuperCellSize >( workerCfg.getWorkerIdx( ) ) - ); - return RandomGen( - acc, - tmp.applyDistribution< Distribution >() - ); - } + /** create functor a random number generator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param workerCfg configuration of the worker + */ + template + HDINLINE RandomGen operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const& workerCfg) const + { + RngHandle tmp(rngHandle); + tmp.init( + localSupercellOffset * SuperCellSize::toRT() + + DataSpaceOperations::template map(workerCfg.getWorkerIdx())); + return RandomGen(acc, tmp.applyDistribution()); + } - private: - RngHandle rngHandle; - }; + private: + RngHandle rngHandle; + }; -} // namepsace misc -} // namespace functor -} // namespace particles + } // namespace misc + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/functor/misc/RngWrapper.hpp b/include/picongpu/particles/functor/misc/RngWrapper.hpp index 727dd1a01f..13bdf47964 100644 --- a/include/picongpu/particles/functor/misc/RngWrapper.hpp +++ b/include/picongpu/particles/functor/misc/RngWrapper.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -26,49 +26,45 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ -namespace misc -{ - - /** wraps an random number generator together with an alpaka accelerator - * - * This class allows to generate random numbers without passing the accelerator - * to each functor call. - * - * @tparam T_Acc type of the alpaka accelerator - * @tparam T_Rng type of the random number generator - */ - template< - typename T_Acc, - typename T_Rng - > - struct RngWrapper + namespace particles { - DINLINE RngWrapper( - T_Acc const & acc, - T_Rng const & rng + namespace functor + { + namespace misc + { + /** wraps an random number generator together with an alpaka accelerator + * + * This class allows to generate random numbers without passing the accelerator + * to each functor call. + * + * @tparam T_Acc type of the alpaka accelerator + * @tparam T_Rng type of the random number generator + */ + template + struct RngWrapper + { + DINLINE RngWrapper( + T_Acc const& acc, + T_Rng const& rng - ) : - m_acc( &acc ), - m_rng( rng ) - { } + ) + : m_acc(&acc) + , m_rng(rng) + { + } - //! generate a random number - DINLINE - typename T_Rng::result_type - operator()() - { - return m_rng( *m_acc ); - } + //! 
generate a random number + DINLINE + typename T_Rng::result_type operator()() + { + return m_rng(*m_acc); + } - T_Acc const * m_acc; - mutable T_Rng m_rng; - }; + T_Acc const* m_acc; + mutable T_Rng m_rng; + }; -} // namepsace misc -} // namespace functor -} // namespace particles + } // namespace misc + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/functor/misc/TotalCellOffset.def b/include/picongpu/particles/functor/misc/TotalCellOffset.def index f4fac65b49..d3e46da377 100644 --- a/include/picongpu/particles/functor/misc/TotalCellOffset.def +++ b/include/picongpu/particles/functor/misc/TotalCellOffset.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,15 +22,15 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ -namespace misc -{ - //! Provide the cell offset of a supercell to the total domain origin - struct TotalCellOffset; -} // namespace misc -} // namespace functor -} // namespace particles + namespace particles + { + namespace functor + { + namespace misc + { + //! Provide the cell offset of a supercell to the total domain origin + struct TotalCellOffset; + } // namespace misc + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/functor/misc/TotalCellOffset.hpp b/include/picongpu/particles/functor/misc/TotalCellOffset.hpp index 8d5a8edc81..33ba9e725c 100644 --- a/include/picongpu/particles/functor/misc/TotalCellOffset.hpp +++ b/include/picongpu/particles/functor/misc/TotalCellOffset.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,63 +25,55 @@ namespace picongpu { -namespace particles -{ -namespace functor -{ -namespace misc -{ - struct TotalCellOffset + namespace particles { - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE TotalCellOffset( uint32_t currentStep ) - { - uint32_t const numSlides = MovingWindow::getInstance( ).getSlideCounter( currentStep ); - SubGrid< simDim > const & subGrid = Environment< simDim >::get( ).SubGrid( ); - DataSpace< simDim > const localCells = subGrid.getLocalDomain( ).size; - gpuCellOffsetToTotalOrigin = subGrid.getLocalDomain( ).offset; - gpuCellOffsetToTotalOrigin.y( ) += numSlides * localCells.y( ); - } - - /** get cell offset of the supercell - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param offset (in supercells, without any guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE DataSpace< simDim > - operator()( - T_Acc const & acc, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & - ) const + namespace functor { - DataSpace< simDim > const superCellToLocalOriginCellOffset( - localSupercellOffset * SuperCellSize::toRT( ) - ); + namespace misc + { + struct TotalCellOffset + { + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE TotalCellOffset(uint32_t currentStep) + { + uint32_t const numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + SubGrid const& subGrid = Environment::get().SubGrid(); + DataSpace const localCells = subGrid.getLocalDomain().size; + gpuCellOffsetToTotalOrigin = subGrid.getLocalDomain().offset; + gpuCellOffsetToTotalOrigin.y() += numSlides * localCells.y(); + } - return gpuCellOffsetToTotalOrigin + superCellToLocalOriginCellOffset; - } + /** get cell offset of the supercell + * + 
* @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param offset (in supercells, without any guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE DataSpace operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const&) const + { + DataSpace const superCellToLocalOriginCellOffset( + localSupercellOffset * SuperCellSize::toRT()); - private: + return gpuCellOffsetToTotalOrigin + superCellToLocalOriginCellOffset; + } - //! offset in cells to the total domain origin - DataSpace< simDim > gpuCellOffsetToTotalOrigin; - }; + private: + //! offset in cells to the total domain origin + DataSpace gpuCellOffsetToTotalOrigin; + }; -} // namespace misc -} // namespace functor -} // namespace particles + } // namespace misc + } // namespace functor + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/interpolationMemoryPolicy/ShiftToValidRange.hpp b/include/picongpu/particles/interpolationMemoryPolicy/ShiftToValidRange.hpp index b59791d6e8..d28b6dcb47 100644 --- a/include/picongpu/particles/interpolationMemoryPolicy/ShiftToValidRange.hpp +++ b/include/picongpu/particles/interpolationMemoryPolicy/ShiftToValidRange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Richard Pausch +/* Copyright 2016-2021 Richard Pausch * * This file is part of PIConGPU. * @@ -18,39 +18,35 @@ */ - namespace picongpu { -namespace particles -{ - -namespace interpolationMemoryPolicy -{ -/** Shift position to valid range [0,1) - * and repositions memory accordingly. - * This is necessary if a particle moves - * outside of its cell during a sub-stepping cycle - * Returns: shifted position and shifted memory. 
*/ -struct ShiftToValidRange -{ - template< typename T_MemoryType, typename T_PosType > - HDINLINE - T_MemoryType memory( const T_MemoryType& mem, const T_PosType& pos ) const + namespace particles { - const T_PosType pos_floor = math::floor(pos); - return mem( precisionCast(pos_floor) ); - } + namespace interpolationMemoryPolicy + { + /** Shift position to valid range [0,1) + * and repositions memory accordingly. + * This is necessary if a particle moves + * outside of its cell during a sub-stepping cycle + * Returns: shifted position and shifted memory. */ + struct ShiftToValidRange + { + template + HDINLINE T_MemoryType memory(const T_MemoryType& mem, const T_PosType& pos) const + { + const T_PosType pos_floor = math::floor(pos); + return mem(precisionCast(pos_floor)); + } - template< typename T_PosType > - HDINLINE - T_PosType position( const T_PosType& pos ) const - { - const T_PosType pos_floor = math::floor(pos); - return pos - pos_floor; - } -}; + template + HDINLINE T_PosType position(const T_PosType& pos) const + { + const T_PosType pos_floor = math::floor(pos); + return pos - pos_floor; + } + }; -} // namespace interpolationMemoryShift + } // namespace interpolationMemoryPolicy -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/None/AlgorithmNone.hpp b/include/picongpu/particles/ionization/None/AlgorithmNone.hpp index f50d82735a..8755803ec4 100644 --- a/include/picongpu/particles/ionization/None/AlgorithmNone.hpp +++ b/include/picongpu/particles/ionization/None/AlgorithmNone.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten +/* Copyright 2014-2021 Marco Garten * * This file is part of PIConGPU. 
* @@ -31,36 +31,32 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** \struct AlgorithmNone - * - * \brief ionization algorithm that does nothing - */ - struct AlgorithmNone + namespace particles { - - /** Functor implementation - * - * \tparam EType type of electric field - * \tparam BType type of magnetic field - * \tparam ParticleType type of particle to be ionized - * - * \param bField magnetic field value at t=0 - * \param eField electric field value at t=0 - * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 - */ - template - HDINLINE void - operator()( const BType bField, const EType eField, ParticleType& parentIon ) + namespace ionization { - - } - }; - -} // namespace ionization -} // namespace particles + /** \struct AlgorithmNone + * + * \brief ionization algorithm that does nothing + */ + struct AlgorithmNone + { + /** Functor implementation + * + * \tparam EType type of electric field + * \tparam BType type of magnetic field + * \tparam ParticleType type of particle to be ionized + * + * \param bField magnetic field value at t=0 + * \param eField electric field value at t=0 + * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 + */ + template + HDINLINE void operator()(const BType bField, const EType eField, ParticleType& parentIon) + { + } + }; + + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp b/include/picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp index 324b2354ee..dc90fde0b3 100644 --- a/include/picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp +++ b/include/picongpu/particles/ionization/byCollision/ThomasFermi/AlgorithmThomasFermi.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Marco Garten, Axel Huebl +/* Copyright 2016-2021 Marco Garten, Axel 
Huebl * * This file is part of PIConGPU. * @@ -36,218 +36,216 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** AlgorithmThomasFermi - * - * ionization prediction for the Thomas-Fermi ionization model - * - */ - struct AlgorithmThomasFermi + namespace particles { - /** Detailed Balance implementation of the Thomas-Fermi model - * - * This model uses local ion density and "temperature" values as input - * parameters to calculate an average charge state. - * A physical temperature requires a defined equilibrium state. - * Typical high power laser-plasma interaction is highly - * non-equilibrated, though. The name "temperature" is kept to illustrate - * the origination from the Thomas-Fermi model. It is nevertheless - * more accurate to think of it as an averaged kinetic energy - * which is not backed by the model and should therefore only be used with - * a certain suspicion in such Non-LTE scenarios. - * - * @tparam ParticleType type of particle for which to calculate - * an average charge state - * - * @param temperature electron "temperature" value calculated from average - * kinetic electron energy per ion in units of eV - * @param massDensity ion mass density in units of g/cm^3 - * - * @return average charge state prediction according to the Thomas-Fermi model - */ - template< typename ParticleType > - HDINLINE float_X - detailedBalanceThomasFermi( float_X const temperature, float_X const massDensity, ParticleType & parentIon ) + namespace ionization { + /** AlgorithmThomasFermi + * + * ionization prediction for the Thomas-Fermi ionization model + * + */ + struct AlgorithmThomasFermi + { + /** Detailed Balance implementation of the Thomas-Fermi model + * + * This model uses local ion density and "temperature" values as input + * parameters to calculate an average charge state. + * A physical temperature requires a defined equilibrium state. + * Typical high power laser-plasma interaction is highly + * non-equilibrated, though. 
The name "temperature" is kept to illustrate + * the origination from the Thomas-Fermi model. It is nevertheless + * more accurate to think of it as an averaged kinetic energy + * which is not backed by the model and should therefore only be used with + * a certain suspicion in such Non-LTE scenarios. + * + * @tparam ParticleType type of particle for which to calculate + * an average charge state + * + * @param temperature electron "temperature" value calculated from average + * kinetic electron energy per ion in units of eV + * @param massDensity ion mass density in units of g/cm^3 + * + * @return average charge state prediction according to the Thomas-Fermi model + */ + template + HDINLINE float_X detailedBalanceThomasFermi( + float_X const temperature, + float_X const massDensity, + ParticleType& parentIon) + { + /* @TODO replace the float_64 with float_X and make sure the values are scaled to PIConGPU units */ + constexpr float_64 protonNumber = GetAtomicNumbers::type::numberOfProtons; + constexpr float_64 neutronNumber = GetAtomicNumbers::type::numberOfNeutrons; - /* @TODO replace the float_64 with float_X and make sure the values are scaled to PIConGPU units */ - constexpr float_64 protonNumber = GetAtomicNumbers< ParticleType >::type::numberOfProtons; - constexpr float_64 neutronNumber = GetAtomicNumbers< ParticleType >::type::numberOfNeutrons; - - /* atomic mass number (usually A) A = N + Z */ - constexpr float_64 massNumber = neutronNumber + protonNumber; - - float_64 const T_0 = temperature / math::pow( protonNumber, float_64( 4. / 3. ) ); - - float_64 const T_F = T_0 / ( float_64( 1. 
) + T_0 ); - - /* for all the fitting parameters @see ionizer.param */ - - /** this is weird - I have to define temporary variables because - * otherwise the math::pow function won't recognize those at the - * exponent position */ - constexpr float_64 TFA2_temp = thomasFermi::TFA2; - constexpr float_64 TFA4_temp = thomasFermi::TFA4; - constexpr float_64 TFBeta_temp = thomasFermi::TFBeta; - - float_64 const A = thomasFermi::TFA1 * math::pow( T_0, TFA2_temp ) + thomasFermi::TFA3 * math::pow( T_0, TFA4_temp ); - - float_64 const B = -math::exp( thomasFermi::TFB0 + thomasFermi::TFB1 * T_F + thomasFermi::TFB2 * math::pow( T_F, float_64( 7. ) ) ); - - float_64 const C = thomasFermi::TFC1 * T_F + thomasFermi::TFC2; - - constexpr float_64 invAtomicTimesMassNumber = float_64( 1. ) / ( protonNumber * massNumber ); - float_64 const R = massDensity * invAtomicTimesMassNumber; - - float_64 const Q_1 = A * math::pow( R, B ); - - float_64 const Q = math::pow( math::pow( R, C ) + math::pow( Q_1, C ), float_64( 1. ) / C ); + /* atomic mass number (usually A) A = N + Z */ + constexpr float_64 massNumber = neutronNumber + protonNumber; - float_64 const x = thomasFermi::TFAlpha * math::pow( Q, TFBeta_temp ); + float_64 const T_0 = temperature / math::pow(protonNumber, float_64(4. / 3.)); - /* Thomas-Fermi average ionization state */ - float_X const ZStar = static_cast< float_X >( - protonNumber * x / ( - float_64( 1. ) + x + - math::sqrt( float_64( 1. ) + float_64( 2. ) * x ) - ) - ); + float_64 const T_F = T_0 / (float_64(1.) + T_0); - return ZStar; - } + /* for all the fitting parameters @see ionizer.param */ - /** Functor implementation - * - * Calling this functor gives a prediction for an integer number of new - * free macro electrons to create. This prediction is based on the - * average charge state in the Thomas-Fermi model. - * The functor calculates the integer number of bound electrons from - * this state by a Monte-Carlo step. 
- * - * @tparam ParticleType type of particle to be ionized - * - * @param ZStar average charge state in the Thomas-Fermi model - * @param parentIon particle instance to be ionized - * @param randNr random number - * - * @return numNewFreeMacroElectrons number of new macro electrons to - * create, range: [0, boundElectrons] - */ - template< typename ParticleType > - HDINLINE uint32_t - operator( )( float_X const kinEnergyDensity, float_X const density, ParticleType & parentIon, float_X randNr ) - { + /** this is weird - I have to define temporary variables because + * otherwise the math::pow function won't recognize those at the + * exponent position */ + constexpr float_64 TFA2_temp = thomasFermi::TFA2; + constexpr float_64 TFA4_temp = thomasFermi::TFA4; + constexpr float_64 TFBeta_temp = thomasFermi::TFBeta; - /* initialize functor return value: number of new macro electrons to create */ - uint32_t numNewFreeMacroElectrons = 0u; + float_64 const A = thomasFermi::TFA1 * math::pow(T_0, TFA2_temp) + + thomasFermi::TFA3 * math::pow(T_0, TFA4_temp); - float_64 const densityUnit = static_cast< float_64 >( particleToGrid::derivedAttributes::Density( ).getUnit( )[ 0 ] ); - float_64 const kinEnergyDensityUnit = static_cast< float_64 >( particleToGrid::derivedAttributes::EnergyDensity( ).getUnit( )[ 0 ] ); - /* convert from kinetic energy density to average kinetic energy per particle */ - float_64 const kinEnergyUnit = kinEnergyDensityUnit / densityUnit; - float_64 const avgKinEnergy = kinEnergyDensity / density * kinEnergyUnit; - /** convert kinetic energy in J to "temperature" in eV by assuming an ideal electron gas - * E_kin = 3/2 k*T - */ - constexpr float_64 convKinEnergyToTemperature = UNITCONV_Joule_to_keV * float_64( 1.e3 ) * float_64( 2./3. 
); - /** electron "temperature" in electron volts */ - float_64 const temperature = avgKinEnergy * convKinEnergyToTemperature; + float_64 const B = -math::exp( + thomasFermi::TFB0 + thomasFermi::TFB1 * T_F + + thomasFermi::TFB2 * math::pow(T_F, float_64(7.))); - /* conversion factors from number density to mass density */ - constexpr float_64 nAvogadro = SI::N_AVOGADRO; - constexpr float_64 convM3ToCM3 = 1.e6; + float_64 const C = thomasFermi::TFC1 * T_F + thomasFermi::TFC2; - /* @TODO replace the float_64 with float_X and make sure the values are scaled to PIConGPU units */ - constexpr float_64 protonNumber = GetAtomicNumbers::type::numberOfProtons; - constexpr float_64 neutronNumber = GetAtomicNumbers::type::numberOfNeutrons; + constexpr float_64 invAtomicTimesMassNumber = float_64(1.) / (protonNumber * massNumber); + float_64 const R = massDensity * invAtomicTimesMassNumber; - /* atomic mass number (usually A) A = N + Z */ - constexpr float_64 massNumber = neutronNumber + protonNumber; + float_64 const Q_1 = A * math::pow(R, B); - float_64 const convToMassDensity = densityUnit * massNumber / nAvogadro / convM3ToCM3; - /** mass density in units of g/cm^3 */ - float_64 const massDensity = density * convToMassDensity; + float_64 const Q = math::pow(math::pow(R, C) + math::pow(Q_1, C), float_64(1.) / C); - /** lower ion density cutoff - * - * The Thomas-Fermi model yields unphysical artifacts for low densities. - * If `density` is lower than a user-definable ion number density value the model will not be applied. - */ - constexpr float_X lowerDensityCutoff = particles::ionization::thomasFermi::CUTOFF_LOW_DENSITY; - /** lower electron temperature cutoff - * - * The Thomas-Fermi model also yields partly unphysical artifacts for low electron temperatures. - * If `temperature` is lower than a user-definable ion number temperature value the model will not be applied. 
- */ - constexpr float_X lowerTemperatureCutoff = particles::ionization::thomasFermi::CUTOFF_LOW_TEMPERATURE_EV; + float_64 const x = thomasFermi::TFAlpha * math::pow(Q, TFBeta_temp); - if( - density * densityUnit >= lowerDensityCutoff && - temperature >= lowerTemperatureCutoff - ) - { + /* Thomas-Fermi average ionization state */ + float_X const ZStar = static_cast( + protonNumber * x / (float_64(1.) + x + math::sqrt(float_64(1.) + float_64(2.) * x))); - float_64 const chargeState = attribute::getChargeState( parentIon ); - /* @TODO replace the float_64 with float_X and make sure the values are scaled to PIConGPU units */ - constexpr float_64 protonNumber = GetAtomicNumbers< ParticleType >::type::numberOfProtons; + return ZStar; + } - /* only ionize not-fully ionized ions */ - if( chargeState < protonNumber ) + /** Functor implementation + * + * Calling this functor gives a prediction for an integer number of new + * free macro electrons to create. This prediction is based on the + * average charge state in the Thomas-Fermi model. + * The functor calculates the integer number of bound electrons from + * this state by a Monte-Carlo step. + * + * @tparam ParticleType type of particle to be ionized + * + * @param ZStar average charge state in the Thomas-Fermi model + * @param parentIon particle instance to be ionized + * @param randNr random number + * + * @return numNewFreeMacroElectrons number of new macro electrons to + * create, range: [0, boundElectrons] + */ + template + HDINLINE uint32_t operator()( + float_X const kinEnergyDensity, + float_X const density, + ParticleType& parentIon, + float_X randNr) { - /* Thomas-Fermi calculation step: - * Determines the new average charge state for each ion under - * LTE conditions. 
+ /* initialize functor return value: number of new macro electrons to create */ + uint32_t numNewFreeMacroElectrons = 0u; + + float_64 const densityUnit + = static_cast(particleToGrid::derivedAttributes::Density().getUnit()[0]); + float_64 const kinEnergyDensityUnit + = static_cast(particleToGrid::derivedAttributes::EnergyDensity().getUnit()[0]); + /* convert from kinetic energy density to average kinetic energy per particle */ + float_64 const kinEnergyUnit = kinEnergyDensityUnit / densityUnit; + float_64 const avgKinEnergy = kinEnergyDensity / density * kinEnergyUnit; + /** convert kinetic energy in J to "temperature" in eV by assuming an ideal electron gas + * E_kin = 3/2 k*T */ - float_X const ZStar = detailedBalanceThomasFermi( - temperature, - massDensity, - parentIon - ); - - /* integral part of the average charge state */ - float_X intZStar; - /* fractional part of the average charge state */ - float_X const fracZStar = math::modf( ZStar, &intZStar ); - - /* Determine new charge state. - * We do a Monte-Carlo step to distribute charge states between - * the two "surrounding" integer numbers if ZStar has a non-zero - * fractional part. - */ - float_X const newChargeState = - intZStar + - float_X( 1.0 ) * ( randNr < fracZStar ); - - /* define number of bound macro electrons before ionization */ - float_X const prevBoundElectrons = parentIon[ boundElectrons_ ]; - - /** determine the new number of bound electrons from the TF ionization state - * @TODO introduce partial macroparticle ionization / ionization distribution at some point + constexpr float_64 convKinEnergyToTemperature + = UNITCONV_Joule_to_keV * float_64(1.e3) * float_64(2. 
/ 3.); + /** electron "temperature" in electron volts */ + float_64 const temperature = avgKinEnergy * convKinEnergyToTemperature; + + /* conversion factors from number density to mass density */ + constexpr float_64 nAvogadro = SI::N_AVOGADRO; + constexpr float_64 convM3ToCM3 = 1.e6; + + /* @TODO replace the float_64 with float_X and make sure the values are scaled to PIConGPU units */ + constexpr float_64 protonNumber = GetAtomicNumbers::type::numberOfProtons; + constexpr float_64 neutronNumber = GetAtomicNumbers::type::numberOfNeutrons; + + /* atomic mass number (usually A) A = N + Z */ + constexpr float_64 massNumber = neutronNumber + protonNumber; + + float_64 const convToMassDensity = densityUnit * massNumber / nAvogadro / convM3ToCM3; + /** mass density in units of g/cm^3 */ + float_64 const massDensity = density * convToMassDensity; + + /** lower ion density cutoff + * + * The Thomas-Fermi model yields unphysical artifacts for low densities. + * If `density` is lower than a user-definable ion number density value the model will not be + * applied. */ - float_X const newBoundElectrons = protonNumber - newChargeState; - - /* Only account for ionization: we only increase the charge - * state of an ion if necessary, but ignore recombination of - * electrons as prediced by the implemented detailed balance - * algorithm. + constexpr float_X lowerDensityCutoff = particles::ionization::thomasFermi::CUTOFF_LOW_DENSITY; + /** lower electron temperature cutoff + * + * The Thomas-Fermi model also yields partly unphysical artifacts for low electron temperatures. + * If `temperature` is lower than a user-definable ion number temperature value the model will not + * be applied. 
*/ - if( prevBoundElectrons > newBoundElectrons ) - /* determine number of new free macro electrons - * to be created in the ionization routine - */ - numNewFreeMacroElectrons = static_cast< uint32_t >( prevBoundElectrons - newBoundElectrons ); + constexpr float_X lowerTemperatureCutoff + = particles::ionization::thomasFermi::CUTOFF_LOW_TEMPERATURE_EV; + + if(density * densityUnit >= lowerDensityCutoff && temperature >= lowerTemperatureCutoff) + { + float_64 const chargeState = attribute::getChargeState(parentIon); + /* @TODO replace the float_64 with float_X and make sure the values are scaled to PIConGPU + * units */ + constexpr float_64 protonNumber = GetAtomicNumbers::type::numberOfProtons; + + /* only ionize not-fully ionized ions */ + if(chargeState < protonNumber) + { + /* Thomas-Fermi calculation step: + * Determines the new average charge state for each ion under + * LTE conditions. + */ + float_X const ZStar = detailedBalanceThomasFermi(temperature, massDensity, parentIon); + + /* integral part of the average charge state */ + float_X intZStar; + /* fractional part of the average charge state */ + float_X const fracZStar = pmacc::math::modf(ZStar, &intZStar); + + /* Determine new charge state. + * We do a Monte-Carlo step to distribute charge states between + * the two "surrounding" integer numbers if ZStar has a non-zero + * fractional part. 
+ */ + float_X const newChargeState = intZStar + float_X(1.0) * (randNr < fracZStar); + + /* define number of bound macro electrons before ionization */ + float_X const prevBoundElectrons = parentIon[boundElectrons_]; + + /** determine the new number of bound electrons from the TF ionization state + * @TODO introduce partial macroparticle ionization / ionization distribution at some point + */ + float_X const newBoundElectrons = protonNumber - newChargeState; + + /* Only account for ionization: we only increase the charge + * state of an ion if necessary, but ignore recombination of + * electrons as prediced by the implemented detailed balance + * algorithm. + */ + if(prevBoundElectrons > newBoundElectrons) + /* determine number of new free macro electrons + * to be created in the ionization routine + */ + numNewFreeMacroElectrons + = static_cast(prevBoundElectrons - newBoundElectrons); + } + } + + return numNewFreeMacroElectrons; } - } - - return numNewFreeMacroElectrons; - } - - }; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def index 52a3dd523f..1a4848d9e2 100644 --- a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def +++ b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Marco Garten +/* Copyright 2016-2021 Marco Garten * * This file is part of PIConGPU. 
* @@ -23,54 +23,54 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - /** Thomas-Fermi impact ionization model - * - * \tparam T_DestSpecies electron species to be created - * \tparam T_SrcSpecies particle species that is ionized - * default is boost::mpl placeholder because specialization - * cannot be known in list of particle species' flags - * \see speciesDefinition.param - */ - template - struct ThomasFermi_Impl; - - /** Thomas-Fermi impact ionization model - * - * This ionization model describes the atom inside the Thomas-Fermi framework - * in a self-consistent way. There the electrons are modeled as a density - * with respect to the distance from the core while the atomic core is often - * assumed as a point charge. The atomic potential is considered to be finite - * as a result of matter density and it defines the so-called "ion sphere". - * Due to the overlap of adjacent ion spheres the ionization barrier can be - * lowered and electrons become quasi-free in the system (resonance states). - * The Thomas-Fermi model calculates an average ionization degree only with - * respect to charge density and temperature. Through further assumptions - * and fitting parameters the model gets extended to arbitrary temperatures - * and atoms. 
- * - * See table IV from Pressure Ionization, Resonances, and the Continuity of - * Bound and Free States - * \url http://www.sciencedirect.com/science/article/pii/S0065219908601451 - * doi:10.1016/S0065-2199(08)60145-1 - * - * \tparam T_DestSpecies electron species to be created - * - * wrapper class, - * needed because the SrcSpecies cannot be known during the - * first specialization of the ionization model in the particle definition - * \see speciesDefinition.param - */ - template - struct ThomasFermi + namespace particles { - using IonizationAlgorithm = particles::ionization::AlgorithmThomasFermi; - using type = ThomasFermi_Impl; - }; + namespace ionization + { + /** Thomas-Fermi impact ionization model + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_SrcSpecies particle species that is ionized + * default is boost::mpl placeholder because specialization + * cannot be known in list of particle species' flags + * \see speciesDefinition.param + */ + template + struct ThomasFermi_Impl; + + /** Thomas-Fermi impact ionization model + * + * This ionization model describes the atom inside the Thomas-Fermi framework + * in a self-consistent way. There the electrons are modeled as a density + * with respect to the distance from the core while the atomic core is often + * assumed as a point charge. The atomic potential is considered to be finite + * as a result of matter density and it defines the so-called "ion sphere". + * Due to the overlap of adjacent ion spheres the ionization barrier can be + * lowered and electrons become quasi-free in the system (resonance states). + * The Thomas-Fermi model calculates an average ionization degree only with + * respect to charge density and temperature. Through further assumptions + * and fitting parameters the model gets extended to arbitrary temperatures + * and atoms. 
+ * + * See table IV from Pressure Ionization, Resonances, and the Continuity of + * Bound and Free States + * \url http://www.sciencedirect.com/science/article/pii/S0065219908601451 + * doi:10.1016/S0065-2199(08)60145-1 + * + * \tparam T_DestSpecies electron species to be created + * + * wrapper class, + * needed because the SrcSpecies cannot be known during the + * first specialization of the ionization model in the particle definition + * \see speciesDefinition.param + */ + template + struct ThomasFermi + { + using IonizationAlgorithm = particles::ionization::AlgorithmThomasFermi; + using type = ThomasFermi_Impl; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp index 5687955ced..823bfd1268 100644 --- a/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp +++ b/include/picongpu/particles/ionization/byCollision/ThomasFermi/ThomasFermi_Impl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Marco Garten, Axel Huebl +/* Copyright 2016-2021 Marco Garten, Axel Huebl * * This file is part of PIConGPU. 
* @@ -22,7 +22,6 @@ #include "picongpu/simulation_defines.hpp" #include #include -#include "picongpu/traits/UsesRNG.hpp" #include "picongpu/fields/CellType.hpp" #include "picongpu/fields/FieldTmp.hpp" @@ -43,357 +42,289 @@ namespace picongpu { -namespace traits -{ - /** specialization of the UsesRNG trait - * --> ionization module uses random number generation - */ - template - struct UsesRNG > : - public boost::true_type + namespace particles { - }; -} // namespace traits - -namespace particles -{ -namespace ionization -{ - - /** ThomasFermi_Impl - * - * Thomas-Fermi pressure ionization - Implementation - * - * @tparam T_IonizationAlgorithm functor that returns a number of - * new free macro electrons to create, range: [0, boundElectrons] - * @tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created - * @tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized - */ - template - struct ThomasFermi_Impl - { - - using DestSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_DestSpecies - >; - using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SrcSpecies - >; - - using FrameType = typename SrcSpecies::FrameType; - - /** specify field to particle interpolation scheme - * - * @todo this needs to be done independently/twice if ion species (rho) and electron - * species (ene) are of different shape - */ - using Field2ParticleInterpolation = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; - - /* margins around the supercell for the interpolation of the field on the cells */ - using LowerMargin = typename GetMargin::LowerMargin ; - using UpperMargin = typename GetMargin::UpperMargin; - - /* relevant area of a block */ - using BlockArea = SuperCellDescription< - typename MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - BlockArea BlockDescription; - - /* parameter class containing the energy cutoff 
parameter for electron temperature calculation */ - struct CutoffMaxEnergy + namespace ionization { - static constexpr float_X cutoffMaxEnergy = - particles::ionization::thomasFermi::CUTOFF_MAX_ENERGY; - }; - - private: - - /* define ionization ALGORITHM (calculation) for ionization MODEL */ - using IonizationAlgorithm = T_IonizationAlgorithm; - - /* random number generator */ - using RNGFactory = pmacc::random::RNGProvider; - using Distribution = pmacc::random::distributions::Uniform; - using RandomGen = typename RNGFactory::GetRandomType::type; - RandomGen randomGen; - - using SuperCellSize = MappingDesc::SuperCellSize; - - using ValueType_Rho = FieldTmp::ValueType; - using ValueType_Ene = FieldTmp::ValueType ; - - /* global memory EM-field device databoxes */ - PMACC_ALIGN(rhoBox, FieldTmp::DataBoxType); - PMACC_ALIGN(eneBox, FieldTmp::DataBoxType); - - /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedRho, DataBox >); - PMACC_ALIGN(cachedEne, DataBox >); - - public: - /* host constructor initializing member : random number generator */ - ThomasFermi_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom()) + /** ThomasFermi_Impl + * + * Thomas-Fermi pressure ionization - Implementation + * + * @tparam T_IonizationAlgorithm functor that returns a number of + * new free macro electrons to create, range: [0, boundElectrons] + * @tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + * @tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized + */ + template + struct ThomasFermi_Impl { - /* create handle for access to host and device data */ - DataConnector &dc = Environment<>::get().DataConnector(); + using DestSpecies = pmacc::particles::meta::FindByNameOrType_t; + using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t; - /* The compiler is allowed to evaluate an expression that does not depend on a template parameter - * even if the class is never 
instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. - * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + using FrameType = typename SrcSpecies::FrameType; + + /** specify field to particle interpolation scheme * - * A workaround is to add a template dependency to the expression. - * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + * @todo this needs to be done independently/twice if ion species (rho) and electron + * species (ene) are of different shape */ - PMACC_CASSERT_MSG( - _please_allocate_at_least_two_FieldTmp_slots_in_memory_param, - ( fieldTmpNumSlots >= 2 ) && ( sizeof( T_IonizationAlgorithm ) != 0 ) - ); - /* initialize pointers on host-side density-/energy density field databoxes */ - auto density = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - auto eneKinDens = dc.get< FieldTmp >( FieldTmp::getUniqueId( 1 ), true ); - - /* reset density and kinetic energy values to zero */ - density->getGridBuffer().getDeviceBuffer().setValue( FieldTmp::ValueType( 0. ) ); - eneKinDens->getGridBuffer().getDeviceBuffer().setValue( FieldTmp::ValueType( 0. 
) ); - - /* load species without copying the particle data to the host */ - auto srcSpecies = dc.get< SrcSpecies >( SrcSpecies::FrameType::getName(), true ); - - /** Calculate weighted ion density + using Field2ParticleInterpolation = + typename pmacc::traits::Resolve>::type>::type; + + /* margins around the supercell for the interpolation of the field on the cells */ + using LowerMargin = typename GetMargin::LowerMargin; + using UpperMargin = typename GetMargin::UpperMargin; + + /* relevant area of a block */ + using BlockArea = SuperCellDescription; + + BlockArea BlockDescription; + + /* parameter class containing the energy cutoff parameter for electron temperature calculation */ + struct CutoffMaxEnergy + { + static constexpr float_X cutoffMaxEnergy = particles::ionization::thomasFermi::CUTOFF_MAX_ENERGY; + }; + + private: + /* define ionization ALGORITHM (calculation) for ionization MODEL */ + using IonizationAlgorithm = T_IonizationAlgorithm; + + /* random number generator */ + using RNGFactory = pmacc::random::RNGProvider; + using Distribution = pmacc::random::distributions::Uniform; + using RandomGen = typename RNGFactory::GetRandomType::type; + RandomGen randomGen; + + using SuperCellSize = MappingDesc::SuperCellSize; + + using ValueType_Rho = FieldTmp::ValueType; + using ValueType_Ene = FieldTmp::ValueType; + + /* global memory EM-field device databoxes */ + PMACC_ALIGN(rhoBox, FieldTmp::DataBoxType); + PMACC_ALIGN(eneBox, FieldTmp::DataBoxType); + + /* shared memory EM-field device databoxes */ + PMACC_ALIGN(cachedRho, DataBox>); + PMACC_ALIGN(cachedEne, DataBox>); + + public: + /* host constructor initializing member : random number generator */ + ThomasFermi_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom()) + { + /* create handle for access to host and device data */ + DataConnector& dc = Environment<>::get().DataConnector(); + + /* The compiler is allowed to evaluate an expression that does not depend on a template parameter + * 
even if the class is never instantiated. In that case static assert is always + * evaluated (e.g. with clang), this results in an error if the condition is false. + * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + */ + PMACC_CASSERT_MSG( + _please_allocate_at_least_two_FieldTmp_slots_in_memory_param, + (fieldTmpNumSlots >= 2) && (sizeof(T_IonizationAlgorithm) != 0)); + /* initialize pointers on host-side density-/energy density field databoxes */ + auto density = dc.get(FieldTmp::getUniqueId(0), true); + auto eneKinDens = dc.get(FieldTmp::getUniqueId(1), true); + + /* reset density and kinetic energy values to zero */ + density->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.)); + eneKinDens->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.)); + + /* load species without copying the particle data to the host */ + auto srcSpecies = dc.get(SrcSpecies::FrameType::getName(), true); + + /** Calculate weighted ion density + * + * @todo Include all ion species because the model requires the + * density of ionic potential wells + */ + using DensitySolver = typename particleToGrid:: + CreateFieldTmpOperation_t::Solver; + density->template computeValue(*srcSpecies, currentStep); + dc.releaseData(SrcSpecies::FrameType::getName()); + EventTask densityEvent = density->asyncCommunication(__getTransactionEvent()); + densityEvent += density->asyncCommunicationGather(densityEvent); + + /* load species without copying the particle data to the host */ + auto destSpecies = dc.get(DestSpecies::FrameType::getName(), true); + + /** Calculate energy density of the electron species with maximum energy cutoff + * + * @todo Include all electron species with a meta::ForEach + * instead of just the destination species + */ + using EnergyDensitySolver = typename 
particleToGrid::CreateFieldTmpOperation_t< + DestSpecies, + particleToGrid::derivedAttributes::EnergyDensityCutoff>::Solver; + eneKinDens->template computeValue(*destSpecies, currentStep); + dc.releaseData(DestSpecies::FrameType::getName()); + EventTask eneKinEvent = eneKinDens->asyncCommunication(__getTransactionEvent()); + eneKinEvent += eneKinDens->asyncCommunicationGather(eneKinEvent); + + /* contributions from neighboring GPUs to our border area */ + __setTransactionEvent(densityEvent + eneKinEvent); + + /* initialize device-side density- and energy density field databox pointers */ + rhoBox = density->getDeviceDataBox(); + eneBox = eneKinDens->getDeviceDataBox(); + } + + /** cache fields used by this functor + * + * @warning this is a collective method and calls synchronize + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker * - * @todo Include all ion species because the model requires the - * density of ionic potential wells + * @param acc alpaka accelerator + * @param blockCell relative offset (in cells) to the local domain plus the guarding cells + * @param workerCfg configuration of the worker */ - using DensitySolver = typename particleToGrid::CreateFieldTmpOperation_t< - SrcSpecies, - particleToGrid::derivedAttributes::Density - >::Solver; - density->template computeValue< CORE + BORDER, DensitySolver >(*srcSpecies, currentStep); - dc.releaseData( SrcSpecies::FrameType::getName() ); - EventTask densityEvent = density->asyncCommunication( __getTransactionEvent() ); - densityEvent += density->asyncCommunicationGather( densityEvent ); - - /* load species without copying the particle data to the host */ - auto destSpecies = dc.get< DestSpecies >( DestSpecies::FrameType::getName(), true ); - - /** Calculate energy density of the electron species with maximum energy cutoff + template + DINLINE void collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const 
T_WorkerCfg& workerCfg) + { + /* caching of density and "temperature" fields */ + cachedRho = CachedBox::create<0, ValueType_Rho>(acc, BlockArea()); + cachedEne = CachedBox::create<1, ValueType_Ene>(acc, BlockArea()); + + /* instance of nvidia assignment operator */ + nvidia::functors::Assign assign; + /* copy fields from global to shared */ + auto fieldRhoBlock = rhoBox.shift(blockCell); + ThreadCollective collective(workerCfg.getWorkerIdx()); + collective(acc, assign, cachedRho, fieldRhoBlock); + /* copy fields from global to shared */ + auto fieldEneBlock = eneBox.shift(blockCell); + collective(acc, assign, cachedEne, fieldEneBlock); + + /* wait for shared memory to be initialized */ + cupla::__syncthreads(acc); + } + + /** Initialization function on device * - * @todo Include all electron species with a meta::ForEach - * instead of just the destination species + * Cache density and energy density fields on device and initialize + * possible prerequisites for ionization, like e.g. random number + * generator. + * + * This function will be called inline on the device which must happen BEFORE threads diverge + * during loop execution. The reason for this is the `cupla::__syncthreads( acc )` call which is + * necessary after initializing the field shared boxes in shared memory. + * + * @param blockCell Offset of the cell from the origin of the local domain + * *including guarding supercells* in units of cells + * @param linearThreadIdx Linearized thread ID inside the block + * @param localCellOffset Offset of the cell from the origin of the local + * domain, i.e. 
from the @see BORDER + * *without guarding supercells* */ - using EnergyDensitySolver = typename particleToGrid::CreateFieldTmpOperation_t< - DestSpecies, - particleToGrid::derivedAttributes::EnergyDensityCutoff< CutoffMaxEnergy > - >::Solver; - eneKinDens->template computeValue< CORE + BORDER, EnergyDensitySolver >(*destSpecies, currentStep); - dc.releaseData( DestSpecies::FrameType::getName() ); - EventTask eneKinEvent = eneKinDens->asyncCommunication( __getTransactionEvent() ); - eneKinEvent += eneKinDens->asyncCommunicationGather( eneKinEvent ); - - /* contributions from neighboring GPUs to our border area */ - __setTransactionEvent( densityEvent + eneKinEvent ); - - /* initialize device-side density- and energy density field databox pointers */ - rhoBox = density->getDeviceDataBox(); - eneBox = eneKinDens->getDeviceDataBox(); - - } - - /** cache fields used by this functor - * - * @warning this is a collective method and calls synchronize - * - * @tparam T_Acc alpaka accelerator type - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * - * @param acc alpaka accelerator - * @param blockCell relative offset (in cells) to the local domain plus the guarding cells - * @param workerCfg configuration of the worker - */ - template< - typename T_Acc , - typename T_WorkerCfg - > - DINLINE void collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg - ) - { - /* caching of density and "temperature" fields */ - cachedRho = CachedBox::create< - 0, - ValueType_Rho - >( - acc, - BlockArea() - ); - cachedEne = CachedBox::create< - 1, - ValueType_Ene - >( - acc, - BlockArea() - ); - - /* instance of nvidia assignment operator */ - nvidia::functors::Assign assign; - /* copy fields from global to shared */ - auto fieldRhoBlock = rhoBox.shift(blockCell); - ThreadCollective< - BlockArea, - T_WorkerCfg::numWorkers - > collective( workerCfg.getWorkerIdx( ) ); - collective( - acc, - assign, - 
cachedRho, - fieldRhoBlock - ); - /* copy fields from global to shared */ - auto fieldEneBlock = eneBox.shift(blockCell); - collective( - acc, - assign, - cachedEne, - fieldEneBlock - ); - - /* wait for shared memory to be initialized */ - __syncthreads(); - } - - /** Initialization function on device - * - * Cache density and energy density fields on device and initialize - * possible prerequisites for ionization, like e.g. random number - * generator. - * - * This function will be called inline on the device which must happen BEFORE threads diverge - * during loop execution. The reason for this is the `__syncthreads()` call which is necessary after - * initializing the field shared boxes in shared memory. - * - * @param blockCell Offset of the cell from the origin of the local domain - * *including guarding supercells* in units of cells - * @param linearThreadIdx Linearized thread ID inside the block - * @param localCellOffset Offset of the cell from the origin of the local - * domain, i.e. 
from the @see BORDER - * *without guarding supercells* - */ - template< typename T_Acc > - DINLINE void init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset - ) - { - /* initialize random number generator with the local cell index in the simulation */ - this->randomGen.init(localCellOffset); - } - - /** Determine number of new macro electrons due to ionization - * - * @param ionFrame reference to frame of the to-be-ionized particles - * @param localIdx local (linear) index in super cell / frame - */ - template< typename T_Acc > - DINLINE uint32_t numNewParticles(T_Acc const & acc, FrameType& ionFrame, int localIdx) - { - /* alias for the single macro-particle */ - auto particle = ionFrame[localIdx]; - /* particle position, used for field-to-particle interpolation */ - floatD_X const pos = particle[position_]; - int const particleCellIdx = particle[localCellIdx_]; - /* multi-dim coordinate of the local cell inside the super cell */ - DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - /* interpolation of density */ - const picongpu::traits::FieldPosition fieldPosRho; - ValueType_Rho densityV = Field2ParticleInterpolation() - (cachedRho.shift(localCell).toCursor(), pos, fieldPosRho()); - /* and energy density field on the particle position */ - const picongpu::traits::FieldPosition fieldPosEne; - ValueType_Ene kinEnergyV = Field2ParticleInterpolation() - (cachedEne.shift(localCell).toCursor(), pos, fieldPosEne()); - - /* density in sim units */ - float_X const density = densityV[0]; - /* energy density in sim units */ - float_X const kinEnergyDensity = kinEnergyV[0]; - - /* Returns the new number of bound electrons for an integer number of macro electrons */ - IonizationAlgorithm ionizeAlgo; - uint32_t newMacroElectrons = ionizeAlgo( - kinEnergyDensity, - density, - particle, - this->randomGen(acc) - ); - - - return newMacroElectrons; - - } - - /* Functor implementation - * - 
* Ionization model specific particle creation - * - * \tparam T_parentIon type of ion species that is being ionized - * \tparam T_childElectron type of electron species that is created - * \param parentIon ion instance that is ionized - * \param childElectron electron instance that is created - */ - template - DINLINE void operator()(T_Acc const & acc, T_parentIon& parentIon,T_childElectron& childElectron) - { - /* for not mixing operations::assign up with the nvidia functor assign */ - namespace partOp = pmacc::particles::operations; - /* each thread sets the multiMask hard on "particle" (=1) */ - childElectron[multiMask_] = 1u; - const float_X weighting = parentIon[weighting_]; - - /* each thread initializes a clone of the parent ion but leaving out - * some attributes: - * - multiMask: reading from global memory takes longer than just setting it again explicitly - * - momentum: because the electron would get a higher energy because of the ion mass - * - boundElectrons: because species other than ions or atoms do not have them - * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + template + DINLINE void init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset) + { + /* initialize random number generator with the local cell index in the simulation */ + this->randomGen.init(localCellOffset); + } + + /** Determine number of new macro electrons due to ionization + * + * @param ionFrame reference to frame of the to-be-ionized particles + * @param localIdx local (linear) index in super cell / frame */ - auto targetElectronClone = partOp::deselect >(childElectron); - - partOp::assign(targetElectronClone, partOp::deselect(parentIon)); - - const float_X massIon = attribute::getMass(weighting,parentIon); - const float_X massElectron = attribute::getMass(weighting,childElectron); - - const float3_X electronMomentum (parentIon[momentum_]*(massElectron/massIon)); - - 
childElectron[momentum_] = electronMomentum; - - /* conservation of momentum - * \todo add conservation of mass */ - parentIon[momentum_] -= electronMomentum; - - /** ionization of the ion by reducing the number of bound electrons + template + DINLINE uint32_t numNewParticles(T_Acc const& acc, FrameType& ionFrame, int localIdx) + { + /* alias for the single macro-particle */ + auto particle = ionFrame[localIdx]; + /* particle position, used for field-to-particle interpolation */ + floatD_X const pos = particle[position_]; + int const particleCellIdx = particle[localCellIdx_]; + /* multi-dim coordinate of the local cell inside the super cell */ + DataSpace localCell( + DataSpaceOperations::template map(particleCellIdx)); + /* interpolation of density */ + const picongpu::traits::FieldPosition fieldPosRho; + ValueType_Rho densityV + = Field2ParticleInterpolation()(cachedRho.shift(localCell).toCursor(), pos, fieldPosRho()); + /* and energy density field on the particle position */ + const picongpu::traits::FieldPosition fieldPosEne; + ValueType_Ene kinEnergyV + = Field2ParticleInterpolation()(cachedEne.shift(localCell).toCursor(), pos, fieldPosEne()); + + /* density in sim units */ + float_X const density = densityV[0]; + /* energy density in sim units */ + float_X const kinEnergyDensity = kinEnergyV[0]; + + /* Returns the new number of bound electrons for an integer number of macro electrons */ + IonizationAlgorithm ionizeAlgo; + uint32_t newMacroElectrons = ionizeAlgo(kinEnergyDensity, density, particle, this->randomGen(acc)); + + + return newMacroElectrons; + } + + /* Functor implementation * - * @warning substracting a float from a float can potentially - * create a negative boundElectrons number for the ion, - * see #1850 for details + * Ionization model specific particle creation + * + * \tparam T_parentIon type of ion species that is being ionized + * \tparam T_childElectron type of electron species that is created + * \param parentIon ion instance that is 
ionized + * \param childElectron electron instance that is created */ - parentIon[boundElectrons_] -= float_X(1.); - } - - }; - -} // namespace ionization -} // namespace particles + template + DINLINE void operator()(T_Acc const& acc, T_parentIon& parentIon, T_childElectron& childElectron) + { + /* for not mixing operations::assign up with the nvidia functor assign */ + namespace partOp = pmacc::particles::operations; + /* each thread sets the multiMask hard on "particle" (=1) */ + childElectron[multiMask_] = 1u; + const float_X weighting = parentIon[weighting_]; + + /* each thread initializes a clone of the parent ion but leaving out + * some attributes: + * - multiMask: reading from global memory takes longer than just setting it again explicitly + * - momentum: because the electron would get a higher energy because of the ion mass + * - boundElectrons: because species other than ions or atoms do not have them + * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + */ + auto targetElectronClone = partOp::deselect>(childElectron); + + partOp::assign(targetElectronClone, partOp::deselect(parentIon)); + + const float_X massIon = attribute::getMass(weighting, parentIon); + const float_X massElectron = attribute::getMass(weighting, childElectron); + + const float3_X electronMomentum(parentIon[momentum_] * (massElectron / massIon)); + + childElectron[momentum_] = electronMomentum; + + /* conservation of momentum + * \todo add conservation of mass */ + parentIon[momentum_] -= electronMomentum; + + /** ionization of the ion by reducing the number of bound electrons + * + * @warning substracting a float from a float can potentially + * create a negative boundElectrons number for the ion, + * see #1850 for details + */ + parentIon[boundElectrons_] -= float_X(1.); + } + }; + + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.def 
b/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.def index e8773886c0..16c4bb0c94 100644 --- a/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.def +++ b/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. * @@ -28,17 +28,14 @@ namespace picongpu { + namespace particles + { + namespace ionization + { + struct AlgorithmThomasFermi; -namespace particles -{ - -namespace ionization -{ - - struct AlgorithmThomasFermi; - -} // namespace ionization + } // namespace ionization -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.hpp b/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.hpp index 4608ddb5d0..5b819262f6 100644 --- a/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.hpp +++ b/include/picongpu/particles/ionization/byCollision/collisionalIonizationCalc.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/ionization/byCollision/ionizers.def b/include/picongpu/particles/ionization/byCollision/ionizers.def index 165b934ecc..ec6dcf3d92 100644 --- a/include/picongpu/particles/ionization/byCollision/ionizers.def +++ b/include/picongpu/particles/ionization/byCollision/ionizers.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. 
* diff --git a/include/picongpu/particles/ionization/byCollision/ionizers.hpp b/include/picongpu/particles/ionization/byCollision/ionizers.hpp index e40eec175e..8153ba4bec 100644 --- a/include/picongpu/particles/ionization/byCollision/ionizers.hpp +++ b/include/picongpu/particles/ionization/byCollision/ionizers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/ionization/byField/ADK/ADK.def b/include/picongpu/particles/ionization/byField/ADK/ADK.def index 27d4922bec..535c507af9 100644 --- a/include/picongpu/particles/ionization/byField/ADK/ADK.def +++ b/include/picongpu/particles/ionization/byField/ADK/ADK.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. * @@ -19,78 +19,83 @@ #pragma once +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.def" #include namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Ammosov-Delone-Krainov tunneling model - * - * \tparam T_DestSpecies electron species to be created - * \tparam T_SrcSpecies ion species to be ionized - * default is boost::mpl placeholder because specialization - * cannot be known in list of particle species' flags - * \see speciesDefinition.param - */ - template - struct ADK_Impl; - - /** Ammosov-Delone-Krainov tunneling model - linear laser polarization - * - * - takes the ionization energies of the various charge states of ions - * - calculates the ionization rates and then the ionization probabilities from them - * - ATTENTION: this approach is not very applicable for rapidly changing high intensity laser fields - * - this is a Monte Carlo method: if a random number is smaller - * or equal than the ionization probability -> increase the charge state - * - see for example: Delone, N. B.; Krainov, V. P. (1998). 
- * "Tunneling and barrier-suppression ionization of atoms and ions in a laser radiation field" - * doi:10.1070/PU1998v041n05ABEH000393 - * - * wrapper class, - * needed because the SrcSpecies cannot be known during the - * first specialization of the ionization model in the particle definition - * \see speciesDefinition.param - */ - template - struct ADKLinPol + namespace particles { - /* Boolean value that results in an additional polarization factor in - * the ionization rate for linear polarization */ - static constexpr bool linPol = true; - using IonizationAlgorithm = particles::ionization::AlgorithmADK< linPol >; - using type = ADK_Impl< IonizationAlgorithm, T_DestSpecies >; - }; + namespace ionization + { + /** Ammosov-Delone-Krainov tunneling model + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * \tparam T_SrcSpecies ion species to be ionized + * default is boost::mpl placeholder because specialization + * cannot be known in list of particle species' flags + * \see speciesDefinition.param + */ + template< + typename T_IonizationAlgorithm, + typename T_DestSpecies, + typename T_IonizationCurrent, + typename T_SrcSpecies = bmpl::_1> + struct ADK_Impl; - /** Ammosov-Delone-Krainov tunneling model - circular laser polarization - * - * - takes the ionization energies of the various charge states of ions - * - calculates the ionization rates and then the ionization probabilities from them - * - ATTENTION: this approach is not very applicable for rapidly changing high intensity laser fields - * - this is a Monte Carlo method: if a random number is smaller - * or equal than the ionization probability -> increase the charge state - * - see for example: Delone, N. B.; Krainov, V. P. (1998). 
- * "Tunneling and barrier-suppression ionization of atoms and ions in a laser radiation field" - * doi:10.1070/PU1998v041n05ABEH000393 - * - * wrapper class, - * needed because the SrcSpecies cannot be known during the - * first specialization of the ionization model in the particle definition - * \see speciesDefinition.param - */ - template - struct ADKCircPol - { - /* Boolean value that results in an additional polarization factor in - * the ionization rate for linear polarization */ - static constexpr bool linPol = false; - using IonizationAlgorithm = particles::ionization::AlgorithmADK< linPol >; - using type = ADK_Impl< IonizationAlgorithm, T_DestSpecies >; - }; + /** Ammosov-Delone-Krainov tunneling model - linear laser polarization + * + * - takes the ionization energies of the various charge states of ions + * - calculates the ionization rates and then the ionization probabilities from them + * - ATTENTION: this approach is not very applicable for rapidly changing high intensity laser fields + * - this is a Monte Carlo method: if a random number is smaller + * or equal than the ionization probability -> increase the charge state + * - see for example: Delone, N. B.; Krainov, V. P. (1998). 
+ * "Tunneling and barrier-suppression ionization of atoms and ions in a laser radiation field" + * doi:10.1070/PU1998v041n05ABEH000393 + * + * wrapper class, + * needed because the SrcSpecies cannot be known during the + * first specialization of the ionization model in the particle definition + * \see speciesDefinition.param + */ + template + struct ADKLinPol + { + /* Boolean value that results in an additional polarization factor in + * the ionization rate for linear polarization */ + static constexpr bool linPol = true; + using IonizationAlgorithm = particles::ionization::AlgorithmADK; + using type = ADK_Impl; + }; + + /** Ammosov-Delone-Krainov tunneling model - circular laser polarization + * + * - takes the ionization energies of the various charge states of ions + * - calculates the ionization rates and then the ionization probabilities from them + * - ATTENTION: this approach is not very applicable for rapidly changing high intensity laser fields + * - this is a Monte Carlo method: if a random number is smaller + * or equal than the ionization probability -> increase the charge state + * - see for example: Delone, N. B.; Krainov, V. P. (1998). 
+ * "Tunneling and barrier-suppression ionization of atoms and ions in a laser radiation field" + * doi:10.1070/PU1998v041n05ABEH000393 + * + * wrapper class, + * needed because the SrcSpecies cannot be known during the + * first specialization of the ionization model in the particle definition + * \see speciesDefinition.param + */ + template + struct ADKCircPol + { + /* Boolean value that results in an additional polarization factor in + * the ionization rate for linear polarization */ + static constexpr bool linPol = false; + using IonizationAlgorithm = particles::ionization::AlgorithmADK; + using type = ADK_Impl; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp b/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp index 1fa1a13650..a6c8563f91 100644 --- a/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp +++ b/include/picongpu/particles/ionization/byField/ADK/ADK_Impl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. 
* @@ -22,7 +22,6 @@ #include "picongpu/simulation_defines.hpp" #include #include -#include "picongpu/traits/UsesRNG.hpp" #include "picongpu/fields/CellType.hpp" #include "picongpu/fields/FieldB.hpp" @@ -30,6 +29,8 @@ #include "picongpu/traits/FieldPosition.hpp" #include "picongpu/particles/ionization/byField/ADK/ADK.def" #include "picongpu/particles/ionization/byField/ADK/AlgorithmADK.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/JIonizationCalc.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/JIonizationAssignment.hpp" #include #include @@ -45,277 +46,231 @@ namespace picongpu { -namespace traits -{ - /** specialization of the UsesRNG trait - * --> ionization module uses random number generation - */ - template - struct UsesRNG > : - public boost::true_type - { - }; -} // namespace traits - -namespace particles -{ -namespace ionization -{ - - /** \struct ADK_Impl - * - * \brief Ammosov-Delone-Krainov - * Tunneling ionization for hydrogenlike atoms - * - * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created - * \tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized - */ - template - struct ADK_Impl + namespace particles { - - using DestSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_DestSpecies - >; - using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SrcSpecies - >; - - using FrameType = typename SrcSpecies::FrameType; - - /* specify field to particle interpolation scheme */ - using Field2ParticleInterpolation = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; - - /* margins around the supercell for the interpolation of the field on the cells */ - using LowerMargin = typename GetMargin::LowerMargin; - using UpperMargin = typename GetMargin::UpperMargin; - - /* relevant area of a block */ - using BlockArea = SuperCellDescription< - typename 
MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - BlockArea BlockDescription; - - private: - - /* define ionization ALGORITHM (calculation) for ionization MODEL */ - using IonizationAlgorithm = T_IonizationAlgorithm; - - /* random number generator */ - using RNGFactory = pmacc::random::RNGProvider; - using Distribution = pmacc::random::distributions::Uniform; - using RandomGen = typename RNGFactory::GetRandomType::type; - RandomGen randomGen; - - using TVec = MappingDesc::SuperCellSize; - - using ValueType_E = FieldE::ValueType; - using ValueType_B = FieldB::ValueType; - /* global memory EM-field device databoxes */ - PMACC_ALIGN(eBox, FieldE::DataBoxType); - PMACC_ALIGN(bBox, FieldB::DataBoxType); - /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox >); - PMACC_ALIGN(cachedB, DataBox >); - - public: - /* host constructor initializing member : random number generator */ - ADK_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom()) - { - DataConnector &dc = Environment<>::get().DataConnector(); - /* initialize pointers on host-side E-(B-)field databoxes */ - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - /* initialize device-side E-(B-)field databoxes */ - eBox = fieldE->getDeviceDataBox(); - bBox = fieldB->getDeviceDataBox(); - - } - - /** cache fields used by this functor + namespace ionization + { + /** \struct ADK_Impl * - * @warning this is a collective method and calls synchronize + * \brief Ammosov-Delone-Krainov + * Tunneling ionization for hydrogenlike atoms * - * @tparam T_Acc alpaka accelerator type - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * - * @param acc alpaka accelerator - * @param blockCell relative offset (in cells) to the local domain plus the guarding cells - * @param workerCfg configuration of the worker + * \tparam T_DestSpecies type or name as boost::mpl::string 
of the electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * \tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized */ template< - typename T_Acc , - typename T_WorkerCfg - > - DINLINE void collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg - ) - { - /* caching of E and B fields */ - cachedB = CachedBox::create< - 0, - ValueType_B - >( - acc, - BlockArea() - ); - cachedE = CachedBox::create< - 1, - ValueType_E - >( - acc, - BlockArea() - ); - - /* instance of nvidia assignment operator */ - nvidia::functors::Assign assign; - /* copy fields from global to shared */ - auto fieldBBlock = bBox.shift(blockCell); - ThreadCollective< - BlockArea, - T_WorkerCfg::numWorkers - > collective( workerCfg.getWorkerIdx( ) ); - collective( - acc, - assign, - cachedB, - fieldBBlock - ); - /* copy fields from global to shared */ - auto fieldEBlock = eBox.shift(blockCell); - collective( - acc, - assign, - cachedE, - fieldEBlock - ); - - /* wait for shared memory to be initialized */ - __syncthreads(); - } - - /** Initialization function on device - * - * \brief Cache EM-fields on device - * and initialize possible prerequisites for ionization, like e.g. random number generator. - * - * This function will be called inline on the device which must happen BEFORE threads diverge - * during loop execution. The reason for this is the `__syncthreads()` call which is necessary after - * initializing the E-/B-field shared boxes in shared memory. 
- */ - template< typename T_Acc > - DINLINE void init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset - ) - { - /* initialize random number generator with the local cell index in the simulation */ - this->randomGen.init(localCellOffset); - } - - /** Determine number of new macro electrons due to ionization - * - * \param ionFrame reference to frame of the to-be-ionized particles - * \param localIdx local (linear) index in super cell / frame - */ - template< typename T_Acc > - DINLINE uint32_t numNewParticles(const T_Acc& acc, FrameType& ionFrame, int localIdx) - { - /* alias for the single macro-particle */ - auto particle = ionFrame[localIdx]; - /* particle position, used for field-to-particle interpolation */ - floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; - /* multi-dim coordinate of the local cell inside the super cell */ - DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - /* interpolation of E- */ - const picongpu::traits::FieldPosition< - fields::CellType, - FieldE - > fieldPosE; - ValueType_E eField = Field2ParticleInterpolation() - (cachedE.shift(localCell).toCursor(), pos, fieldPosE()); - /* and B-field on the particle position */ - const picongpu::traits::FieldPosition fieldPosB; - ValueType_B bField = Field2ParticleInterpolation() - (cachedB.shift(localCell).toCursor(), pos, fieldPosB()); - - /* define number of bound macro electrons before ionization */ - float_X prevBoundElectrons = particle[boundElectrons_]; - - IonizationAlgorithm ionizeAlgo; - /* determine number of new macro electrons to be created */ - uint32_t newMacroElectrons = ionizeAlgo( - bField, eField, - particle, this->randomGen(acc) - ); - - return newMacroElectrons; - - } - - /* Functor implementation - * - * Ionization model specific particle creation - * - * \tparam T_parentIon type of ion species that is being ionized - * \tparam T_childElectron 
type of electron species that is created - * \param parentIon ion instance that is ionized - * \param childElectron electron instance that is created - */ - template - DINLINE void operator()(const T_Acc& acc, T_parentIon& parentIon,T_childElectron& childElectron) + typename T_IonizationAlgorithm, + typename T_DestSpecies, + typename T_IonizationCurrent, + typename T_SrcSpecies> + struct ADK_Impl { - /* for not mixing operations::assign up with the nvidia functor assign */ - namespace partOp = pmacc::particles::operations; - /* each thread sets the multiMask hard on "particle" (=1) */ - childElectron[multiMask_] = 1u; - const float_X weighting = parentIon[weighting_]; - - /* each thread initializes a clone of the parent ion but leaving out - * some attributes: - * - multiMask: reading from global memory takes longer than just setting it again explicitly - * - momentum: because the electron would get a higher energy because of the ion mass - * - boundElectrons: because species other than ions or atoms do not have them - * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + using DestSpecies = pmacc::particles::meta::FindByNameOrType_t; + using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t; + + using FrameType = typename SrcSpecies::FrameType; + + /* specify field to particle interpolation scheme */ + using Field2ParticleInterpolation = + typename pmacc::traits::Resolve>::type>::type; + + /* margins around the supercell for the interpolation of the field on the cells */ + using LowerMargin = typename GetMargin::LowerMargin; + using UpperMargin = typename GetMargin::UpperMargin; + + /* relevant area of a block */ + using BlockArea = SuperCellDescription; + + BlockArea BlockDescription; + + private: + /* define ionization ALGORITHM (calculation) for ionization MODEL */ + using IonizationAlgorithm = T_IonizationAlgorithm; + + /* random number generator */ + using RNGFactory = pmacc::random::RNGProvider; + using Distribution = 
pmacc::random::distributions::Uniform; + using RandomGen = typename RNGFactory::GetRandomType::type; + RandomGen randomGen; + + using TVec = MappingDesc::SuperCellSize; + + using ValueType_E = FieldE::ValueType; + using ValueType_B = FieldB::ValueType; + /* global memory EM-field and current density device databoxes */ + PMACC_ALIGN(eBox, FieldE::DataBoxType); + PMACC_ALIGN(bBox, FieldB::DataBoxType); + PMACC_ALIGN(jBox, FieldJ::DataBoxType); + /* shared memory EM-field device databoxes */ + PMACC_ALIGN(cachedE, DataBox>); + PMACC_ALIGN(cachedB, DataBox>); + + public: + /* host constructor initializing member : random number generator */ + ADK_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom()) + { + DataConnector& dc = Environment<>::get().DataConnector(); + /* initialize pointers on host-side E-(B-)field and current density databoxes */ + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + auto fieldJ = dc.get(FieldJ::getName(), true); + /* initialize device-side E-(B-)field and current density databoxes */ + eBox = fieldE->getDeviceDataBox(); + bBox = fieldB->getDeviceDataBox(); + jBox = fieldJ->getDeviceDataBox(); + } + + /** cache fields used by this functor + * + * @warning this is a collective method and calls synchronize + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * + * @param acc alpaka accelerator + * @param blockCell relative offset (in cells) to the local domain plus the guarding cells + * @param workerCfg configuration of the worker */ - auto targetElectronClone = partOp::deselect >(childElectron); - - partOp::assign(targetElectronClone, partOp::deselect(parentIon)); - - const float_X massIon = attribute::getMass(weighting,parentIon); - const float_X massElectron = attribute::getMass(weighting,childElectron); - - const float3_X electronMomentum (parentIon[momentum_]*(massElectron/massIon)); - - 
childElectron[momentum_] = electronMomentum; - - /* conservation of momentum - * \todo add conservation of mass */ - parentIon[momentum_] -= electronMomentum; - - /** ionization of the ion by reducing the number of bound electrons + template + DINLINE void collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const T_WorkerCfg& workerCfg) + { + /* shift origin of jbox to supercell of particle */ + jBox = jBox.shift(blockCell); + + /* caching of E and B fields */ + cachedB = CachedBox::create<0, ValueType_B>(acc, BlockArea()); + cachedE = CachedBox::create<1, ValueType_E>(acc, BlockArea()); + + /* instance of nvidia assignment operator */ + nvidia::functors::Assign assign; + /* copy fields from global to shared */ + auto fieldBBlock = bBox.shift(blockCell); + ThreadCollective collective(workerCfg.getWorkerIdx()); + collective(acc, assign, cachedB, fieldBBlock); + /* copy fields from global to shared */ + auto fieldEBlock = eBox.shift(blockCell); + collective(acc, assign, cachedE, fieldEBlock); + + /* wait for shared memory to be initialized */ + cupla::__syncthreads(acc); + } + + /** Initialization function on device + * + * \brief Cache EM-fields on device + * and initialize possible prerequisites for ionization, like e.g. random number generator. * - * @warning substracting a float from a float can potentially - * create a negative boundElectrons number for the ion, - * see #1850 for details + * This function will be called inline on the device which must happen BEFORE threads diverge + * during loop execution. The reason for this is the `cupla::__syncthreads( acc )` call which is + * necessary after initializing the E-/B-field shared boxes in shared memory. 
*/ - parentIon[boundElectrons_] -= float_X(1.); - } - - }; - -} // namespace ionization -} // namespace particles + template + DINLINE void init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset) + { + /* initialize random number generator with the local cell index in the simulation */ + this->randomGen.init(localCellOffset); + } + + /** Determine number of new macro electrons due to ionization + * + * \param ionFrame reference to frame of the to-be-ionized particles + * \param localIdx local (linear) index in super cell / frame + */ + template + DINLINE uint32_t numNewParticles(const T_Acc& acc, FrameType& ionFrame, int localIdx) + { + /* alias for the single macro-particle */ + auto particle = ionFrame[localIdx]; + /* particle position, used for field-to-particle interpolation */ + floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; + /* multi-dim coordinate of the local cell inside the super cell */ + DataSpace localCell( + DataSpaceOperations::template map(particleCellIdx)); + /* interpolation of E- */ + const picongpu::traits::FieldPosition fieldPosE; + ValueType_E eField + = Field2ParticleInterpolation()(cachedE.shift(localCell).toCursor(), pos, fieldPosE()); + /* and B-field on the particle position */ + const picongpu::traits::FieldPosition fieldPosB; + ValueType_B bField + = Field2ParticleInterpolation()(cachedB.shift(localCell).toCursor(), pos, fieldPosB()); + + /* define number of bound macro electrons before ionization */ + float_X prevBoundElectrons = particle[boundElectrons_]; + + IonizationAlgorithm ionizeAlgo; + /* determine number of new macro electrons to be created and energy used for ionization */ + auto retValue = ionizeAlgo(bField, eField, particle, this->randomGen(acc)); + IonizationCurrent{}( + retValue, + particle[weighting_], + jBox.shift(localCell), + eField, + acc, + pos); + + return retValue.newMacroElectrons; + } + + /* Functor 
implementation + * + * Ionization model specific particle creation + * + * \tparam T_parentIon type of ion species that is being ionized + * \tparam T_childElectron type of electron species that is created + * \param parentIon ion instance that is ionized + * \param childElectron electron instance that is created + */ + template + DINLINE void operator()(const T_Acc& acc, T_parentIon& parentIon, T_childElectron& childElectron) + { + /* for not mixing operations::assign up with the nvidia functor assign */ + namespace partOp = pmacc::particles::operations; + /* each thread sets the multiMask hard on "particle" (=1) */ + childElectron[multiMask_] = 1u; + const float_X weighting = parentIon[weighting_]; + + /* each thread initializes a clone of the parent ion but leaving out + * some attributes: + * - multiMask: reading from global memory takes longer than just setting it again explicitly + * - momentum: because the electron would get a higher energy because of the ion mass + * - boundElectrons: because species other than ions or atoms do not have them + * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + */ + auto targetElectronClone = partOp::deselect>(childElectron); + + partOp::assign(targetElectronClone, partOp::deselect(parentIon)); + + const float_X massIon = attribute::getMass(weighting, parentIon); + const float_X massElectron = attribute::getMass(weighting, childElectron); + + const float3_X electronMomentum(parentIon[momentum_] * (massElectron / massIon)); + + childElectron[momentum_] = electronMomentum; + + /* conservation of momentum + * \todo add conservation of mass */ + parentIon[momentum_] -= electronMomentum; + + /** ionization of the ion by reducing the number of bound electrons + * + * @warning substracting a float from a float can potentially + * create a negative boundElectrons number for the ion, + * see #1850 for details + */ + parentIon[boundElectrons_] -= float_X(1.); + } + }; + + } // namespace ionization + } // 
namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/ADK/AlgorithmADK.hpp b/include/picongpu/particles/ionization/byField/ADK/AlgorithmADK.hpp index a6ed85592f..663e1357bf 100644 --- a/include/picongpu/particles/ionization/byField/ADK/AlgorithmADK.hpp +++ b/include/picongpu/particles/ionization/byField/ADK/AlgorithmADK.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. * @@ -27,6 +27,7 @@ #include #include #include "picongpu/particles/ionization/utilities.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp" /** \file AlgorithmADK.hpp * @@ -36,110 +37,103 @@ * states by decreasing the number of bound electrons * - is called with the IONIZATION MODEL, specifically by setting the flag in * speciesDefinition.param -*/ + */ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Calculation for the Ammosov-Delone-Krainov tunneling model - * - * for either linear or circular laser polarization - * - * \tparam T_linPol boolean value that is true for lin. pol. and false for circ. pol. 
- */ - template - struct AlgorithmADK + namespace particles { - /** Functor implementation - * \tparam EType type of electric field - * \tparam BType type of magnetic field - * \tparam ParticleType type of particle to be ionized - * - * \param bField magnetic field value at t=0 - * \param eField electric field value at t=0 - * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 - * \param randNr random number, equally distributed in range [0.:1.0] - * - * \return number of new macro electrons to be created - */ - template - HDINLINE uint32_t - operator()( const BType bField, const EType eField, ParticleType& parentIon, float_X randNr ) + namespace ionization { - - float_X const protonNumber = GetAtomicNumbers::type::numberOfProtons; - float_X const chargeState = attribute::getChargeState(parentIon); - - /* verify that ion is not completely ionized */ - if( chargeState < protonNumber ) + /** Calculation for the Ammosov-Delone-Krainov tunneling model + * + * for either linear or circular laser polarization + * + * \tparam T_linPol boolean value that is true for lin. pol. and false for circ. pol. 
+ */ + template + struct AlgorithmADK { - uint32_t const cs = math::float2int_rd(chargeState); - float_X const iEnergy = typename GetIonizationEnergies::type{ }[cs]; - - constexpr float_X pi = pmacc::algorithms::math::Pi< float_X >::value; - /* electric field in atomic units - only absolute value */ - float_X const eInAU = math::abs( eField ) / ATOMIC_UNIT_EFIELD; - - /* the charge that attracts the electron that is to be ionized: - * equals `protonNumber - #allInnerElectrons` - */ - float_X const effectiveCharge = chargeState + float_X( 1.0 ); - /* effective principal quantum number (unitless) */ - float_X const nEff = effectiveCharge / math::sqrt( float_X( 2.0 ) * iEnergy ); - /* nameless variable for convenience dFromADK*/ - float_X const dBase = float_X( 4.0 ) * util::cube( effectiveCharge ) / - ( eInAU * util::quad( nEff ) ) ; - float_X const dFromADK = math::pow( dBase, nEff ); - - /* ionization rate (for CIRCULAR polarization)*/ - float_X rateADK = eInAU * util::square( dFromADK ) / - ( float_X( 8.0 ) * pi * effectiveCharge ) * - math::exp( float_X( -2.0 ) * util::cube( effectiveCharge ) / - ( float_X( 3.0 ) * util::cube( nEff ) * eInAU ) - ); - - /* in case of linear polarization the rate is modified by an additional factor */ - if( T_linPol ) - { - /* factor from averaging over one laser cycle with LINEAR polarization */ - float_X const polarizationFactor = math::sqrt( - float_X( 3.0 ) * util::cube( nEff ) * eInAU / - ( pi * util::cube( effectiveCharge ) ) - ); - - rateADK *= polarizationFactor; - } - - /* simulation time step in atomic units */ - float_X const timeStepAU = float_X( DELTA_T / ATOMIC_UNIT_TIME ); - /* ionization probability + /** Functor implementation + * \tparam EType type of electric field + * \tparam BType type of magnetic field + * \tparam ParticleType type of particle to be ionized * - * probability = rate * time step - * --> for infinitesimal time steps + * \param bField magnetic field value at t=0 + * \param eField electric field 
value at t=0 + * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 + * \param randNr random number, equally distributed in range [0.:1.0] * - * the whole ensemble should then follow - * P = 1 - exp(-rate * time step) if the laser wavelength is - * sampled well enough + * \return ionization energy and number of new macro electrons to be created */ - float_X const probADK = rateADK * timeStepAU; - - /* ionization condition */ - if( randNr < probADK ) + template + HDINLINE IonizerReturn + operator()(const BType bField, const EType eField, ParticleType& parentIon, float_X randNr) { - /* return number of macro electrons to produce */ - return 1u; + float_X const protonNumber = GetAtomicNumbers::type::numberOfProtons; + float_X const chargeState = attribute::getChargeState(parentIon); + + /* verify that ion is not completely ionized */ + if(chargeState < protonNumber) + { + uint32_t const cs = pmacc::math::float2int_rd(chargeState); + float_X const iEnergy = typename GetIonizationEnergies::type{}[cs]; + + constexpr float_X pi = pmacc::math::Pi::value; + /* electric field in atomic units - only absolute value */ + float_X const eInAU = math::abs(eField) / ATOMIC_UNIT_EFIELD; + + /* the charge that attracts the electron that is to be ionized: + * equals `protonNumber - #allInnerElectrons` + */ + float_X const effectiveCharge = chargeState + float_X(1.0); + /* effective principal quantum number (unitless) */ + float_X const nEff = effectiveCharge / math::sqrt(float_X(2.0) * iEnergy); + /* nameless variable for convenience dFromADK*/ + float_X const dBase = float_X(4.0) * util::cube(effectiveCharge) / (eInAU * util::quad(nEff)); + float_X const dFromADK = math::pow(dBase, nEff); + + /* ionization rate (for CIRCULAR polarization)*/ + float_X rateADK = eInAU * util::square(dFromADK) / (float_X(8.0) * pi * effectiveCharge) + * math::exp(float_X(-2.0) * util::cube(effectiveCharge) + / (float_X(3.0) * util::cube(nEff) * eInAU)); + + /* 
in case of linear polarization the rate is modified by an additional factor */ + if(T_linPol) + { + /* factor from averaging over one laser cycle with LINEAR polarization */ + float_X const polarizationFactor = math::sqrt( + float_X(3.0) * util::cube(nEff) * eInAU / (pi * util::cube(effectiveCharge))); + + rateADK *= polarizationFactor; + } + + /* simulation time step in atomic units */ + float_X const timeStepAU = float_X(DELTA_T / ATOMIC_UNIT_TIME); + /* ionization probability + * + * probability = rate * time step + * --> for infinitesimal time steps + * + * the whole ensemble should then follow + * P = 1 - exp(-rate * time step) if the laser wavelength is + * sampled well enough + */ + float_X const probADK = rateADK * timeStepAU; + + /* ionization condition */ + if(randNr < probADK) + { + /* return ionization energy and number of macro electrons to produce */ + return IonizerReturn{iEnergy, 1u}; + } + } + /* no ionization */ + return IonizerReturn{0.0, 0u}; } - } - /* no ionization */ - return 0u; - } - }; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp b/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp index 0192943bf3..264d8d3803 100644 --- a/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp +++ b/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten +/* Copyright 2014-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. 
* @@ -23,6 +23,7 @@ #include "picongpu/particles/traits/GetIonizationEnergies.hpp" #include "picongpu/particles/traits/GetAtomicNumbers.hpp" #include "picongpu/traits/attribute/GetChargeState.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp" /** @file AlgorithmBSI.hpp * @@ -35,61 +36,56 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Calculation for the Barrier Suppression Ionization model - */ - struct AlgorithmBSI + namespace particles { - - /** Functor implementation - * - * \tparam EType type of electric field - * \tparam ParticleType type of particle to be ionized - * - * \param eField electric field value at t=0 - * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 - * - * and "t" being with respect to the current time step (on step/half a step backward/-""-forward) - * - * \return the number of electrons to produce - * (current implementation supports only 0 or 1 per execution) - */ - template - HDINLINE uint32_t - operator()( const EType eField, ParticleType& parentIon ) + namespace ionization { - - float_X const protonNumber = GetAtomicNumbers::type::numberOfProtons; - float_X const chargeState = attribute::getChargeState(parentIon); - - /* verify that ion is not completely ionized */ - if (chargeState < protonNumber) + /** Calculation for the Barrier Suppression Ionization model + */ + struct AlgorithmBSI { - uint32_t const cs = math::float2int_rd(chargeState); - /* ionization potential in atomic units */ - float_X const iEnergy = typename GetIonizationEnergies::type{ }[cs]; - /* the charge that attracts the electron that is to be ionized: - * equals `protonNumber - no. 
allInnerElectrons` + /** Functor implementation + * + * \tparam EType type of electric field + * \tparam ParticleType type of particle to be ionized + * + * \param eField electric field value at t=0 + * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 + * + * and "t" being with respect to the current time step (on step/half a step backward/-""-forward) + * + * \return ionization energy and number of new macro electrons to be created + * (current implementation supports only 0 or 1 per execution) */ - float_X const effectiveCharge = chargeState + float_X(1.0); - /* critical field strength in atomic units */ - float_X const critField = iEnergy*iEnergy / (float_X(4.0) * effectiveCharge); - /* ionization condition */ - if (math::abs(eField) / ATOMIC_UNIT_EFIELD >= critField) + template + HDINLINE IonizerReturn operator()(const EType eField, ParticleType& parentIon) { - /* return number of macro electrons to produce */ - return 1u; + float_X const protonNumber = GetAtomicNumbers::type::numberOfProtons; + float_X const chargeState = attribute::getChargeState(parentIon); + + /* verify that ion is not completely ionized */ + if(chargeState < protonNumber) + { + uint32_t const cs = pmacc::math::float2int_rd(chargeState); + /* ionization potential in atomic units */ + float_X const iEnergy = typename GetIonizationEnergies::type{}[cs]; + /* the charge that attracts the electron that is to be ionized: + * equals `protonNumber - no. 
allInnerElectrons` + */ + float_X const effectiveCharge = chargeState + float_X(1.0); + /* critical field strength in atomic units */ + float_X const critField = iEnergy * iEnergy / (float_X(4.0) * effectiveCharge); + /* ionization condition */ + if(math::abs(eField) / ATOMIC_UNIT_EFIELD >= critField) + { + /* return ionization energy and number of macro electrons to produce */ + return IonizerReturn{iEnergy, 1u}; + } + } + return IonizerReturn{0.0, 0u}; } - } - /* no ionization */ - return 0u; - } - }; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIEffectiveZ.hpp b/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIEffectiveZ.hpp index 56fa3fd4d4..472e219969 100644 --- a/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIEffectiveZ.hpp +++ b/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIEffectiveZ.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten +/* Copyright 2014-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. 
* @@ -24,6 +24,7 @@ #include "picongpu/particles/traits/GetAtomicNumbers.hpp" #include "picongpu/particles/traits/GetEffectiveNuclearCharge.hpp" #include "picongpu/traits/attribute/GetChargeState.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp" /** @file AlgorithmBSIEffectiveZ.hpp * @@ -36,59 +37,55 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Calculation for the Barrier Suppression Ionization model - */ - struct AlgorithmBSIEffectiveZ + namespace particles { - - /** Functor implementation - * - * \tparam EType type of electric field - * \tparam ParticleType type of particle to be ionized - * - * \param eField electric field value at t=0 - * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 - * - * and "t" being with respect to the current time step (on step/half a step backward/-""-forward) - * - * \return the number of electrons to produce - * (current implementation supports only 0 or 1 per execution) - */ - template - HDINLINE uint32_t - operator()( const EType eField, ParticleType& parentIon ) + namespace ionization { - - const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; - float_X chargeState = attribute::getChargeState(parentIon); - - /* verify that ion is not completely ionized */ - if (chargeState < protonNumber) + /** Calculation for the Barrier Suppression Ionization model + */ + struct AlgorithmBSIEffectiveZ { - uint32_t cs = math::float2int_rd(chargeState); - /* ionization potential in atomic units */ - const float_X iEnergy = typename GetIonizationEnergies::type{ }[cs]; - const float_X ZEff = typename GetEffectiveNuclearCharge::type{ }[cs]; - /* critical field strength in atomic units */ - float_X critField = iEnergy*iEnergy / (float_X(4.0) * ZEff); - - /* ionization condition */ - if (math::abs(eField) / ATOMIC_UNIT_EFIELD >= critField) + /** Functor implementation + * + * \tparam EType type of electric field + * 
\tparam ParticleType type of particle to be ionized + * + * \param eField electric field value at t=0 + * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 + * + * and "t" being with respect to the current time step (on step/half a step backward/-""-forward) + * + * \return ionization energy and number of new macro electrons to be created + * (current implementation supports only 0 or 1 per execution) + */ + template + HDINLINE IonizerReturn operator()(const EType eField, ParticleType& parentIon) { - /* return number of macro electrons to produce */ - return 1u; + const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + float_X chargeState = attribute::getChargeState(parentIon); + + /* verify that ion is not completely ionized */ + if(chargeState < protonNumber) + { + uint32_t cs = pmacc::math::float2int_rd(chargeState); + /* ionization potential in atomic units */ + const float_X iEnergy = typename GetIonizationEnergies::type{}[cs]; + const float_X ZEff = typename GetEffectiveNuclearCharge::type{}[cs]; + /* critical field strength in atomic units */ + float_X critField = iEnergy * iEnergy / (float_X(4.0) * ZEff); + + /* ionization condition */ + if(math::abs(eField) / ATOMIC_UNIT_EFIELD >= critField) + { + /* return ionization energy and number of macro electrons to produce */ + return IonizerReturn{iEnergy, 1u}; + } + } + /* no ionization */ + return IonizerReturn{0.0, 0u}; } - } - /* no ionization */ - return 0u; - } - }; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIStarkShifted.hpp b/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIStarkShifted.hpp index bf8e898648..cb5e6320ba 100644 --- a/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIStarkShifted.hpp +++ 
b/include/picongpu/particles/ionization/byField/BSI/AlgorithmBSIStarkShifted.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten +/* Copyright 2014-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. * @@ -23,6 +23,7 @@ #include "picongpu/particles/traits/GetIonizationEnergies.hpp" #include "picongpu/particles/traits/GetAtomicNumbers.hpp" #include "picongpu/traits/attribute/GetChargeState.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp" /** @file AlgorithmBSIStarkShifted.hpp * @@ -35,58 +36,55 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Calculation for the Barrier Suppression Ionization model - */ - struct AlgorithmBSIStarkShifted + namespace particles { - - /** Functor implementation - * - * \tparam EType type of electric field - * \tparam ParticleType type of particle to be ionized - * - * \param eField electric field value at t=0 - * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 - * - * and "t" being with respect to the current time step (on step/half a step backward/-""-forward) - * - * \return the number of electrons to produce - * (current implementation supports only 0 or 1 per execution) - */ - template - HDINLINE uint32_t - operator()( const EType eField, ParticleType& parentIon ) + namespace ionization { - - const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; - float_X chargeState = attribute::getChargeState(parentIon); - - /* verify that ion is not completely ionized */ - if (chargeState < protonNumber) + /** Calculation for the Barrier Suppression Ionization model + */ + struct AlgorithmBSIStarkShifted { - uint32_t cs = math::float2int_rd(chargeState); - /* ionization potential in atomic units */ - const float_X iEnergy = typename GetIonizationEnergies::type{ }[cs]; - /* critical field strength in atomic units */ - float_X critField = (math::sqrt(float_X(2.))-float_X(1.)) * 
math::pow(iEnergy,float_X(3./2.)); - - /* ionization condition */ - if (math::abs(eField) / ATOMIC_UNIT_EFIELD >= critField) + /** Functor implementation + * + * \tparam EType type of electric field + * \tparam ParticleType type of particle to be ionized + * + * \param eField electric field value at t=0 + * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 + * + * and "t" being with respect to the current time step (on step/half a step backward/-""-forward) + * + * \return ionization energy and number of new macro electrons to be created + * (current implementation supports only 0 or 1 per execution) + */ + template + HDINLINE IonizerReturn operator()(const EType eField, ParticleType& parentIon) { - /* return number of electrons to produce */ - return 1u; + const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + float_X chargeState = attribute::getChargeState(parentIon); + + /* verify that ion is not completely ionized */ + if(chargeState < protonNumber) + { + uint32_t cs = pmacc::math::float2int_rd(chargeState); + /* ionization potential in atomic units */ + const float_X iEnergy = typename GetIonizationEnergies::type{}[cs]; + /* critical field strength in atomic units */ + float_X critField + = (math::sqrt(float_X(2.)) - float_X(1.)) * math::pow(iEnergy, float_X(3. 
/ 2.)); + + /* ionization condition */ + if(math::abs(eField) / ATOMIC_UNIT_EFIELD >= critField) + { + /* return ionization energy number of electrons to produce */ + return IonizerReturn{iEnergy, 1u}; + } + } + /* no ionization */ + return IonizerReturn{0.0, 0u}; } - } - /* no ionization */ - return 0u; - } - }; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/BSI/BSI.def b/include/picongpu/particles/ionization/byField/BSI/BSI.def index 5a2373b673..bc62925c8d 100644 --- a/include/picongpu/particles/ionization/byField/BSI/BSI.def +++ b/include/picongpu/particles/ionization/byField/BSI/BSI.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. * @@ -20,88 +20,97 @@ #pragma once #include +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.def" namespace picongpu { -namespace particles -{ -namespace ionization -{ - /** Barrier Suppression Ionization - Implementation - * - * \tparam T_DestSpecies electron species to be created - * \tparam T_SrcSpecies particle species that is ionized - * default is boost::mpl placeholder because specialization - * cannot be known in list of particle species' flags - * \see speciesDefinition.param - */ - template< typename T_IonizationAlgorithm, typename T_DestSpecies, typename T_SrcSpecies = bmpl::_1 > - struct BSI_Impl; - - /** Barrier Suppression Ionization - Hydrogen-Like - * - * - takes the ionization energies of the various charge states of ions - * - calculates the corresponding field strengths necessary to overcome - * the binding energy of the electron to the core - * - if the field strength is locally exceeded: increase the charge state - * - see for example: Delone, N. B.; Krainov, V. P. (1998). 
- * "Tunneling and barrier-suppression ionization of atoms and ions in a laser radiation field" - * doi:10.1070/PU1998v041n05ABEH000393 - * - * - This model accounts for naive ion charge shielding by inner electrons - * as it assumes that the charge the electron 'feels' is equal to - * `proton number - number of inner shell electrons`. - * - This model neglects the Stark upshift of ionization energies. - * - * \tparam T_DestSpecies electron species to be created - * - * wrapper class, - * needed because the SrcSpecies cannot be known during the - * first specialization of the ionization model in the particle definition - * \see speciesDefinition.param - */ - template< typename T_DestSpecies > - struct BSI + namespace particles { - using IonizationAlgorithm = particles::ionization::AlgorithmBSI; - using type = BSI_Impl< IonizationAlgorithm, T_DestSpecies >; - }; + namespace ionization + { + /** Barrier Suppression Ionization - Implementation + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * \tparam T_SrcSpecies particle species that is ionized + * default is boost::mpl placeholder because specialization + * cannot be known in list of particle species' flags + * \see speciesDefinition.param + */ + template< + typename T_IonizationAlgorithm, + typename T_DestSpecies, + typename T_IonizationCurrent, + typename T_SrcSpecies = bmpl::_1> + struct BSI_Impl; - /** Barrier Suppression Ionization - Effective Atomic Numbers - * - * - similar to BSI - * - * - tries to account for electron shielding by issuing a lookup table of - * effective atomic numbers for each filled electron shell @see ionizer.param - * - unvalidated and still in development - * - * \tparam T_DestSpecies electron species to be created - */ - template< typename T_DestSpecies > - struct BSIEffectiveZ - { - using IonizationAlgorithm = particles::ionization::AlgorithmBSIEffectiveZ; - using type = 
BSI_Impl< IonizationAlgorithm, T_DestSpecies >; - }; + /** Barrier Suppression Ionization - Hydrogen-Like + * + * - takes the ionization energies of the various charge states of ions + * - calculates the corresponding field strengths necessary to overcome + * the binding energy of the electron to the core + * - if the field strength is locally exceeded: increase the charge state + * - see for example: Delone, N. B.; Krainov, V. P. (1998). + * "Tunneling and barrier-suppression ionization of atoms and ions in a laser radiation field" + * doi:10.1070/PU1998v041n05ABEH000393 + * + * - This model accounts for naive ion charge shielding by inner electrons + * as it assumes that the charge the electron 'feels' is equal to + * `proton number - number of inner shell electrons`. + * - This model neglects the Stark upshift of ionization energies. + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * + * wrapper class, + * needed because the SrcSpecies cannot be known during the + * first specialization of the ionization model in the particle definition + * \see speciesDefinition.param + */ + template + struct BSI + { + using IonizationAlgorithm = particles::ionization::AlgorithmBSI; + using type = BSI_Impl; + }; - /** Barrier Suppression Ionization - Ion. 
energies Stark-upshifted - * - * - similar to BSI - * - * - developed by Bauer and Mulser (book: High Power Laser Matter Interaction) - * - accounts for stark upshift of ionization energy but only covers the - * hydrogenlike ions originally - * - \todo needs to be extrapolated to arbitrary ions - * - * \tparam T_DestSpecies electron species to be created - */ - template< typename T_DestSpecies > - struct BSIStarkShifted - { - using IonizationAlgorithm = particles::ionization::AlgorithmBSIStarkShifted; - using type = BSI_Impl< IonizationAlgorithm, T_DestSpecies >; - }; + /** Barrier Suppression Ionization - Effective Atomic Numbers + * + * - similar to BSI + * + * - tries to account for electron shielding by issuing a lookup table of + * effective atomic numbers for each filled electron shell @see ionizer.param + * - unvalidated and still in development + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + */ + template + struct BSIEffectiveZ + { + using IonizationAlgorithm = particles::ionization::AlgorithmBSIEffectiveZ; + using type = BSI_Impl; + }; + + /** Barrier Suppression Ionization - Ion. 
energies Stark-upshifted + * + * - similar to BSI + * + * - developed by Bauer and Mulser (book: High Power Laser Matter Interaction) + * - accounts for stark upshift of ionization energy but only covers the + * hydrogenlike ions originally + * - \todo needs to be extrapolated to arbitrary ions + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + */ + template + struct BSIStarkShifted + { + using IonizationAlgorithm = particles::ionization::AlgorithmBSIStarkShifted; + using type = BSI_Impl; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp b/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp index 3afeed6d0d..5c97a514d1 100644 --- a/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp +++ b/include/picongpu/particles/ionization/byField/BSI/BSI_Impl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. 
* @@ -24,11 +24,13 @@ #include "picongpu/fields/CellType.hpp" #include "picongpu/fields/FieldB.hpp" #include "picongpu/fields/FieldE.hpp" +#include "picongpu/fields/FieldJ.hpp" #include "picongpu/traits/FieldPosition.hpp" #include "picongpu/particles/ionization/byField/BSI/BSI.def" #include "picongpu/particles/ionization/byField/BSI/AlgorithmBSI.hpp" #include "picongpu/particles/ionization/byField/BSI/AlgorithmBSIEffectiveZ.hpp" #include "picongpu/particles/ionization/byField/BSI/AlgorithmBSIStarkShifted.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.hpp" #include "picongpu/particles/ParticlesFunctors.hpp" @@ -43,240 +45,218 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** \struct BSI_Impl - * - * \brief Barrier Suppression Ionization - Implementation - * - * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created - * \tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized - */ - template - struct BSI_Impl + namespace particles { - - using DestSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_DestSpecies - >; - using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SrcSpecies - >; - - using FrameType = typename SrcSpecies::FrameType; - - /* specify field to particle interpolation scheme */ - using Field2ParticleInterpolation = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; - - /* margins around the supercell for the interpolation of the field on the cells */ - using LowerMargin = typename GetMargin::LowerMargin; - using UpperMargin = typename GetMargin::UpperMargin; - - /* relevant area of a block */ - using BlockArea = SuperCellDescription< - typename MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - BlockArea BlockDescription; - - private: - - /* define ionization ALGORITHM (calculation) for ionization MODEL 
*/ - using IonizationAlgorithm = T_IonizationAlgorithm; - - using TVec = MappingDesc::SuperCellSize; - - using ValueType_E = FieldE::ValueType; - /* global memory EM-field device databoxes */ - FieldE::DataBoxType eBox; - /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox >); - - public: - /* host constructor */ - BSI_Impl(const uint32_t currentStep) - { - DataConnector &dc = Environment<>::get().DataConnector(); - /* initialize pointers on host-side E-(B-)field databoxes */ - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - /* initialize device-side E-(B-)field databoxes */ - eBox = fieldE->getDeviceDataBox(); - - } - - /** cache fields used by this functor + namespace ionization + { + /** \struct BSI_Impl * - * @warning this is a collective method and calls synchronize + * \brief Barrier Suppression Ionization - Implementation * - * @tparam T_Acc alpaka accelerator type - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * - * @param acc alpaka accelerator - * @param blockCell relative offset (in cells) to the local domain plus the guarding cells - * @param workerCfg configuration of the worker + * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * \tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized */ template< - typename T_Acc , - typename T_WorkerCfg - > - DINLINE void collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg - ) + typename T_IonizationAlgorithm, + typename T_DestSpecies, + typename T_IonizationCurrent, + typename T_SrcSpecies> + struct BSI_Impl { - - /* caching of E field */ - cachedE = CachedBox::create< - 1, - ValueType_E - >( - acc, - BlockArea() - ); - - /* instance of nvidia assignment operator */ - nvidia::functors::Assign assign; - - 
ThreadCollective< - BlockArea, - T_WorkerCfg::numWorkers - > collective( workerCfg.getWorkerIdx( ) ); - /* copy fields from global to shared */ - auto fieldEBlock = eBox.shift(blockCell); - collective( - acc, - assign, - cachedE, - fieldEBlock - ); - - /* wait for shared memory to be initialized */ - __syncthreads(); - } - - /** Initialization function on device - * - * \brief Cache EM-fields on device - * and initialize possible prerequisites for ionization, like e.g. random number generator. - * - * This function will be called inline on the device which must happen BEFORE threads diverge - * during loop execution. The reason for this is the `__syncthreads()` call which is necessary after - * initializing the E-/B-field shared boxes in shared memory. - * - * @param blockCell Offset of the cell from the origin of the local domain - * including guarding supercells in units of cells - * @param linearThreadIdx Linearized thread ID inside the block - * @param localCellOffset Offset of the cell from the origin of the local - * domain, i.e. 
from the @see BORDER - * without guarding supercells - */ - template< typename T_Acc > - DINLINE void init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset - ) - { - } - - /** Determine number of new macro electrons due to ionization - * - * \param ionFrame reference to frame of the to-be-ionized particles - * \param localIdx local (linear) index in super cell / frame - */ - template< typename T_Acc > - DINLINE uint32_t numNewParticles(T_Acc const & acc, FrameType& ionFrame, int localIdx) - { - /* alias for the single macro-particle */ - auto particle = ionFrame[localIdx]; - /* particle position, used for field-to-particle interpolation */ - floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; - /* multi-dim coordinate of the local cell inside the super cell */ - DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - /* interpolation of E */ - const picongpu::traits::FieldPosition fieldPosE; - ValueType_E eField = Field2ParticleInterpolation() - (cachedE.shift(localCell).toCursor(), pos, fieldPosE()); - - /* define number of bound macro electrons before ionization */ - float_X prevBoundElectrons = particle[boundElectrons_]; - - /* this is the point where actual ionization takes place */ - IonizationAlgorithm ionizeAlgo; - /* determine number of new macro electrons to be created */ - uint32_t newMacroElectrons = ionizeAlgo( - eField, - particle - ); - - return newMacroElectrons; - - } - - /* Functor implementation - * - * Ionization model specific particle creation - * - * \tparam T_parentIon type of ion species that is being ionized - * \tparam T_childElectron type of electron species that is created - * \param parentIon ion instance that is ionized - * \param childElectron electron instance that is created - */ - template - DINLINE void operator()(T_Acc const & acc, T_parentIon& parentIon,T_childElectron& childElectron) - { - - /* for not 
mixing operations::assign up with the nvidia functor assign */ - namespace partOp = pmacc::particles::operations; - /* each thread sets the multiMask hard on "particle" (=1) */ - childElectron[multiMask_] = 1u; - const float_X weighting = parentIon[weighting_]; - - /* each thread initializes a clone of the parent ion but leaving out - * some attributes: - * - multiMask: reading from global memory takes longer than just setting it again explicitly - * - momentum: because the electron would get a higher energy because of the ion mass - * - boundElectrons: because species other than ions or atoms do not have them - * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + using DestSpecies = pmacc::particles::meta::FindByNameOrType_t; + using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t; + + using FrameType = typename SrcSpecies::FrameType; + + /* specify field to particle interpolation scheme */ + using Field2ParticleInterpolation = + typename pmacc::traits::Resolve>::type>::type; + + /* margins around the supercell for the interpolation of the field on the cells */ + using LowerMargin = typename GetMargin::LowerMargin; + using UpperMargin = typename GetMargin::UpperMargin; + + /* relevant area of a block */ + using BlockArea = SuperCellDescription; + + BlockArea BlockDescription; + + private: + /* define ionization ALGORITHM (calculation) for ionization MODEL */ + using IonizationAlgorithm = T_IonizationAlgorithm; + + using TVec = MappingDesc::SuperCellSize; + + using ValueType_E = FieldE::ValueType; + /* global memory E-field and current density device databoxes */ + FieldE::DataBoxType eBox; + FieldJ::DataBoxType jBox; + /* shared memory EM-field device databoxes */ + PMACC_ALIGN(cachedE, DataBox>); + + public: + /* host constructor */ + BSI_Impl(const uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + /* initialize pointers on host-side E-field and current density databoxes */ + auto fieldE = 
dc.get(FieldE::getName(), true); + auto fieldJ = dc.get(FieldJ::getName(), true); + /* initialize device-side E-(J-)field databoxes */ + eBox = fieldE->getDeviceDataBox(); + jBox = fieldJ->getDeviceDataBox(); + } + + /** cache fields used by this functor + * + * @warning this is a collective method and calls synchronize + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * + * @param acc alpaka accelerator + * @param blockCell relative offset (in cells) to the local domain plus the guarding cells + * @param workerCfg configuration of the worker */ - auto targetElectronClone = partOp::deselect >(childElectron); - - partOp::assign(targetElectronClone, partOp::deselect(parentIon)); - - const float_X massIon = attribute::getMass(weighting,parentIon); - const float_X massElectron = attribute::getMass(weighting,childElectron); - - const float3_X electronMomentum (parentIon[momentum_]*(massElectron/massIon)); - - childElectron[momentum_] = electronMomentum; - - /* conservation of momentum - * \todo add conservation of mass */ - parentIon[momentum_] -= electronMomentum; - - /** ionization of the ion by reducing the number of bound electrons + template + DINLINE void collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const T_WorkerCfg& workerCfg) + { + /* shift origin of jbox to supercell of particle */ + jBox = jBox.shift(blockCell); + + /* caching of E field */ + cachedE = CachedBox::create<1, ValueType_E>(acc, BlockArea()); + + /* instance of nvidia assignment operator */ + nvidia::functors::Assign assign; + + ThreadCollective collective(workerCfg.getWorkerIdx()); + /* copy fields from global to shared */ + auto fieldEBlock = eBox.shift(blockCell); + collective(acc, assign, cachedE, fieldEBlock); + + /* wait for shared memory to be initialized */ + cupla::__syncthreads(acc); + } + + /** Initialization function on device * - * @warning substracting a float from a float 
can potentially - * create a negative boundElectrons number for the ion, - * see #1850 for details + * \brief Cache EM-fields on device + * and initialize possible prerequisites for ionization, like e.g. random number generator. + * + * This function will be called inline on the device which must happen BEFORE threads diverge + * during loop execution. The reason for this is the `cupla::__syncthreads( acc )` call which is + * necessary after initializing the E-/B-field shared boxes in shared memory. + * + * @param blockCell Offset of the cell from the origin of the local domain + * including guarding supercells in units of cells + * @param linearThreadIdx Linearized thread ID inside the block + * @param localCellOffset Offset of the cell from the origin of the local + * domain, i.e. from the @see BORDER + * without guarding supercells */ - parentIon[boundElectrons_] -= float_X(1.); - } - - }; - -} // namespace ionization -} // namespace particles + template + DINLINE void init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset) + { + } + + /** Determine number of new macro electrons due to ionization + * + * \param ionFrame reference to frame of the to-be-ionized particles + * \param localIdx local (linear) index in super cell / frame + */ + template + DINLINE uint32_t numNewParticles(T_Acc const& acc, FrameType& ionFrame, int localIdx) + { + /* alias for the single macro-particle */ + auto particle = ionFrame[localIdx]; + /* particle position, used for field-to-particle interpolation */ + floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; + /* multi-dim coordinate of the local cell inside the super cell */ + DataSpace localCell( + DataSpaceOperations::template map(particleCellIdx)); + /* interpolation of E */ + const picongpu::traits::FieldPosition fieldPosE; + ValueType_E eField + = Field2ParticleInterpolation()(cachedE.shift(localCell).toCursor(), pos, 
fieldPosE()); + + /* define number of bound macro electrons before ionization */ + float_X prevBoundElectrons = particle[boundElectrons_]; + + /* this is the point where actual ionization takes place */ + IonizationAlgorithm ionizeAlgo{}; + auto retValue = ionizeAlgo(eField, particle); + /* determine number of new macro electrons to be created and calculate ionization current */ + IonizationCurrent{}( + retValue, + particle[weighting_], + jBox.shift(localCell), + eField, + acc, + pos); + + return retValue.newMacroElectrons; + } + + /* Functor implementation + * + * Ionization model specific particle creation + * + * \tparam T_parentIon type of ion species that is being ionized + * \tparam T_childElectron type of electron species that is created + * \param parentIon ion instance that is ionized + * \param childElectron electron instance that is created + */ + template + DINLINE void operator()(T_Acc const& acc, T_parentIon& parentIon, T_childElectron& childElectron) + { + /* for not mixing operations::assign up with the nvidia functor assign */ + namespace partOp = pmacc::particles::operations; + /* each thread sets the multiMask hard on "particle" (=1) */ + childElectron[multiMask_] = 1u; + const float_X weighting = parentIon[weighting_]; + + /* each thread initializes a clone of the parent ion but leaving out + * some attributes: + * - multiMask: reading from global memory takes longer than just setting it again explicitly + * - momentum: because the electron would get a higher energy because of the ion mass + * - boundElectrons: because species other than ions or atoms do not have them + * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + */ + auto targetElectronClone = partOp::deselect>(childElectron); + + partOp::assign(targetElectronClone, partOp::deselect(parentIon)); + + const float_X massIon = attribute::getMass(weighting, parentIon); + const float_X massElectron = attribute::getMass(weighting, childElectron); + + const float3_X 
electronMomentum(parentIon[momentum_] * (massElectron / massIon)); + + childElectron[momentum_] = electronMomentum; + + /* conservation of momentum + * \todo add conservation of mass */ + parentIon[momentum_] -= electronMomentum; + + /** ionization of the ion by reducing the number of bound electrons + * + * @warning subtracting a float from a float can potentially + * create a negative boundElectrons number for the ion, + * see #1850 for details + */ + parentIon[boundElectrons_] -= float_X(1.); + } + }; + + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.def b/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.def new file mode 100644 index 0000000000..2db8199078 --- /dev/null +++ b/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.def @@ -0,0 +1,55 @@ +/* Copyright 2020-2021 Jakob Trojok + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +namespace picongpu +{ + namespace particles + { + namespace ionization + { + namespace current + { + /** possible inputs for T_IonizationCurrent + * EnergyConservation -> with ionization current + * None -> without + */ + struct EnergyConservation; + struct None; + } // namespace current + /** Implementation of Ionization Current + * + * In order to conserve energy, PIConGPU supports an ionization current + * to decrease the electric field according to the amount of energy lost to field ionization processes. + * + * Reference: P. Mulser et al. + * Modeling field ionization in an energy conserving form and resulting nonstandard fluid + * dynamics, Physics of Plasmas 5, 4466 (1998) https://doi.org/10.1063/1.873184 + * + * \tparam T_Acc alpaka accelerator type + * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + * \tparam T_Dim dimension of simulation + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + */ + template + struct IonizationCurrent; + } // namespace ionization + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.hpp b/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.hpp new file mode 100644 index 0000000000..78b3e3c8d4 --- /dev/null +++ b/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.hpp @@ -0,0 +1,98 @@ +/* Copyright 2020-2021 Jakob Trojok + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/ParticlesFunctors.hpp" +#include "picongpu/fields/FieldE.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/JIonizationCalc.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/JIonizationAssignment.hpp" + + +namespace picongpu +{ + namespace particles + { + namespace ionization + { + /**@{*/ + /** Implementation of actual ionization current + * + * \tparam T_Acc alpaka accelerator type + * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + * \tparam T_Dim dimension of simulation + */ + template + struct IonizationCurrent + { + using ValueType_E = FieldE::ValueType; + + /** Ionization current routine + * + * \tparam T_JBox type of current density data box + */ + template + HDINLINE void operator()( + IonizerReturn retValue, + float_X const weighting, + T_JBox jBoxPar, + ValueType_E eField, + T_Acc const& acc, + floatD_X const pos) + { + /* If there is no ionization, the ionization energy is zero. In that case, there is no need for an + * ionization current. 
*/ + if(retValue.ionizationEnergy != 0.0_X) + { + auto ionizationEnergy = weighting * retValue.ionizationEnergy * SI::ATOMIC_UNIT_ENERGY + / UNIT_ENERGY; // convert to PIConGPU units + /* calculate ionization current at particle position */ + float3_X jIonizationPar = JIonizationCalc{}(ionizationEnergy, eField); + /* assign ionization current to grid points */ + JIonizationAssignment{}(acc, jIonizationPar, pos, jBoxPar); + } + } + }; + + /** Ionization current deactivated + */ + template + struct IonizationCurrent + { + using ValueType_E = FieldE::ValueType; + + /** no ionization current + */ + template + HDINLINE void operator()( + IonizerReturn, + float_X const, + T_JBox, + ValueType_E, + T_Acc const&, + floatD_X const) + { + } + }; + /**@}*/ + } // namespace ionization + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp b/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp new file mode 100644 index 0000000000..0142dfae4b --- /dev/null +++ b/include/picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp @@ -0,0 +1,39 @@ +/* Copyright 2020-2021 Jakob Trojok + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +namespace picongpu +{ + namespace particles + { + namespace ionization + { + /** return type for ionization algorithms + */ + struct IonizerReturn + { + float_X ionizationEnergy = 0._X; + uint32_t newMacroElectrons = 0u; + }; + } // namespace ionization + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/IonizationCurrent/JIonizationAssignment.hpp b/include/picongpu/particles/ionization/byField/IonizationCurrent/JIonizationAssignment.hpp new file mode 100644 index 0000000000..915a80cdd2 --- /dev/null +++ b/include/picongpu/particles/ionization/byField/IonizationCurrent/JIonizationAssignment.hpp @@ -0,0 +1,146 @@ +/* Copyright 2020-2021 Jakob Trojok + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/particles/ParticlesFunctors.hpp" +#include "picongpu/simulation_defines.hpp" +#include "picongpu/fields/FieldJ.hpp" + +namespace picongpu +{ + namespace particles + { + namespace ionization + { + /** defining traits for current assignment + * + * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + */ + template + struct JIonizationAssignmentParent + { + using Shape = typename ::picongpu::traits::GetShape::type; + using AssignmentFunction = typename Shape::ChargeAssignmentOnSupport; + static constexpr int supp = AssignmentFunction::support; + /*(supp + 1) % 2 is 1 for even supports else 0*/ + static constexpr int begin = -supp / 2 + (supp + 1) % 2; + static constexpr int end = begin + supp; + }; + + /**@{*/ + /** implementation of current assignment + * + * \tparam T_Acc alpaka accelerator type + * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + * \tparam T_Dim dimension of simulation + */ + template + struct JIonizationAssignment; + + /** 3d case + */ + template + struct JIonizationAssignment + : public JIonizationAssignmentParent + { + /** functor for assigning current to databox + * + * \tparam T_JBox type of current density data box + */ + template + HDINLINE void operator()( + T_Acc const& acc, + float3_X const jIonizationPar, + float3_X const pos, + T_JBox jBoxPar) + { + /* actual assignment */ + for(int z = JIonizationAssignmentParent::begin; + z < JIonizationAssignmentParent::end; + ++z) + { + float3_X jGridz = jIonizationPar; + jGridz *= typename JIonizationAssignmentParent::AssignmentFunction{}( + float_X(z) - pos.z()); + for(int y = JIonizationAssignmentParent::begin; + y < JIonizationAssignmentParent::end; + ++y) + { + float3_X jGridy = jGridz; + jGridy *= typename JIonizationAssignmentParent::AssignmentFunction{}( + float_X(y) - pos.y()); + for(int x = JIonizationAssignmentParent::begin; + x < 
JIonizationAssignmentParent::end; + ++x) + { + float3_X jGridx = jGridy; + jGridx *= typename JIonizationAssignmentParent::AssignmentFunction{}( + float_X(x) - pos.x()); + for(int i = 0; i <= 2; i++) + { + cupla::atomicAdd(acc, &(jBoxPar(DataSpace(x, y, z))[i]), jGridx[i]); + } + } + } + } + } + }; + + /** 2d case + */ + template + struct JIonizationAssignment + : public JIonizationAssignmentParent + { + /** functor for assigning current to databox + */ + template + HDINLINE void operator()( + T_Acc const& acc, + float3_X const jIonizationPar, + float2_X const pos, + T_JBox jBoxPar) + { + for(int y = JIonizationAssignmentParent::begin; + y < JIonizationAssignmentParent::end; + ++y) + { + float3_X jGridy = jIonizationPar; + jGridy *= typename JIonizationAssignmentParent::AssignmentFunction{}( + float_X(y) - pos.y()); + for(int x = JIonizationAssignmentParent::begin; + x < JIonizationAssignmentParent::end; + ++x) + { + float3_X jGridx = jGridy; + jGridx *= typename JIonizationAssignmentParent::AssignmentFunction{}( + float_X(x) - pos.x()); + for(int i = 0; i <= 2; i++) + { + cupla::atomicAdd(acc, &(jBoxPar(DataSpace(x, y))[i]), jGridx[i]); + } + } + } + } + }; + /**@}*/ + } // namespace ionization + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/IonizationCurrent/JIonizationCalc.hpp b/include/picongpu/particles/ionization/byField/IonizationCurrent/JIonizationCalc.hpp new file mode 100644 index 0000000000..9a540f7e76 --- /dev/null +++ b/include/picongpu/particles/ionization/byField/IonizationCurrent/JIonizationCalc.hpp @@ -0,0 +1,46 @@ +/* Copyright 2020-2021 Jakob Trojok + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +namespace picongpu +{ + namespace particles + { + namespace ionization + { + /** calculates ionization current + */ + struct JIonizationCalc + { + /** Functor calculating ionization current. + * Is only called if ionization energy is not zero, + * thus we ensure the field is different from zero. + */ + HDINLINE float3_X operator()(float_X const ionizationEnergy, float3_X const eField) + { + float3_X jion = ionizationEnergy * eField / pmacc::math::abs2(eField) / DELTA_T / CELL_VOLUME; + return jion; + } + }; + } // namespace ionization + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/Keldysh/AlgorithmKeldysh.hpp b/include/picongpu/particles/ionization/byField/Keldysh/AlgorithmKeldysh.hpp index ae847f05db..bc3f255551 100644 --- a/include/picongpu/particles/ionization/byField/Keldysh/AlgorithmKeldysh.hpp +++ b/include/picongpu/particles/ionization/byField/Keldysh/AlgorithmKeldysh.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Marco Garten +/* Copyright 2016-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. 
* @@ -27,6 +27,7 @@ #include #include #include "picongpu/particles/ionization/utilities.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizerReturn.hpp" /** @file AlgorithmKeldysh.hpp * @@ -36,83 +37,80 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Calculation for the Keldysh ionization model - * - * for linear laser polarization - */ - struct AlgorithmKeldysh + namespace particles { - /** Functor implementation - * \tparam EType type of electric field - * \tparam BType type of magnetic field - * \tparam ParticleType type of particle to be ionized - * - * \param bField magnetic field value at t=0 - * \param eField electric field value at t=0 - * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 - * \param randNr random number, equally distributed in range [0.:1.0] - * - * \return number of new macro electrons to be created - */ - template - HDINLINE uint32_t - operator()( const BType bField, const EType eField, ParticleType& parentIon, float_X randNr ) + namespace ionization { - - const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; - float_X chargeState = attribute::getChargeState(parentIon); - - /* verify that ion is not completely ionized */ - if ( chargeState < protonNumber ) + /** Calculation for the Keldysh ionization model + * + * for linear laser polarization + */ + struct AlgorithmKeldysh { - uint32_t const cs = math::float2int_rd(chargeState); - const float_X iEnergy = typename GetIonizationEnergies::type{ }[cs]; + /** Functor implementation + * \tparam EType type of electric field + * \tparam BType type of magnetic field + * \tparam ParticleType type of particle to be ionized + * + * \param bField magnetic field value at t=0 + * \param eField electric field value at t=0 + * \param parentIon particle instance to be ionized with position at t=0 and momentum at t=-1/2 + * \param randNr random number, equally distributed in range [0.:1.0] 
+ * + * \return ionization energy and number of new macro electrons to be created + */ + template + HDINLINE IonizerReturn + operator()(const BType bField, const EType eField, ParticleType& parentIon, float_X randNr) + { + const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + float_X chargeState = attribute::getChargeState(parentIon); - constexpr float_X pi = pmacc::algorithms::math::Pi< float_X >::value; - /* electric field in atomic units - only absolute value */ - float_X eInAU = math::abs(eField) / ATOMIC_UNIT_EFIELD; + /* verify that ion is not completely ionized */ + if(chargeState < protonNumber) + { + uint32_t const cs = pmacc::math::float2int_rd(chargeState); + const float_X iEnergy = typename GetIonizationEnergies::type{}[cs]; - /* factor two avoid calculation math::pow(2,5./4.); */ - const float_X twoToFiveQuarters = 2.3784142300054; + constexpr float_X pi = pmacc::math::Pi::value; + /* electric field in atomic units - only absolute value */ + float_X eInAU = math::abs(eField) / ATOMIC_UNIT_EFIELD; - /* characteristic exponential function argument */ - const float_X charExpArg = math::sqrt(util::cube(float_X(2.)*iEnergy))/eInAU; + /* factor to avoid calculation of math::pow(2,5./4.); */ + const float_X twoToFiveQuarters = 2.3784142300054; - /* ionization rate */ - float_X rateKeldysh = math::sqrt(float_X(6.)*pi) / twoToFiveQuarters \ - * iEnergy * math::sqrt(float_X(1.)/charExpArg) \ - * math::exp(-float_X(2./3.) * charExpArg); + /* characteristic exponential function argument */ + const float_X charExpArg = math::sqrt(util::cube(float_X(2.) 
* iEnergy)) / eInAU; - /* simulation time step in atomic units */ - const float_X timeStepAU = float_X(DELTA_T / ATOMIC_UNIT_TIME); - /* ionization probability - * - * probability = rate * time step - * --> for infinitesimal time steps - * - * the whole ensemble should then follow - * P = 1 - exp(-rate * time step) if the laser wavelength is - * sampled well enough - */ - float_X const probKeldysh = rateKeldysh * timeStepAU; + /* ionization rate */ + float_X rateKeldysh = math::sqrt(float_X(6.) * pi) / twoToFiveQuarters * iEnergy + * math::sqrt(float_X(1.) / charExpArg) * math::exp(-float_X(2. / 3.) * charExpArg); - /* ionization condition */ - if( randNr < probKeldysh ) - { - /* return number of macro electrons to produce */ - return 1u; + /* simulation time step in atomic units */ + const float_X timeStepAU = float_X(DELTA_T / ATOMIC_UNIT_TIME); + /* ionization probability + * + * probability = rate * time step + * --> for infinitesimal time steps + * + * the whole ensemble should then follow + * P = 1 - exp(-rate * time step) if the laser wavelength is + * sampled well enough + */ + float_X const probKeldysh = rateKeldysh * timeStepAU; + + /* ionization condition */ + if(randNr < probKeldysh) + { + /* return ionization energy and number of macro electrons to produce */ + return IonizerReturn{iEnergy, 1u}; + } + } + /* no ionization */ + return IonizerReturn{0.0, 0u}; } - } - /* no ionization */ - return 0u; - } - }; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh.def b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh.def index 6a4915a56a..cfa116aad4 100644 --- a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh.def +++ b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Marco Garten +/* Copyright 2016-2021 Marco Garten, Jakob 
Trojok * * This file is part of PIConGPU. * @@ -20,51 +20,56 @@ #pragma once #include +#include "picongpu/particles/ionization/byField/IonizationCurrent/IonizationCurrent.def" namespace picongpu { -namespace particles -{ -namespace ionization -{ - - /** Keldysh model - * - * \tparam T_DestSpecies electron species to be created - * \tparam T_SrcSpecies ion species to be ionized - * default is boost::mpl placeholder because specialization - * cannot be known in list of particle species' flags - * \see speciesDefinition.param - */ - template - struct Keldysh_Impl; - - /** Keldysh ionization model - * - * - Keldysh viewed ionization not as multiple different effects but rather as - * one that can be classified in different ionization regimes characterized - * by certain values of the Keldysh parameter - * - takes the ionization energies of the various charge states of ions - * - calculates the ionization rates and then the ionization probabilities from them - * - ATTENTION: this approach is not very applicable for rapidly changing high intensity laser fields - * - this is a Monte Carlo method: if a random number is smaller - * or equal than the ionization probability -> increase the charge state - * - see for example: D. Bauer and P. Mulser. Exact field ionization rates in the barrier-suppression - * regime from numerical time-dependent Schroedinger-equation calculations. - * Physical Review A, 59(1):569+, January 1999. 
- * - * wrapper class, - * needed because the SrcSpecies cannot be known during the - * first specialization of the ionization model in the particle definition - * \see speciesDefinition.param - */ - template - struct Keldysh + namespace particles { - using IonizationAlgorithm = particles::ionization::AlgorithmKeldysh; - using type = Keldysh_Impl< IonizationAlgorithm, T_DestSpecies >; - }; + namespace ionization + { + /** Keldysh model + * + * \tparam T_DestSpecies electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * \tparam T_SrcSpecies ion species to be ionized + * default is boost::mpl placeholder because specialization + * cannot be known in list of particle species' flags + * \see speciesDefinition.param + */ + template< + typename T_IonizationAlgorithm, + typename T_DestSpecies, + typename T_IonizationCurrent, + typename T_SrcSpecies = bmpl::_1> + struct Keldysh_Impl; + + /** Keldysh ionization model + * + * - Keldysh viewed ionization not as multiple different effects but rather as + * one that can be classified in different ionization regimes characterized + * by certain values of the Keldysh parameter + * - takes the ionization energies of the various charge states of ions + * - calculates the ionization rates and then the ionization probabilities from them + * - ATTENTION: this approach is not very applicable for rapidly changing high intensity laser fields + * - this is a Monte Carlo method: if a random number is smaller + * or equal than the ionization probability -> increase the charge state + * - see for example: D. Bauer and P. Mulser. Exact field ionization rates in the barrier-suppression + * regime from numerical time-dependent Schroedinger-equation calculations. + * Physical Review A, 59(1):569+, January 1999. 
+ * + * wrapper class, + * needed because the SrcSpecies cannot be known during the + * first specialization of the ionization model in the particle definition + * \see speciesDefinition.param + */ + template + struct Keldysh + { + using IonizationAlgorithm = particles::ionization::AlgorithmKeldysh; + using type = Keldysh_Impl; + }; -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp index 10c0fd9440..c54a1fa698 100644 --- a/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp +++ b/include/picongpu/particles/ionization/byField/Keldysh/Keldysh_Impl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Marco Garten +/* Copyright 2016-2021 Marco Garten, Jakob Trojok * * This file is part of PIConGPU. * @@ -20,7 +20,6 @@ #pragma once #include "picongpu/simulation_defines.hpp" -#include "picongpu/traits/UsesRNG.hpp" #include "picongpu/fields/CellType.hpp" #include "picongpu/fields/FieldB.hpp" @@ -28,6 +27,8 @@ #include "picongpu/traits/FieldPosition.hpp" #include "picongpu/particles/ionization/byField/Keldysh/Keldysh.def" #include "picongpu/particles/ionization/byField/Keldysh/AlgorithmKeldysh.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/JIonizationCalc.hpp" +#include "picongpu/particles/ionization/byField/IonizationCurrent/JIonizationAssignment.hpp" #include #include @@ -46,274 +47,231 @@ namespace picongpu { -namespace traits -{ - /** specialization of the UsesRNG trait - * --> ionization module uses random number generation - */ - template - struct UsesRNG > : - public boost::true_type - { - }; -} // namespace traits - -namespace particles -{ -namespace ionization -{ - - /** \struct Keldysh_Impl - * - * \brief Ammosov-Delone-Krainov - * Tunneling ionization for hydrogenlike atoms - * - * \tparam 
T_DestSpecies type or name as boost::mpl::string of the electron species to be created - * \tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized - */ - template - struct Keldysh_Impl + namespace particles { - - using DestSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_DestSpecies - >; - using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SrcSpecies - >; - - using FrameType = typename SrcSpecies::FrameType; - - /* specify field to particle interpolation scheme */ - using Field2ParticleInterpolation = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; - - /* margins around the supercell for the interpolation of the field on the cells */ - using LowerMargin = typename GetMargin::LowerMargin; - using UpperMargin = typename GetMargin::UpperMargin; - - /* relevant area of a block */ - using BlockArea = SuperCellDescription< - typename MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - BlockArea BlockDescription; - - private: - - /* define ionization ALGORITHM (calculation) for ionization MODEL */ - using IonizationAlgorithm = T_IonizationAlgorithm; - - /* random number generator */ - using RNGFactory = pmacc::random::RNGProvider; - using Distribution = pmacc::random::distributions::Uniform; - using RandomGen = typename RNGFactory::GetRandomType::type; - RandomGen randomGen; - - using TVec = MappingDesc::SuperCellSize; - - using ValueType_E = FieldE::ValueType; - using ValueType_B = FieldB::ValueType; - /* global memory EM-field device databoxes */ - PMACC_ALIGN(eBox, FieldE::DataBoxType); - PMACC_ALIGN(bBox, FieldB::DataBoxType); - /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox >); - PMACC_ALIGN(cachedB, DataBox >); - - public: - /* host constructor initializing member : random number generator */ - Keldysh_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom()) - { - DataConnector 
&dc = Environment<>::get().DataConnector(); - /* initialize pointers on host-side E-(B-)field databoxes */ - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - /* initialize device-side E-(B-)field databoxes */ - eBox = fieldE->getDeviceDataBox(); - bBox = fieldB->getDeviceDataBox(); - - } - - /** cache fields used by this functor + namespace ionization + { + /** \struct Keldysh_Impl * - * @warning this is a collective method and calls synchronize + * \brief Keldysh ionization model + * for linear laser polarization * - * @tparam T_Acc alpaka accelerator type - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * - * @param acc alpaka accelerator - * @param blockCell relative offset (in cells) to the local domain plus the guarding cells - * @param workerCfg configuration of the worker + * \tparam T_DestSpecies type or name as boost::mpl::string of the electron species to be created + * \tparam T_IonizationCurrent select type of ionization current (None or EnergyConservation) + * \tparam T_SrcSpecies type or name as boost::mpl::string of the particle species that is ionized */ template< - typename T_Acc , - typename T_WorkerCfg - > - DINLINE void collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg - ) - { - /* caching of E and B fields */ - cachedB = CachedBox::create< - 0, - ValueType_B - >( - acc, - BlockArea() - ); - cachedE = CachedBox::create< - 1, - ValueType_E - >( - acc, - BlockArea() - ); - - /* instance of nvidia assignment operator */ - nvidia::functors::Assign assign; - /* copy fields from global to shared */ - auto fieldBBlock = bBox.shift(blockCell); - ThreadCollective< - BlockArea, - T_WorkerCfg::numWorkers - > collective( workerCfg.getWorkerIdx( ) ); - collective( - acc, - assign, - cachedB, - fieldBBlock - ); - /* copy fields from global to shared */ - auto fieldEBlock = 
eBox.shift(blockCell); - collective( - acc, - assign, - cachedE, - fieldEBlock - ); - - /* wait for shared memory to be initialized */ - __syncthreads(); - } - - /** Initialization function on device - * - * \brief Cache EM-fields on device - * and initialize possible prerequisites for ionization, like e.g. random number generator. - * - * This function will be called inline on the device which must happen BEFORE threads diverge - * during loop execution. The reason for this is the `__syncthreads()` call which is necessary after - * initializing the E-/B-field shared boxes in shared memory. - */ - template< typename T_Acc > - DINLINE void init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset - ) - { - /* initialize random number generator with the local cell index in the simulation */ - this->randomGen.init(localCellOffset); - } - - /** Determine number of new macro electrons due to ionization - * - * \param ionFrame reference to frame of the to-be-ionized particles - * \param localIdx local (linear) index in super cell / frame - */ - template< typename T_Acc > - DINLINE uint32_t numNewParticles(T_Acc const & acc, FrameType& ionFrame, int localIdx) - { - /* alias for the single macro-particle */ - auto particle = ionFrame[localIdx]; - /* particle position, used for field-to-particle interpolation */ - floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; - /* multi-dim coordinate of the local cell inside the super cell */ - DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - /* interpolation of E- */ - const picongpu::traits::FieldPosition fieldPosE; - ValueType_E eField = Field2ParticleInterpolation() - (cachedE.shift(localCell).toCursor(), pos, fieldPosE()); - /* and B-field on the particle position */ - const picongpu::traits::FieldPosition fieldPosB; - ValueType_B bField = Field2ParticleInterpolation() - 
(cachedB.shift(localCell).toCursor(), pos, fieldPosB()); - - /* define number of bound macro electrons before ionization */ - float_X prevBoundElectrons = particle[boundElectrons_]; - - IonizationAlgorithm ionizeAlgo; - /* determine number of new macro electrons to be created */ - uint32_t newMacroElectrons = ionizeAlgo( - bField, eField, - particle, this->randomGen(acc) - ); - - return newMacroElectrons; - - } - - /* Functor implementation - * - * Ionization model specific particle creation - * - * \tparam T_parentIon type of ion species that is being ionized - * \tparam T_childElectron type of electron species that is created - * \param parentIon ion instance that is ionized - * \param childElectron electron instance that is created - */ - template - DINLINE void operator()(T_Acc const & acc, T_parentIon& parentIon,T_childElectron& childElectron) + typename T_IonizationAlgorithm, + typename T_DestSpecies, + typename T_IonizationCurrent, + typename T_SrcSpecies> + struct Keldysh_Impl { - /* for not mixing operations::assign up with the nvidia functor assign */ - namespace partOp = pmacc::particles::operations; - /* each thread sets the multiMask hard on "particle" (=1) */ - childElectron[multiMask_] = 1u; - const float_X weighting = parentIon[weighting_]; - - /* each thread initializes a clone of the parent ion but leaving out - * some attributes: - * - multiMask: reading from global memory takes longer than just setting it again explicitly - * - momentum: because the electron would get a higher energy because of the ion mass - * - boundElectrons: because species other than ions or atoms do not have them - * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + using DestSpecies = pmacc::particles::meta::FindByNameOrType_t; + using SrcSpecies = pmacc::particles::meta::FindByNameOrType_t; + + using FrameType = typename SrcSpecies::FrameType; + + /* specify field to particle interpolation scheme */ + using Field2ParticleInterpolation = + 
typename pmacc::traits::Resolve>::type>::type; + + /* margins around the supercell for the interpolation of the field on the cells */ + using LowerMargin = typename GetMargin::LowerMargin; + using UpperMargin = typename GetMargin::UpperMargin; + + /* relevant area of a block */ + using BlockArea = SuperCellDescription; + + BlockArea BlockDescription; + + private: + /* define ionization ALGORITHM (calculation) for ionization MODEL */ + using IonizationAlgorithm = T_IonizationAlgorithm; + + /* random number generator */ + using RNGFactory = pmacc::random::RNGProvider; + using Distribution = pmacc::random::distributions::Uniform; + using RandomGen = typename RNGFactory::GetRandomType::type; + RandomGen randomGen; + + using TVec = MappingDesc::SuperCellSize; + + using ValueType_E = FieldE::ValueType; + using ValueType_B = FieldB::ValueType; + /* global memory EM-field and current density device databoxes */ + PMACC_ALIGN(eBox, FieldE::DataBoxType); + PMACC_ALIGN(bBox, FieldB::DataBoxType); + PMACC_ALIGN(jBox, FieldJ::DataBoxType); + /* shared memory EM-field device databoxes */ + PMACC_ALIGN(cachedE, DataBox>); + PMACC_ALIGN(cachedB, DataBox>); + + public: + /* host constructor initializing member : random number generator */ + Keldysh_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom()) + { + DataConnector& dc = Environment<>::get().DataConnector(); + /* initialize pointers on host-side E-(B-)field and current density databoxes */ + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + auto fieldJ = dc.get(FieldJ::getName(), true); + /* initialize device-side E-(B-)field and current density databoxes */ + eBox = fieldE->getDeviceDataBox(); + bBox = fieldB->getDeviceDataBox(); + jBox = fieldJ->getDeviceDataBox(); + } + + /** cache fields used by this functor + * + * @warning this is a collective method and calls synchronize + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_WorkerCfg 
pmacc::mappings::threads::WorkerCfg, configuration of the worker + * + * @param acc alpaka accelerator + * @param blockCell relative offset (in cells) to the local domain plus the guarding cells + * @param workerCfg configuration of the worker */ - auto targetElectronClone = partOp::deselect >(childElectron); - - partOp::assign(targetElectronClone, partOp::deselect(parentIon)); - - const float_X massIon = attribute::getMass(weighting,parentIon); - const float_X massElectron = attribute::getMass(weighting,childElectron); - - const float3_X electronMomentum (parentIon[momentum_]*(massElectron/massIon)); - - childElectron[momentum_] = electronMomentum; - - /* conservation of momentum - * \todo add conservation of mass */ - parentIon[momentum_] -= electronMomentum; - - /** ionization of the ion by reducing the number of bound electrons + template + DINLINE void collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const T_WorkerCfg& workerCfg) + { + /* shifting origin of jbox to supercell of particle */ + jBox = jBox.shift(blockCell); + + /* caching of E and B fields */ + cachedB = CachedBox::create<0, ValueType_B>(acc, BlockArea()); + cachedE = CachedBox::create<1, ValueType_E>(acc, BlockArea()); + + /* instance of nvidia assignment operator */ + nvidia::functors::Assign assign; + /* copy fields from global to shared */ + auto fieldBBlock = bBox.shift(blockCell); + ThreadCollective collective(workerCfg.getWorkerIdx()); + collective(acc, assign, cachedB, fieldBBlock); + /* copy fields from global to shared */ + auto fieldEBlock = eBox.shift(blockCell); + collective(acc, assign, cachedE, fieldEBlock); + + /* wait for shared memory to be initialized */ + cupla::__syncthreads(acc); + } + + /** Initialization function on device + * + * \brief Cache EM-fields on device + * and initialize possible prerequisites for ionization, like e.g. random number generator. 
* - * @warning substracting a float from a float can potentially - * create a negative boundElectrons number for the ion, - * see #1850 for details + * This function will be called inline on the device which must happen BEFORE threads diverge + * during loop execution. The reason for this is the `cupla::__syncthreads( acc )` call which is + * necessary after initializing the E-/B-field shared boxes in shared memory. */ - parentIon[boundElectrons_] -= float_X(1.); - } - - }; - -} // namespace ionization -} // namespace particles + template + DINLINE void init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset) + { + /* initialize random number generator with the local cell index in the simulation */ + this->randomGen.init(localCellOffset); + } + + /** Determine number of new macro electrons due to ionization + * + * \param ionFrame reference to frame of the to-be-ionized particles + * \param localIdx local (linear) index in super cell / frame + */ + template + DINLINE uint32_t numNewParticles(T_Acc const& acc, FrameType& ionFrame, int localIdx) + { + /* alias for the single macro-particle */ + auto particle = ionFrame[localIdx]; + /* particle position, used for field-to-particle interpolation */ + floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; + /* multi-dim coordinate of the local cell inside the super cell */ + DataSpace localCell( + DataSpaceOperations::template map(particleCellIdx)); + /* interpolation of E- */ + const picongpu::traits::FieldPosition fieldPosE; + ValueType_E eField + = Field2ParticleInterpolation()(cachedE.shift(localCell).toCursor(), pos, fieldPosE()); + /* and B-field on the particle position */ + const picongpu::traits::FieldPosition fieldPosB; + ValueType_B bField + = Field2ParticleInterpolation()(cachedB.shift(localCell).toCursor(), pos, fieldPosB()); + + /* define number of bound macro electrons before ionization */ + float_X 
prevBoundElectrons = particle[boundElectrons_]; + + IonizationAlgorithm ionizeAlgo; + /* determine number of new macro electrons to be created and energy used for ionization */ + auto retValue = ionizeAlgo(bField, eField, particle, this->randomGen(acc)); + IonizationCurrent{}( + retValue, + particle[weighting_], + jBox.shift(localCell), + eField, + acc, + pos); + + return retValue.newMacroElectrons; + } + + /* Functor implementation + * + * Ionization model specific particle creation + * + * \tparam T_parentIon type of ion species that is being ionized + * \tparam T_childElectron type of electron species that is created + * \param parentIon ion instance that is ionized + * \param childElectron electron instance that is created + */ + template + DINLINE void operator()(T_Acc const& acc, T_parentIon& parentIon, T_childElectron& childElectron) + { + /* for not mixing operations::assign up with the nvidia functor assign */ + namespace partOp = pmacc::particles::operations; + /* each thread sets the multiMask hard on "particle" (=1) */ + childElectron[multiMask_] = 1u; + const float_X weighting = parentIon[weighting_]; + + /* each thread initializes a clone of the parent ion but leaving out + * some attributes: + * - multiMask: reading from global memory takes longer than just setting it again explicitly + * - momentum: because the electron would get a higher energy because of the ion mass + * - boundElectrons: because species other than ions or atoms do not have them + * (gets AUTOMATICALLY deselected because electrons do not have this attribute) + */ + auto targetElectronClone = partOp::deselect>(childElectron); + + partOp::assign(targetElectronClone, partOp::deselect(parentIon)); + + const float_X massIon = attribute::getMass(weighting, parentIon); + const float_X massElectron = attribute::getMass(weighting, childElectron); + + const float3_X electronMomentum(parentIon[momentum_] * (massElectron / massIon)); + + childElectron[momentum_] = electronMomentum; + + /* 
conservation of momentum + * \todo add conservation of mass */ + parentIon[momentum_] -= electronMomentum; + + /** ionization of the ion by reducing the number of bound electrons + * + * @warning substracting a float from a float can potentially + * create a negative boundElectrons number for the ion, + * see #1850 for details + */ + parentIon[boundElectrons_] -= float_X(1.); + } + }; + + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/fieldIonizationCalc.def b/include/picongpu/particles/ionization/byField/fieldIonizationCalc.def index a5b94e7b94..fba51b7a4c 100644 --- a/include/picongpu/particles/ionization/byField/fieldIonizationCalc.def +++ b/include/picongpu/particles/ionization/byField/fieldIonizationCalc.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. * @@ -28,28 +28,25 @@ namespace picongpu { + namespace particles + { + namespace ionization + { + struct AlgorithmNone; -namespace particles -{ - -namespace ionization -{ - - struct AlgorithmNone; - - template - struct AlgorithmADK; + template + struct AlgorithmADK; - struct AlgorithmBSI; + struct AlgorithmBSI; - struct AlgorithmBSIEffectiveZ; + struct AlgorithmBSIEffectiveZ; - struct AlgorithmBSIStarkShifted; + struct AlgorithmBSIStarkShifted; - struct AlgorithmKeldysh; + struct AlgorithmKeldysh; -} // namespace ionization + } // namespace ionization -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/ionization/byField/fieldIonizationCalc.hpp b/include/picongpu/particles/ionization/byField/fieldIonizationCalc.hpp index c8204a9f4c..e81ec118e8 100644 --- a/include/picongpu/particles/ionization/byField/fieldIonizationCalc.hpp +++ b/include/picongpu/particles/ionization/byField/fieldIonizationCalc.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco 
Garten * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/ionization/byField/ionizers.def b/include/picongpu/particles/ionization/byField/ionizers.def index 85141b4032..6933c5a151 100644 --- a/include/picongpu/particles/ionization/byField/ionizers.def +++ b/include/picongpu/particles/ionization/byField/ionizers.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/ionization/byField/ionizers.hpp b/include/picongpu/particles/ionization/byField/ionizers.hpp index eb9e2f6814..c422cd7f3c 100644 --- a/include/picongpu/particles/ionization/byField/ionizers.hpp +++ b/include/picongpu/particles/ionization/byField/ionizers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten +/* Copyright 2015-2021 Marco Garten * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/ionization/utilities.hpp b/include/picongpu/particles/ionization/utilities.hpp index c0f68bde27..2f072d4ebf 100644 --- a/include/picongpu/particles/ionization/utilities.hpp +++ b/include/picongpu/particles/ionization/utilities.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Marco Garten, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Marco Garten, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -28,55 +28,53 @@ namespace picongpu { -namespace particles -{ -namespace ionization -{ - -namespace util -{ - - /* power 2 function */ - template - HDINLINE A square(A a) - { - return a*a; - } - /* power 2 function with different result type */ - template - HDINLINE R square(A a) - { - return a*a; - } - /* power 3 function */ - template - HDINLINE A cube(A a) - { - return a * a*a; - } - /* power 3 function with different result type */ - template - HDINLINE R cube(A a) - { - return a * a*a; - } - /* power 4 function */ - template - HDINLINE A quad(A a) - { - const A b = a*a; - return b*b; - } - /* power 4 function with different result type */ - template - HDINLINE R quad(A a) + namespace particles { - const R b = a*a; - return b*b; - } + namespace ionization + { + namespace util + { + /* power 2 function */ + template + HDINLINE A square(A a) + { + return a * a; + } + /* power 2 function with different result type */ + template + HDINLINE R square(A a) + { + return a * a; + } + /* power 3 function */ + template + HDINLINE A cube(A a) + { + return a * a * a; + } + /* power 3 function with different result type */ + template + HDINLINE R cube(A a) + { + return a * a * a; + } + /* power 4 function */ + template + HDINLINE A quad(A a) + { + const A b = a * a; + return b * b; + } + /* power 4 function with different result type */ + template + HDINLINE R quad(A a) + { + const R b = a * a; + return b * b; + } -} + } // namespace util -} // namespace ionization -} // namespace particles + } // namespace ionization + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/IBinary.def b/include/picongpu/particles/manipulators/IBinary.def index 3d5baf80e9..5df86d8f41 100644 --- a/include/picongpu/particles/manipulators/IBinary.def +++ b/include/picongpu/particles/manipulators/IBinary.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -27,42 +27,30 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ - - /** interface for a binary filtered particle functor - * - * The result of the filter is linked by a logic AND operation and the functor - * is only called if the filter result is `true`. - * The user functor and filter is passed by the manipulation algorithm - * (e.g. picongpu::particles::ManipulateDerive, ...) to this interface, there is - * no need to do this explicitly in the param files. - * - * @tparam T_BinaryFunctor binary particle functor, must contain - * `void operator()(P1 & particle1, P2 & particle2, ...)` - * and support at least two particles - * @tparam T_UnaryFilter unary particle filter, must contain `bool operator()(P particle)` - * each particle of the `T_BinaryFunctor::operator()`is passed through the filter - */ - template< - typename T_BinaryFunctor, - typename T_UnaryFilter = filter::All - > - using IBinary = pmacc::functor::Filtered< - pmacc::filter::operators::And, - pmacc::filter::Interface< - T_UnaryFilter, - 1u - >, - pmacc::functor::Interface< - T_BinaryFunctor, - 2u, - void - > - >; + namespace particles + { + namespace manipulators + { + /** interface for a binary filtered particle functor + * + * The result of the filter is linked by a logic AND operation and the functor + * is only called if the filter result is `true`. + * The user functor and filter is passed by the manipulation algorithm + * (e.g. picongpu::particles::ManipulateDerive, ...) to this interface, there is + * no need to do this explicitly in the param files. 
+ * + * @tparam T_BinaryFunctor binary particle functor, must contain + * `void operator()(P1 & particle1, P2 & particle2, ...)` + * and support at least two particles + * @tparam T_UnaryFilter unary particle filter, must contain `bool operator()(P particle)` + * each particle of the `T_BinaryFunctor::operator()`is passed through the filter + */ + template + using IBinary = pmacc::functor::Filtered< + pmacc::filter::operators::And, + pmacc::filter::Interface, + pmacc::functor::Interface>; -} // namespace manipulators -} // namespace particles + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/IUnary.def b/include/picongpu/particles/manipulators/IUnary.def index e06984ab94..d5a6dd7db0 100644 --- a/include/picongpu/particles/manipulators/IUnary.def +++ b/include/picongpu/particles/manipulators/IUnary.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,41 +27,29 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ - - /** interface for a unary filtered particle functor - * - * The functor is only called if the filter result is `true`. - * The user functor and filter is passed by the manipulation algorithm - * (e.g. picongpu::particles::Manipulate, ...) to this interface, there is - * no need to do this explicitly in the param files. 
- * - * @tparam T_UnaryFunctor unary particle functor, must contain - * `void operator()(P & particle, ...)` - * and support at least one particle - * @tparam T_UnaryFilter unary particle filter must contain `bool operator()(P particle)`, - * each particle of the `T_UnaryFunctor::operator()` is passed through the filter - */ - template< - typename T_UnaryFunctor, - typename T_UnaryFilter = filter::All - > - using IUnary = pmacc::functor::Filtered< - pmacc::filter::operators::And, - pmacc::filter::Interface< - T_UnaryFilter, - 1u - >, - pmacc::functor::Interface< - T_UnaryFunctor, - 1u, - void - > - >; + namespace particles + { + namespace manipulators + { + /** interface for a unary filtered particle functor + * + * The functor is only called if the filter result is `true`. + * The user functor and filter is passed by the manipulation algorithm + * (e.g. picongpu::particles::Manipulate, ...) to this interface, there is + * no need to do this explicitly in the param files. + * + * @tparam T_UnaryFunctor unary particle functor, must contain + * `void operator()(P & particle, ...)` + * and support at least one particle + * @tparam T_UnaryFilter unary particle filter must contain `bool operator()(P particle)`, + * each particle of the `T_UnaryFunctor::operator()` is passed through the filter + */ + template + using IUnary = pmacc::functor::Filtered< + pmacc::filter::operators::And, + pmacc::filter::Interface, + pmacc::functor::Interface>; -} // namespace manipulators -} // namespace particles + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/binary/Assign.def b/include/picongpu/particles/manipulators/binary/Assign.def index baafdf33bb..4c6f647675 100644 --- a/include/picongpu/particles/manipulators/binary/Assign.def +++ b/include/picongpu/particles/manipulators/binary/Assign.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel 
Huebl * * This file is part of PIConGPU. * @@ -25,62 +25,47 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace binary -{ -namespace acc -{ - - //! assign attributes of one particle to another - struct Assign + namespace particles { - /** execute assign operator - * - * @tparam T_DestParticle pmacc::Particle, type of the destination particle - * @tparam T_SrcParticle pmacc::Particle, type of the source particle - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particleDest destination particle - * @param particleSrc source particle - * @param ... unused particles - */ - template< - typename T_DestParticle, - typename T_SrcParticle, - typename ... T_Args - > - HDINLINE void - operator( )( - T_DestParticle & particleDest, - T_SrcParticle & particleSrc, - T_Args && ... - ) + namespace manipulators { - pmacc::particles::operations::assign( - particleDest, - particleSrc - ); - } - }; -} // namespace acc + namespace binary + { + namespace acc + { + //! assign attributes of one particle to another + struct Assign + { + /** execute assign operator + * + * @tparam T_DestParticle pmacc::Particle, type of the destination particle + * @tparam T_SrcParticle pmacc::Particle, type of the source particle + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particleDest destination particle + * @param particleSrc source particle + * @param ... unused particles + */ + template + HDINLINE void operator()(T_DestParticle& particleDest, T_SrcParticle& particleSrc, T_Args&&...) + { + pmacc::particles::operations::assign(particleDest, particleSrc); + } + }; + } // namespace acc - /** assign attributes of one particle to another - * - * Can be used as binary and higher order operator but only the first two - * particles are used for the assign operation. - * - * Assign all matching attributes of a source particle to the destination - * particle. 
Attributes that only exist in the destination species are initialized - * with the default value. Attributes that only exists in the source particle will be ignored. - */ - using Assign = generic::Free< - acc::Assign - >; + /** assign attributes of one particle to another + * + * Can be used as binary and higher order operator but only the first two + * particles are used for the assign operation. + * + * Assign all matching attributes of a source particle to the destination + * particle. Attributes that only exist in the destination species are initialized + * with the default value. Attributes that only exists in the source particle will be ignored. + */ + using Assign = generic::Free; -} // namespace binary -} // namespace manipulators -} // namespace particles + } // namespace binary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/binary/DensityWeighting.def b/include/picongpu/particles/manipulators/binary/DensityWeighting.def index 60c79e0d99..653ad9a257 100644 --- a/include/picongpu/particles/manipulators/binary/DensityWeighting.def +++ b/include/picongpu/particles/manipulators/binary/DensityWeighting.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -26,79 +26,68 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace binary -{ -namespace acc -{ - - //! 
Re-scale the weighting of a cloned particle by densityRatio - struct DensityWeighting + namespace particles { - /** Adjust the weighting of particleDes by densityRatio of particleDes & Src particle - * - * While deriving a particle (particleDes) from another (T_SrcParticle), one - * can afterwards directly normalize the weighting back to the intended density: - * - divide weighting with the `T_SrcParticle`'s densityRatio - * (to get macro particle weighting according to reference BASE_DENSITY * profile - * at this specific point in space & time) - * - multiply weighting with own densityRatio (to get this species' - * densityRatio * BASE_DENSITY * profile) - * - * This is useful when the profile and number of macro particles for both species - * shall be the same and the initialization of another profile via `CreateDensity` - * would be expensive (or one wants to keep the exact same position while deriving). - * - * @tparam T_DesParticle type of the particle species with weighting to manipulate - * @tparam T_SrcParticle type of the particle species one cloned from - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particleDest destination particle - * @param particleSrc source particle (the density ratio of this particle is used) - * @param ... unused particles - * - * @see picongpu::particles::ManipulateDerive, picongpu::kernelCloneParticles - */ - template< - typename T_DesParticle, - typename T_SrcParticle, - typename ... T_Args - > - HDINLINE void operator()( - T_DesParticle & particleDes, - T_SrcParticle const &, - T_Args && ... - ) + namespace manipulators { - const float_X densityRatioDes = - traits::GetDensityRatio< T_DesParticle >::type::getValue( ); - const float_X densityRatioSrc = - traits::GetDensityRatio< T_SrcParticle >::type::getValue( ); + namespace binary + { + namespace acc + { + //! 
Re-scale the weighting of a cloned particle by densityRatio + struct DensityWeighting + { + /** Adjust the weighting of particleDes by densityRatio of particleDes & Src particle + * + * While deriving a particle (particleDes) from another (T_SrcParticle), one + * can afterwards directly normalize the weighting back to the intended density: + * - divide weighting with the `T_SrcParticle`'s densityRatio + * (to get macro particle weighting according to reference BASE_DENSITY * profile + * at this specific point in space & time) + * - multiply weighting with own densityRatio (to get this species' + * densityRatio * BASE_DENSITY * profile) + * + * This is useful when the profile and number of macro particles for both species + * shall be the same and the initialization of another profile via `CreateDensity` + * would be expensive (or one wants to keep the exact same position while deriving). + * + * @tparam T_DesParticle type of the particle species with weighting to manipulate + * @tparam T_SrcParticle type of the particle species one cloned from + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particleDest destination particle + * @param particleSrc source particle (the density ratio of this particle is used) + * @param ... unused particles + * + * @see picongpu::particles::ManipulateDerive, picongpu::kernelCloneParticles + */ + template + HDINLINE void operator()(T_DesParticle& particleDes, T_SrcParticle const&, T_Args&&...) 
+ { + const float_X densityRatioDes = traits::GetDensityRatio::type::getValue(); + const float_X densityRatioSrc = traits::GetDensityRatio::type::getValue(); - particleDes[ weighting_ ] *= densityRatioDes / densityRatioSrc; - } - }; + particleDes[weighting_] *= densityRatioDes / densityRatioSrc; + } + }; -} // namespace acc + } // namespace acc - /** Re-scale the weighting of a cloned species by densityRatio - * - * When deriving species from each other, the new - * species "inherits" the macro-particle weighting - * of the first one. - * This functor can be used to manipulate the weighting - * of the new species' macro particles to satisfy the - * input densityRatio of it. - * - * note: needs the densityRatio flag on both species, - * used by the GetDensityRatio trait. - */ - using DensityWeighting = generic::Free< acc::DensityWeighting >; + /** Re-scale the weighting of a cloned species by densityRatio + * + * When deriving species from each other, the new + * species "inherits" the macro-particle weighting + * of the first one. + * This functor can be used to manipulate the weighting + * of the new species' macro particles to satisfy the + * input densityRatio of it. + * + * note: needs the densityRatio flag on both species, + * used by the GetDensityRatio trait. + */ + using DensityWeighting = generic::Free; -} // namespace binary -} // namespace manipulators -} // namespace particles + } // namespace binary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/binary/ProtonTimesWeighting.def b/include/picongpu/particles/manipulators/binary/ProtonTimesWeighting.def index 25550c0faa..cd56793410 100644 --- a/include/picongpu/particles/manipulators/binary/ProtonTimesWeighting.def +++ b/include/picongpu/particles/manipulators/binary/ProtonTimesWeighting.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -26,75 +26,66 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace binary -{ -namespace acc -{ - - //! Re-scale the weighting of a cloned species by numberOfProtons - struct ProtonTimesWeighting + namespace particles { - - /** Increase weighting of particleDest by proton number of SrcParticle - * - * The frame's `numberOfProtons`of `T_SrcParticle` - * is used to increase the weighting of particleDest. - * Useful to increase the weighting of macro electrons when cloned from an - * ion with Z>1. Otherwise one would need Z macro electrons (each with the - * same weighting as the initial ion) to keep the charge of a pre-ionized - * atom neutral. - * - * @tparam T_DestParticle type of the particle species with weighting to manipulate - * @tparam T_SrcParticle type of the particle species with proton number Z - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particleDest destination particle - * @param source particle (the number of protons of this particle is used) - * @param unused particles - * - * @see picongpu::particles::ManipulateDerive, picongpu::particles::Manipulate - */ - template< - typename T_DesParticle, - typename T_SrcParticle, - typename ... T_Args - > - HDINLINE void operator()( - T_DesParticle & particleDest, - T_SrcParticle const &, - T_Args && ... - ) + namespace manipulators { - float_X const protonNumber = traits::GetAtomicNumbers< T_SrcParticle >::type::numberOfProtons; - particleDest[ weighting_ ] *= protonNumber; - } - }; -} // namespace acc + namespace binary + { + namespace acc + { + //! Re-scale the weighting of a cloned species by numberOfProtons + struct ProtonTimesWeighting + { + /** Increase weighting of particleDest by proton number of SrcParticle + * + * The frame's `numberOfProtons`of `T_SrcParticle` + * is used to increase the weighting of particleDest. + * Useful to increase the weighting of macro electrons when cloned from an + * ion with Z>1. 
Otherwise one would need Z macro electrons (each with the + * same weighting as the initial ion) to keep the charge of a pre-ionized + * atom neutral. + * + * @tparam T_DestParticle type of the particle species with weighting to manipulate + * @tparam T_SrcParticle type of the particle species with proton number Z + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particleDest destination particle + * @param source particle (the number of protons of this particle is used) + * @param unused particles + * + * @see picongpu::particles::ManipulateDerive, picongpu::particles::Manipulate + */ + template + HDINLINE void operator()(T_DesParticle& particleDest, T_SrcParticle const&, T_Args&&...) + { + float_X const protonNumber + = traits::GetAtomicNumbers::type::numberOfProtons; + particleDest[weighting_] *= protonNumber; + } + }; + } // namespace acc - /** Re-scale the weighting of a cloned species by numberOfProtons - * - * When deriving species from each other, the new - * species "inherits" the macro-particle weighting - * of the first one. - * This functor can be used to manipulate the weighting - * of the new species' macro particles to be a multiplied by - * the number of protons of the initial species. - * - * As an example, this is useful when initializing a quasi-neutral, - * pre-ionized plasma of ions and electrons. Electrons can be created - * from ions via deriving and increasing their weight to avoid simulating - * multiple macro electrons per macro ion (with Z>1). - * - * note: needs the atomicNumbers flag on the initial species, - * used by the GetAtomicNumbers trait. - */ - using ProtonTimesWeighting = generic::Free< acc::ProtonTimesWeighting >; + /** Re-scale the weighting of a cloned species by numberOfProtons + * + * When deriving species from each other, the new + * species "inherits" the macro-particle weighting + * of the first one. 
+ * This functor can be used to manipulate the weighting + * of the new species' macro particles to be a multiplied by + * the number of protons of the initial species. + * + * As an example, this is useful when initializing a quasi-neutral, + * pre-ionized plasma of ions and electrons. Electrons can be created + * from ions via deriving and increasing their weight to avoid simulating + * multiple macro electrons per macro ion (with Z>1). + * + * note: needs the atomicNumbers flag on the initial species, + * used by the GetAtomicNumbers trait. + */ + using ProtonTimesWeighting = generic::Free; -} // namespace binary -} // namespace manipulators -} // namespace particles + } // namespace binary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/binary/UnboundElectronsTimesWeighting.def b/include/picongpu/particles/manipulators/binary/UnboundElectronsTimesWeighting.def index 21ce240411..041436b2e6 100644 --- a/include/picongpu/particles/manipulators/binary/UnboundElectronsTimesWeighting.def +++ b/include/picongpu/particles/manipulators/binary/UnboundElectronsTimesWeighting.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -26,77 +26,71 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace binary -{ -namespace acc -{ - - //! Re-scale the weighting of a cloned species by numberOfProtons - boundElectrons - struct UnboundElectronsTimesWeighting + namespace particles { - - /** Increase weighting of particleDest by ... number of SrcParticle - * - * The frame's `numberOfProtons`of `T_SrcParticle` - * is used to increase the weighting of particleDest. - * Useful to increase the weighting of macro electrons when cloned from an - * ion with Z>1. 
Otherwise one would need Z macro electrons (each with the - * same weighting as the initial ion) to keep the charge of a pre-ionized - * atom neutral. - * - * @tparam T_DestParticle type of the particle species with weighting to manipulate - * @tparam T_SrcParticle type of the particle species with proton number Z - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particleDest destination particle - * @param source particle (the number of protons of this particle is used) - * @param unused particles - * - * @see picongpu::particles::ManipulateDerive, picongpu::particles::Manipulate - */ - template< - typename T_DesParticle, - typename T_SrcParticle, - typename ... T_Args - > - DINLINE void operator()( - T_DesParticle & particleDest, - T_SrcParticle const & particleSrc, - T_Args && ... - ) + namespace manipulators { - float_X const protonNumber = traits::GetAtomicNumbers< T_SrcParticle >::type::numberOfProtons; - float_X const boundElectrons = particleSrc[ boundElectrons_ ]; - float_X const freeElectrons = protonNumber - boundElectrons; - particleDest[ weighting_ ] *= freeElectrons; - } - }; -} // namespace acc + namespace binary + { + namespace acc + { + //! Re-scale the weighting of a cloned species by numberOfProtons - boundElectrons + struct UnboundElectronsTimesWeighting + { + /** Increase weighting of particleDest by ... number of SrcParticle + * + * The frame's `numberOfProtons`of `T_SrcParticle` + * is used to increase the weighting of particleDest. + * Useful to increase the weighting of macro electrons when cloned from an + * ion with Z>1. Otherwise one would need Z macro electrons (each with the + * same weighting as the initial ion) to keep the charge of a pre-ionized + * atom neutral. 
+ * + * @tparam T_DestParticle type of the particle species with weighting to manipulate + * @tparam T_SrcParticle type of the particle species with proton number Z + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particleDest destination particle + * @param source particle (the number of protons of this particle is used) + * @param unused particles + * + * @see picongpu::particles::ManipulateDerive, picongpu::particles::Manipulate + */ + template + DINLINE void operator()( + T_DesParticle& particleDest, + T_SrcParticle const& particleSrc, + T_Args&&...) + { + float_X const protonNumber + = traits::GetAtomicNumbers::type::numberOfProtons; + float_X const boundElectrons = particleSrc[boundElectrons_]; + float_X const freeElectrons = protonNumber - boundElectrons; + particleDest[weighting_] *= freeElectrons; + } + }; + } // namespace acc - /** Re-scale the weighting of a cloned species by numberOfProtons - ... - * - * When deriving species from each other, the new - * species "inherits" the macro-particle weighting - * of the first one. - * This functor can be used to manipulate the weighting - * of the new species' macro particles to be a multiplied by - * the number of protons of the initial species. - * - * As an example, this is useful when initializing a quasi-neutral, - * pre-ionized plasma of ions and electrons. Electrons can be created - * from ions via deriving and increasing their weight to avoid simulating - * multiple macro electrons per macro ion (with Z>1). - * - * note: needs the atomicNumbers flag on the initial species, - * used by the GetAtomicNumbers trait. - */ - using UnboundElectronsTimesWeighting = generic::Free< acc::UnboundElectronsTimesWeighting >; + /** Re-scale the weighting of a cloned species by numberOfProtons - ... + * + * When deriving species from each other, the new + * species "inherits" the macro-particle weighting + * of the first one. 
+ * This functor can be used to manipulate the weighting + * of the new species' macro particles to be a multiplied by + * the number of protons of the initial species. + * + * As an example, this is useful when initializing a quasi-neutral, + * pre-ionized plasma of ions and electrons. Electrons can be created + * from ions via deriving and increasing their weight to avoid simulating + * multiple macro electrons per macro ion (with Z>1). + * + * note: needs the atomicNumbers flag on the initial species, + * used by the GetAtomicNumbers trait. + */ + using UnboundElectronsTimesWeighting = generic::Free; -} // namespace binary -} // namespace manipulators -} // namespace particles + } // namespace binary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/generic/Free.def b/include/picongpu/particles/manipulators/generic/Free.def index 4ab31c5725..cfc40074a4 100644 --- a/include/picongpu/particles/manipulators/generic/Free.def +++ b/include/picongpu/particles/manipulators/generic/Free.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -22,41 +22,40 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace generic -{ - - /** call simple free user defined manipulators - * - * @tparam T_Functor user defined manipulators - * **optional**: can implement **one** host side constructor - * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` - * - * example for `particle.param`: set in cell position to zero - * @code{.cpp} - * - * struct FunctorInCellPositionZero - * { - * template< typename T_Particle > - * HDINLINE void operator()( T_Particle & particle ) - * { - * particle[ position_ ] = floatD_X::create( 0.0 ); - * } - * static constexpr char const * name = "inCellPositionZero"; - * }; - * - * using InCellPositionZero = generic::Free< - * FunctorInCellPositionZero - * >; - * @endcode - */ - template< typename T_Functor > - struct Free; + namespace particles + { + namespace manipulators + { + namespace generic + { + /** call simple free user defined manipulators + * + * @tparam T_Functor user defined manipulators + * **optional**: can implement **one** host side constructor + * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` + * + * example for `particle.param`: set in cell position to zero + * @code{.cpp} + * + * struct FunctorInCellPositionZero + * { + * template< typename T_Particle > + * HDINLINE void operator()( T_Particle & particle ) + * { + * particle[ position_ ] = floatD_X::create( 0.0 ); + * } + * static constexpr char const * name = "inCellPositionZero"; + * }; + * + * using InCellPositionZero = generic::Free< + * FunctorInCellPositionZero + * >; + * @endcode + */ + template + struct Free; -} // namespace generic -} // namespace manipulators -} // namespace particles + } // namespace generic + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/generic/Free.hpp b/include/picongpu/particles/manipulators/generic/Free.hpp index 6f76bb78cf..6dc0e1ca98 100644 --- 
a/include/picongpu/particles/manipulators/generic/Free.hpp +++ b/include/picongpu/particles/manipulators/generic/Free.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -28,106 +28,88 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace generic -{ -namespace acc -{ - /** wrapper for the user manipulator functor on the accelerator - * - * @tparam T_Functor user defined manipulators - */ - template< typename T_Functor > - struct Free : private T_Functor + namespace particles { - //! type of the user manipulators - using Functor = T_Functor; - - //! store user manipulators instance - HDINLINE Free( Functor const & manipulators ) : - Functor( manipulators ) - { - } - - /** execute the user manipulator functor - * - * @tparam T_Args type of the arguments passed to the user manipulator functor - * - * @param args arguments passed to the user functor - */ - template< - typename T_Acc, - typename ... T_Args > - HDINLINE - void operator( )( - T_Acc const &, - T_Args && ... args - ) + namespace manipulators { - Functor::operator( )( args ... ); - } - }; -} // namespace acc + namespace generic + { + namespace acc + { + /** wrapper for the user manipulator functor on the accelerator + * + * @tparam T_Functor user defined manipulators + */ + template + struct Free : private T_Functor + { + //! type of the user manipulators + using Functor = T_Functor; - template< typename T_Functor > - struct Free : protected functor::User< T_Functor > - { + //! 
store user manipulators instance + HDINLINE Free(Functor const& manipulators) : Functor(manipulators) + { + } - using Functor = functor::User< T_Functor >; + /** execute the user manipulator functor + * + * @tparam T_Args type of the arguments passed to the user manipulator functor + * + * @param args arguments passed to the user functor + */ + template + HDINLINE void operator()(T_Acc const&, T_Args&&... args) + { + Functor::operator()(args...); + } + }; + } // namespace acc - template< typename T_SpeciesType > - struct apply - { - using type = Free; - }; + template + struct Free : protected functor::User + { + using Functor = functor::User; - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE Free( uint32_t currentStep ) : Functor( currentStep ) - { - } + template + struct apply + { + using type = Free; + }; - /** create device manipulator functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param offset (in supercells, without any guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::Free< Functor > - operator()( - T_Acc const &, - DataSpace< simDim > const &, - T_WorkerCfg const & - ) const - { - return acc::Free< Functor >( *static_cast< Functor const * >( this ) ); - } + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE Free(uint32_t currentStep) : Functor(currentStep) + { + } - //! 
get the name of the functor - static - HINLINE std::string - getName( ) - { - // we provide the name from the param class - return Functor::name; - } + /** create device manipulator functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param offset (in supercells, without any guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::Free operator()(T_Acc const&, DataSpace const&, T_WorkerCfg const&) + const + { + return acc::Free(*static_cast(this)); + } - }; + //! get the name of the functor + static HINLINE std::string getName() + { + // we provide the name from the param class + return Functor::name; + } + }; -} // namespace generic -} // namespace manipulators -} // namespace particles + } // namespace generic + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/generic/FreeRng.def b/include/picongpu/particles/manipulators/generic/FreeRng.def index e11c8c2b4e..3914986810 100644 --- a/include/picongpu/particles/manipulators/generic/FreeRng.def +++ b/include/picongpu/particles/manipulators/generic/FreeRng.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -27,51 +27,47 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace generic -{ - - /** call simple free user defined functor and provide a random number generator - * - * - * @tparam T_Functor user defined unary functor - * @tparam T_Distribution pmacc::random::distributions, random number distribution - * - * example for `particle.param`: add - * @code{.cpp} - * #include - * - * struct FunctorRandomX - * { - * template< typename T_Rng, typename T_Particle > - * HDINLINE void operator()( T_Rng& rng, T_Particle& particle ) - * { - * particle[ position_ ].x() = rng(); - * } - * static constexpr char const * name = "randomXPos"; - * }; - * - * using RandomXPos = generic::FreeRng< - * FunctorRandomX, - * pmacc::random::distributions::Uniform< float_X > - * >; - * @endcode - * - * and to `InitPipeline` in `speciesInitialization.param`: - * @code{.cpp} - * Manipulate< manipulators::RandomXPos, SPECIES_NAME > - * @endcode - */ - template< - typename T_Functor, - typename T_Distribution - > - struct FreeRng; + namespace particles + { + namespace manipulators + { + namespace generic + { + /** call simple free user defined functor and provide a random number generator + * + * + * @tparam T_Functor user defined unary functor + * @tparam T_Distribution pmacc::random::distributions, random number distribution + * + * example for `particle.param`: add + * @code{.cpp} + * #include + * + * struct FunctorRandomX + * { + * template< typename T_Rng, typename T_Particle > + * HDINLINE void operator()( T_Rng& rng, T_Particle& particle ) + * { + * particle[ position_ ].x() = rng(); + * } + * static constexpr char const * name = "randomXPos"; + * }; + * + * using RandomXPos = generic::FreeRng< + * FunctorRandomX, + * pmacc::random::distributions::Uniform< float_X > + * >; + * @endcode + * + * and to `InitPipeline` in `speciesInitialization.param`: + * @code{.cpp} + * Manipulate< manipulators::RandomXPos, SPECIES_NAME > + * @endcode + */ + template + 
struct FreeRng; -} // namespace generic -} // namespace manipulators -} // namespace particles + } // namespace generic + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/generic/FreeRng.hpp b/include/picongpu/particles/manipulators/generic/FreeRng.hpp index adf5954b43..7937e33171 100644 --- a/include/picongpu/particles/manipulators/generic/FreeRng.hpp +++ b/include/picongpu/particles/manipulators/generic/FreeRng.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Alexander Grund +/* Copyright 2015-2021 Rene Widera, Alexander Grund * * This file is part of PIConGPU. * @@ -31,157 +31,103 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace generic -{ -namespace acc -{ - template< - typename T_Functor, - typename T_RngType - > - struct FreeRng : private T_Functor - { - - using Functor = T_Functor; - using RngType = T_RngType; - - HDINLINE FreeRng( - Functor const & functor, - RngType const & rng - ) : - T_Functor( functor ), m_rng( rng ) - { - } - - /** call user functor - * - * The random number generator is initialized with the first call. - * - * @tparam T_Particle type of the particle to manipulate - * @tparam T_Args type of the arguments passed to the user functor - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param particle particle which is given to the user functor - * @return void is used to enable the operator if the user functor except two arguments - */ - template< - typename T_Particle, - typename ... T_Args, - typename T_Acc - > - HDINLINE - void operator()( - T_Acc const &, - T_Particle& particle, - T_Args && ... args - ) - { - namespace nvrng = nvidia::rng; - - Functor::operator()( - m_rng, - particle, - args ... 
- ); - } - - private: - - RngType m_rng; - }; -} // namespace acc - - template< - typename T_Functor, - typename T_Distribution - > - struct FreeRng : - protected functor::User< T_Functor >, - private picongpu::particles::functor::misc::Rng< - T_Distribution - > + namespace particles { - - template< typename T_SpeciesType > - struct apply - { - using type = FreeRng; - }; - - using RngGenerator = picongpu::particles::functor::misc::Rng< - T_Distribution - >; - - using RngType = typename RngGenerator::RandomGen; - - using Functor = functor::User< T_Functor >; - using Distribution = T_Distribution; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE FreeRng( uint32_t currentStep ) : - Functor( currentStep ), - RngGenerator( currentStep ) - { - } - - /** create functor for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param workerCfg configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE auto - operator()( - T_Acc const & acc, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & workerCfg - ) const - -> acc::FreeRng< - Functor, - RngType - > - { - RngType const rng = ( *static_cast< RngGenerator const * >( this ) )( - acc, - localSupercellOffset, - workerCfg - ); - - return acc::FreeRng< - Functor, - RngType - >( - *static_cast< Functor const * >( this ), - rng - ); - } - - static - HINLINE std::string - getName( ) + namespace manipulators { - // we provide the name from the param class - return Functor::name; - } - }; - -} // namespace generic -} // namespace manipulators -} // namespace particles + namespace generic + { + namespace acc + { + template + struct FreeRng : private T_Functor + { + using 
Functor = T_Functor; + using RngType = T_RngType; + + HDINLINE FreeRng(Functor const& functor, RngType const& rng) : T_Functor(functor), m_rng(rng) + { + } + + /** call user functor + * + * The random number generator is initialized with the first call. + * + * @tparam T_Particle type of the particle to manipulate + * @tparam T_Args type of the arguments passed to the user functor + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param particle particle which is given to the user functor + * @return void is used to enable the operator if the user functor except two arguments + */ + template + HDINLINE void operator()(T_Acc const&, T_Particle& particle, T_Args&&... args) + { + Functor::operator()(m_rng, particle, args...); + } + + private: + RngType m_rng; + }; + } // namespace acc + + template + struct FreeRng + : protected functor::User + , private picongpu::particles::functor::misc::Rng + { + template + struct apply + { + using type = FreeRng; + }; + + using RngGenerator = picongpu::particles::functor::misc::Rng; + + using RngType = typename RngGenerator::RandomGen; + + using Functor = functor::User; + using Distribution = T_Distribution; + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE FreeRng(uint32_t currentStep) : Functor(currentStep), RngGenerator(currentStep) + { + } + + /** create functor for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param workerCfg configuration of the worker + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const& workerCfg) const -> acc::FreeRng + { + RngType const rng + = (*static_cast(this))(acc, localSupercellOffset, workerCfg); 
+ + return acc::FreeRng(*static_cast(this), rng); + } + + static HINLINE std::string getName() + { + // we provide the name from the param class + return Functor::name; + } + }; + + } // namespace generic + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/generic/None.def b/include/picongpu/particles/manipulators/generic/None.def index 3ada06dd0a..272aba07e6 100644 --- a/include/picongpu/particles/manipulators/generic/None.def +++ b/include/picongpu/particles/manipulators/generic/None.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -22,35 +22,32 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace generic -{ -namespace acc -{ - struct None + namespace particles { - template< typename ... T_Args > - HDINLINE void - operator( )( T_Args && ... ) + namespace manipulators { - } + namespace generic + { + namespace acc + { + struct None + { + template + HDINLINE void operator()(T_Args&&...) 
+ { + } - static constexpr char const * name = "None"; - }; -} // namespace acc + static constexpr char const* name = "None"; + }; + } // namespace acc - /** do nothing with the particle - * - * The call of this functor results in an empty operation - */ - using None = Free< - acc::None - >; + /** do nothing with the particle + * + * The call of this functor results in an empty operation + */ + using None = Free; -} // namespace generic -} // namespace manipulators -} // namespace particles + } // namespace generic + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/manipulators.def b/include/picongpu/particles/manipulators/manipulators.def index 0e0e2b012f..148b0a7459 100644 --- a/include/picongpu/particles/manipulators/manipulators.def +++ b/include/picongpu/particles/manipulators/manipulators.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Axel Huebl +/* Copyright 2014-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/manipulators/manipulators.hpp b/include/picongpu/particles/manipulators/manipulators.hpp index 9466f70b69..bf4ac3e714 100644 --- a/include/picongpu/particles/manipulators/manipulators.hpp +++ b/include/picongpu/particles/manipulators/manipulators.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Axel Huebl +/* Copyright 2014-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/manipulators/unary/CopyAttribute.def b/include/picongpu/particles/manipulators/unary/CopyAttribute.def index 97a3fbf722..9b83c48e80 100644 --- a/include/picongpu/particles/manipulators/unary/CopyAttribute.def +++ b/include/picongpu/particles/manipulators/unary/CopyAttribute.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -24,68 +24,50 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace acc -{ - - /** helper functor to copy a particle source attribute to a destination attribute - * - * @tparam T_DestAttribute type of the destination attribute e.g. `momentumPrev1` - * @tparam T_SrcAttribute type of the source attribute e.g. `momentum` - */ - template< - typename T_DestAttribute, - typename T_SrcAttribute - > - struct CopyAttribute + namespace particles { - /** copy attribute - * - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator( )( - T_Particle & particle, - T_Args && ... - ) + namespace manipulators { - particle[ T_DestAttribute{ } ] = particle[ T_SrcAttribute{ } ]; - } - }; + namespace unary + { + namespace acc + { + /** helper functor to copy a particle source attribute to a destination attribute + * + * @tparam T_DestAttribute type of the destination attribute e.g. `momentumPrev1` + * @tparam T_SrcAttribute type of the source attribute e.g. `momentum` + */ + template + struct CopyAttribute + { + /** copy attribute + * + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particle particle to be manipulated + * @param ... unused particles + */ + template + HDINLINE void operator()(T_Particle& particle, T_Args&&...) + { + particle[T_DestAttribute{}] = particle[T_SrcAttribute{}]; + } + }; -} // namespace acc + } // namespace acc - /** copy a particle source attribute to a destination attribute - * - * This is an unary functor and operates on one particle. - * - * @tparam T_DestAttribute type of the destination attribute e.g. 
`momentumPrev1` - * @tparam T_SrcAttribute type of the source attribute e.g. `momentum` - */ - template< - typename T_DestAttribute, - typename T_SrcAttribute - > - using CopyAttribute = generic::Free< - acc::CopyAttribute< - T_DestAttribute, - T_SrcAttribute - > - >; + /** copy a particle source attribute to a destination attribute + * + * This is an unary functor and operates on one particle. + * + * @tparam T_DestAttribute type of the destination attribute e.g. `momentumPrev1` + * @tparam T_SrcAttribute type of the source attribute e.g. `momentum` + */ + template + using CopyAttribute = generic::Free>; -} // namespace unary -} // namespace manipulators -} // namespace particles + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/Drift.def b/include/picongpu/particles/manipulators/unary/Drift.def index 2f1adcaf0c..106997c130 100644 --- a/include/picongpu/particles/manipulators/unary/Drift.def +++ b/include/picongpu/particles/manipulators/unary/Drift.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -25,64 +25,49 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace param -{ - CONST_VECTOR( - float_X, - 3, - DriftNegative_direction, - -1.0, - 0.0, - 0.0 - ); - - //! 
configuration for the unary manipulator functor Drift - struct DriftCfg + namespace particles { - /** Initial particle drift velocity for electrons and ions - * Examples: - * - No drift is equal to 1.0 - * unit: none - */ - static constexpr float_64 gamma = 1.021; - DriftNegative_direction_t const direction; - }; -} // namespace param + namespace manipulators + { + namespace unary + { + namespace param + { + CONST_VECTOR(float_X, 3, DriftNegative_direction, -1.0, 0.0, 0.0); -namespace acc -{ - template< - typename T_ParamClass, - typename T_ValueFunctor - > - struct Drift; -} // namespace acc + //! configuration for the unary manipulator functor Drift + struct DriftCfg + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + DriftNegative_direction_t const direction; + }; + } // namespace param + + namespace acc + { + template + struct Drift; + } // namespace acc - /** change particle's momentum based on speed - * - * allow to manipulate a speed to a particle - * - * @tparam T_ParamClass param::DriftCfg, configuration parameter - * @tparam T_ValueFunctor pmacc::nvidia::functors::*, binary functor type to manipulate the momentum attribute - */ - template< - typename T_ParamClass = param::DriftCfg, - typename T_ValueFunctor = pmacc::nvidia::functors::Add - > - using Drift = generic::Free< - acc::Drift< - T_ParamClass, - T_ValueFunctor - > - >; + /** change particle's momentum based on speed + * + * allow to manipulate a speed to a particle + * + * @tparam T_ParamClass param::DriftCfg, configuration parameter + * @tparam T_ValueFunctor pmacc::nvidia::functors::*, binary functor type to manipulate the momentum + * attribute + */ + template< + typename T_ParamClass = param::DriftCfg, + typename T_ValueFunctor = pmacc::nvidia::functors::Add> + using Drift = generic::Free>; -} // namespace unary -} // namespace manipulators -} // namespace particles + } // 
namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/Drift.hpp b/include/picongpu/particles/manipulators/unary/Drift.hpp index 0c29a83e6a..6fdb01f906 100644 --- a/include/picongpu/particles/manipulators/unary/Drift.hpp +++ b/include/picongpu/particles/manipulators/unary/Drift.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -25,76 +25,58 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace acc -{ - - /** manipulate the speed - * - * @tparam T_ParamClass picongpu::particles::manipulators::unary::param::DriftCfg, - * type with compile configuration - * @tparam T_ValueFunctor pmacc::nvidia::functors, binary operator type to reduce current and new value - */ - template< - typename T_ParamClass, - typename T_ValueFunctor - > - struct Drift : private T_ValueFunctor + namespace particles { - /** manipulate the speed of the particle - * - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator()( - T_Particle & particle, - T_Args && ... 
- ) + namespace manipulators { - using ParamClass = T_ParamClass; - using ValueFunctor = T_ValueFunctor; + namespace unary + { + namespace acc + { + /** manipulate the speed + * + * @tparam T_ParamClass picongpu::particles::manipulators::unary::param::DriftCfg, + * type with compile configuration + * @tparam T_ValueFunctor pmacc::nvidia::functors, binary operator type to reduce current and new + * value + */ + template + struct Drift : private T_ValueFunctor + { + /** manipulate the speed of the particle + * + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particle particle to be manipulated + * @param ... unused particles + */ + template + HDINLINE void operator()(T_Particle& particle, T_Args&&...) + { + using ParamClass = T_ParamClass; + using ValueFunctor = T_ValueFunctor; - float_X const macroWeighting = particle[ weighting_ ]; - float_X const macroMass = attribute::getMass( - macroWeighting, - particle - ); + float_X const macroWeighting = particle[weighting_]; + float_X const macroMass = attribute::getMass(macroWeighting, particle); - float_64 const myGamma = ParamClass::gamma; + float_64 const myGamma = ParamClass::gamma; - float_64 const initFreeBeta = - math::sqrt( 1.0 - - 1.0 / ( myGamma * myGamma) ); + float_64 const initFreeBeta = math::sqrt(1.0 - 1.0 / (myGamma * myGamma)); - float3_X const driftDirection( ParamClass( ).direction ); - float3_X const normDir = driftDirection / math::abs( driftDirection ); + float3_X const driftDirection(ParamClass().direction); + float3_X const normDir = driftDirection / math::abs(driftDirection); - float3_X const mom( normDir * - float_X( - myGamma * initFreeBeta * - float_64( macroMass ) * - float_64( SPEED_OF_LIGHT ) - ) - ); + float3_X const mom( + normDir + * float_X(myGamma * initFreeBeta * float_64(macroMass) * float_64(SPEED_OF_LIGHT))); - ValueFunctor::operator( )( particle[ momentum_ ], mom ); - } - }; + 
ValueFunctor::operator()(particle[momentum_], mom); + } + }; -} // namespace acc -} // namespace unary -} // namespace manipulators -} // namespace particles + } // namespace acc + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.def b/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.def index ce55688216..ea9da28478 100644 --- a/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.def +++ b/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera, Axel Huebl +/* Copyright 2017-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -27,44 +27,44 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ - /** call simple free user defined manipulators and provide the cell information - * - * The functor passes the cell offset of the particle relative to the total - * domain origin into the functor. 
- * - * @tparam T_Functor user defined unary functor - * - * example for `particle.param`: set a user-defined species attribute y0 - * (type: uint32_t) to the current total y-cell index - * @code{.cpp} - * struct FunctorSaveYcell - * { - * template< typename T_Particle > - * HDINLINE void operator()( - * DataSpace< simDim > const & particleOffsetToTotalOrigin, - * T_Particle & particle - * ) - * { - * particle[ y0_ ] = particleOffsetToTotalOrigin.y(); - * } - * static constexpr char const * name = "saveYcell"; - * }; - * - * using SaveYcell = unary::FreeTotalCellOffset< - * FunctorSaveYcell - * >; - * @endcode - */ - template< typename T_Functor > - struct FreeTotalCellOffset; + namespace particles + { + namespace manipulators + { + namespace unary + { + /** call simple free user defined manipulators and provide the cell information + * + * The functor passes the cell offset of the particle relative to the total + * domain origin into the functor. + * + * @tparam T_Functor user defined unary functor + * + * example for `particle.param`: set a user-defined species attribute y0 + * (type: uint32_t) to the current total y-cell index + * @code{.cpp} + * struct FunctorSaveYcell + * { + * template< typename T_Particle > + * HDINLINE void operator()( + * DataSpace< simDim > const & particleOffsetToTotalOrigin, + * T_Particle & particle + * ) + * { + * particle[ y0_ ] = particleOffsetToTotalOrigin.y(); + * } + * static constexpr char const * name = "saveYcell"; + * }; + * + * using SaveYcell = unary::FreeTotalCellOffset< + * FunctorSaveYcell + * >; + * @endcode + */ + template + struct FreeTotalCellOffset; -} // namespace unary -} // namespace manipulators -} // namespace particles + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.hpp b/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.hpp index e56bfa7485..3d1ee36c25 100644 
--- a/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.hpp +++ b/include/picongpu/particles/manipulators/unary/FreeTotalCellOffset.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera, Axel Huebl +/* Copyright 2017-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -28,133 +28,106 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace acc -{ - template< typename T_Functor > - struct FreeTotalCellOffset : private T_Functor + namespace particles { - - using Functor = T_Functor; - - HDINLINE FreeTotalCellOffset( - Functor const & functor, - DataSpace< simDim > const & superCellToLocalOriginCellOffset - ) : - T_Functor( functor ), - m_superCellToLocalOriginCellOffset( superCellToLocalOriginCellOffset ) + namespace manipulators { - } - - /** call user functor - * - * The random number generator is initialized with the first call. - * - * @tparam T_Particle type of the particle to manipulate - * @tparam T_Args type of the arguments passed to the user functor - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param particle particle which is given to the user functor - * @return void is used to enable the operator if the user functor expects two arguments - */ - template< - typename T_Particle, - typename T_Acc - > - HDINLINE - void operator()( - T_Acc const &, - T_Particle & particle - ) - { - DataSpace< simDim > const cellInSuperCell( - DataSpaceOperations< simDim >::template map< SuperCellSize >( particle[ localCellIdx_ ] ) - ); - Functor::operator( )( - m_superCellToLocalOriginCellOffset + cellInSuperCell, - particle - ); - } - - private: - - DataSpace< simDim > const m_superCellToLocalOriginCellOffset; - }; -} // namespace acc - - template< typename T_Functor > - struct FreeTotalCellOffset : - protected functor::User< T_Functor >, - private functor::misc::TotalCellOffset - { - using CellOffsetFunctor = functor::misc::TotalCellOffset; - using Functor 
= functor::User< T_Functor >; - - template< typename T_SpeciesType > - struct apply - { - using type = FreeTotalCellOffset; - }; - - /** constructor - * - * @param currentStep current simulation time step - */ - HINLINE FreeTotalCellOffset( uint32_t currentStep ) : - Functor( currentStep ), - CellOffsetFunctor( currentStep ) - { - } - - /** create functor for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param workerCfg configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE auto - operator()( - T_Acc const & acc, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & workerCfg - ) const - -> acc::FreeTotalCellOffset< Functor > - { - auto & cellOffsetFunctor = *static_cast< CellOffsetFunctor const * >( this ); - return acc::FreeTotalCellOffset< Functor >( - *static_cast< Functor const * >( this ), - cellOffsetFunctor( - acc, - localSupercellOffset, - workerCfg - ) - ); - } - - static - HINLINE std::string - getName( ) - { - // we provide the name from the param class - return Functor::name; - } - }; - -} // namespace unary -} // namespace manipulators -} // namespace particles + namespace unary + { + namespace acc + { + template + struct FreeTotalCellOffset : private T_Functor + { + using Functor = T_Functor; + + HDINLINE FreeTotalCellOffset( + Functor const& functor, + DataSpace const& superCellToLocalOriginCellOffset) + : T_Functor(functor) + , m_superCellToLocalOriginCellOffset(superCellToLocalOriginCellOffset) + { + } + + /** call user functor + * + * The random number generator is initialized with the first call. 
+ * + * @tparam T_Particle type of the particle to manipulate + * @tparam T_Args type of the arguments passed to the user functor + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param particle particle which is given to the user functor + * @return void is used to enable the operator if the user functor expects two arguments + */ + template + HDINLINE void operator()(T_Acc const&, T_Particle& particle) + { + DataSpace const cellInSuperCell( + DataSpaceOperations::template map(particle[localCellIdx_])); + Functor::operator()(m_superCellToLocalOriginCellOffset + cellInSuperCell, particle); + } + + private: + DataSpace const m_superCellToLocalOriginCellOffset; + }; + } // namespace acc + + template + struct FreeTotalCellOffset + : protected functor::User + , private functor::misc::TotalCellOffset + { + using CellOffsetFunctor = functor::misc::TotalCellOffset; + using Functor = functor::User; + + template + struct apply + { + using type = FreeTotalCellOffset; + }; + + /** constructor + * + * @param currentStep current simulation time step + */ + HINLINE FreeTotalCellOffset(uint32_t currentStep) + : Functor(currentStep) + , CellOffsetFunctor(currentStep) + { + } + + /** create functor for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param workerCfg configuration of the worker + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const& workerCfg) const -> acc::FreeTotalCellOffset + { + auto& cellOffsetFunctor = *static_cast(this); + return acc::FreeTotalCellOffset( + *static_cast(this), + cellOffsetFunctor(acc, localSupercellOffset, workerCfg)); + } + + static HINLINE std::string getName() + { + // we provide 
the name from the param class + return Functor::name; + } + }; + + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/RandomPosition.def b/include/picongpu/particles/manipulators/unary/RandomPosition.def index 91868f5047..f71f6b07da 100644 --- a/include/picongpu/particles/manipulators/unary/RandomPosition.def +++ b/include/picongpu/particles/manipulators/unary/RandomPosition.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -28,71 +28,60 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace acc -{ - - /** set the particle attribute position - * - * The particle attribute position is overwritten with a random - * in-cell position. - */ - struct RandomPosition + namespace particles { - /** set in-cell position - * - * @tparam T_Rng pmacc::nvidia::rng::RNG, type of the random number generator - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param rng random number generator - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Rng, - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator()( - T_Rng & rng, - T_Particle & particle, - T_Args && ... - ) + namespace manipulators { - floatD_X tmpPos; + namespace unary + { + namespace acc + { + /** set the particle attribute position + * + * The particle attribute position is overwritten with a random + * in-cell position. 
+ */ + struct RandomPosition + { + /** set in-cell position + * + * @tparam T_Rng functor::misc::RngWrapper, type of the random number generator + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param rng random number generator + * @param particle particle to be manipulated + * @param ... unused particles + */ + template + HDINLINE void operator()(T_Rng& rng, T_Particle& particle, T_Args&&...) + { + floatD_X tmpPos; - for( uint32_t d = 0; d < simDim; ++d ) - tmpPos[ d ] = rng( ); + for(uint32_t d = 0; d < simDim; ++d) + tmpPos[d] = rng(); - particle[ position_ ] = tmpPos; - } - }; + particle[position_] = tmpPos; + } + }; -} // namespace acc + } // namespace acc - /** Change the in cell position - * - * This functor changes the in-cell position of a particle. - * The new in-cell position is uniformly distributed position between [0.0;1.0). - * - * example: add - * ``` - * particles::Manipulate - * ``` - * to `InitPipeline` in `speciesInitialization.param` - */ - using RandomPosition = generic::FreeRng< - acc::RandomPosition, - pmacc::random::distributions::Uniform< float_X > - >; -} // namespace unary -} // namespace manipulators -} // namespace particles + /** Change the in cell position + * + * This functor changes the in-cell position of a particle. + * The new in-cell position is uniformly distributed position between [0.0;1.0). 
+ * + * example: add + * ``` + * particles::Manipulate + * ``` + * to `InitPipeline` in `speciesInitialization.param` + */ + using RandomPosition + = generic::FreeRng>; + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/Temperature.def b/include/picongpu/particles/manipulators/unary/Temperature.def index 178f861668..a327b75825 100644 --- a/include/picongpu/particles/manipulators/unary/Temperature.def +++ b/include/picongpu/particles/manipulators/unary/Temperature.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -24,59 +24,47 @@ #include #include -#include - namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace acc -{ - template< - typename T_ParamClass, - typename T_ValueFunctor - > - struct Temperature; -} // namespace acc - -namespace param -{ - //! configuration for the unary manipulator functor Temperature - struct TemperatureCfg + namespace particles { - /** Initial temperature - * unit: keV - */ - static constexpr float_64 temperature = 0.0; - }; -} // namespace param + namespace manipulators + { + namespace unary + { + namespace acc + { + template + struct Temperature; + } // namespace acc + + namespace param + { + //! configuration for the unary manipulator functor Temperature + struct TemperatureCfg + { + /** Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.0; + }; + } // namespace param - /** change particle's momentum based on a temperature - * - * allow to change the temperature (randomly normal distributed) - * of a particle. 
- * - * @tparam T_ParamClass param::TemperatureCfg, configuration parameter - * @tparam T_ValueFunctor pmacc::nvidia::functors::*, binary functor type to manipulate the momentum attribute - */ - template< - typename T_ParamClass = param::TemperatureCfg, - typename T_ValueFunctor = pmacc::nvidia::functors::Add - > - using Temperature = generic::FreeRng< - acc::Temperature< - T_ParamClass, - T_ValueFunctor - >, - pmacc::random::distributions::Normal< float_X > - >; + /** Modify particle momentum based on temperature + * + * @tparam T_ParamClass param::TemperatureCfg, configuration parameter + * @tparam T_ValueFunctor pmacc::nvidia::functors::*, binary functor type to + * add a new momentum to an old one + */ + template< + typename T_ParamClass = param::TemperatureCfg, + typename T_ValueFunctor = pmacc::nvidia::functors::Add> + using Temperature = generic::FreeRng< + acc::Temperature, + pmacc::random::distributions::Normal>; -} // namespace unary -} // namespace manipulators -} // namespace particles + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/manipulators/unary/Temperature.hpp b/include/picongpu/particles/manipulators/unary/Temperature.hpp index 6d2c0a8fe4..8e608413ee 100644 --- a/include/picongpu/particles/manipulators/unary/Temperature.hpp +++ b/include/picongpu/particles/manipulators/unary/Temperature.hpp @@ -1,5 +1,5 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, - * Alexander Grund +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, + * Alexander Grund, Sergei Bastrakov * * This file is part of PIConGPU. 
* @@ -25,93 +25,68 @@ namespace picongpu { -namespace particles -{ -namespace manipulators -{ -namespace unary -{ -namespace acc -{ - - /** manipulate the speed based on a temperature - * - * @tparam T_ParamClass picongpu::particles::manipulators::unary::param::TemperatureCfg, - * type with compile configuration - * @tparam T_ValueFunctor pmacc::nvidia::functors, binary operator type to reduce current and new value - */ - template< - typename T_ParamClass, - typename T_ValueFunctor - > - struct Temperature : private T_ValueFunctor + namespace particles { - /** manipulate the speed of the particle - * - * @tparam T_Rng pmacc::nvidia::rng::RNG, type of the random number generator - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param rng random number generator - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Rng, - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator()( - T_Rng & rng, - T_Particle & particle, - T_Args && ... - ) + namespace manipulators { - using ParamClass = T_ParamClass; - - const float3_X tmpRand = float3_X( - rng(), - rng(), - rng() - ); - float_X const macroWeighting = particle[ weighting_ ]; - - float_X const energy = ( ParamClass::temperature * UNITCONV_keV_to_Joule ) / UNIT_ENERGY; - - // since energy is related to one particle - // and our units are normalized for macro particle quanities - // energy is quite small - float_X const macroEnergy = macroWeighting * energy; - // non-rel, MW: - // p = m * v - // v ~ sqrt(k*T/m), k*T = E - // => p = sqrt(m) - // - // Note on macro particle energies, with weighting w: - // p_1 = m_1 * v - // v = v_1 = v_w - // p_w = p_1 * w - // E_w = E_1 * w - // Since masses, energies and momenta add up linear, we can - // just take w times the p_1. Take care, E means E_1 ! 
- // This goes to: - // p_w = w * p_1 = w * m_1 * sqrt( E / m_1 ) - // = sqrt( E * w^2 * m_1 ) - // = sqrt( E * w * m_w ) - // Which makes sense, since it means that we use a macroMass - // and a macroEnergy now. - float3_X const mom = tmpRand * ( float_X )math::sqrt( - precisionCast< sqrt_X >( - macroEnergy * - attribute::getMass(macroWeighting,particle) - ) - ); - T_ValueFunctor::operator( )( particle[ momentum_ ], mom ); - } - }; + namespace unary + { + namespace acc + { + /** Modify particle momentum based on temperature + * + * Generate a new random momentum distributed according to the given + * temperature and add it to the existing particle momentum. + * This functor is for the non-relativistic case only. + * In this case the new momentums follow the Maxwell-Boltzmann distribution. + * + * @tparam T_ParamClass picongpu::particles::manipulators::unary::param::TemperatureCfg, + * type with compile configuration + * @tparam T_ValueFunctor pmacc::nvidia::functors::*, binary functor type to + * add a new momentum to an old one + */ + template + struct Temperature : private T_ValueFunctor + { + /** manipulate the speed of the particle + * + * @tparam T_StandardNormalRng functor::misc::RngWrapper, standard + * normal random number generator type + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param rng standard normal random number generator + * @param particle particle to be manipulated + * @param ... unused parameters + */ + template + HDINLINE void operator()( + T_StandardNormalRng& standardNormalRng, + T_Particle& particle, + T_Args&&...) + { + /* In the non-relativistic case, particle momentums are following + * the Maxwell-Boltzmann distribution: each component is + * independently normally distributed with zero mean and variance of + * m * k * T = m * E. 
+ * For the macroweighted momentums we store as particle[ momentum_ ], + * the same relation holds, just m and E are also macroweighted + */ + float_X const energy = (T_ParamClass::temperature * UNITCONV_keV_to_Joule) / UNIT_ENERGY; + float_X const macroWeighting = particle[weighting_]; + float_X const macroEnergy = macroWeighting * energy; + float_X const macroMass = attribute::getMass(macroWeighting, particle); + float_X const standardDeviation + = static_cast(math::sqrt(precisionCast(macroEnergy * macroMass))); + float3_X const mom + = float3_X(standardNormalRng(), standardNormalRng(), standardNormalRng()) + * standardDeviation; + T_ValueFunctor::operator()(particle[momentum_], mom); + } + }; -} // namespace acc -} // namespace unary -} // namespace manipulators -} // namespace particles + } // namespace acc + } // namespace unary + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def b/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def index a2ff179db9..11a56845f8 100644 --- a/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def +++ b/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Marco Garten * * This file is part of PIConGPU. 
@@ -47,189 +47,136 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ - template - class ComputeGridValuePerFrame + namespace particles { - public: - - using AssignmentFunction = typename T_ParticleShape::ChargeAssignment; - static constexpr int supp = AssignmentFunction::support; - - static constexpr int lowerMargin = supp / 2; - static constexpr int upperMargin = (supp + 1) / 2; - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; - - HDINLINE ComputeGridValuePerFrame() + namespace particleToGrid { - } - - /** return unit for this solver - * - * @return solver unit - */ - HDINLINE float1_64 getUnit() const; - - /** return powers of the 7 base measures for this solver - * - * characterizing the unit of the result of the solver in SI - * (length L, mass M, time T, electric current I, - * thermodynamic temperature theta, amount of substance N, - * luminous intensity J) */ - HINLINE std::vector getUnitDimension() const; - - /** return name of the this solver - * @return name of solver - */ - HINLINE static - std::string getName(); - - template< - typename FrameType, - typename TVecSuperCell, - typename BoxTmp, - typename T_Acc > - DINLINE void operator()( - T_Acc const & acc, - FrameType& frame, - const int localIdx, - const TVecSuperCell superCell, - BoxTmp& tmpBox - ); - }; - -namespace detail -{ - /** Most derived fields just operate on the particle shape - * - * But some "debug" diagnostics just need the nearest cell, e.g. for - * counting, so we specialize such options here. 
- */ - template< - typename T_Species, - typename T_DerivedAttribute - > - struct GetAttributeShape - { - using type = typename GetShape< T_Species >::type; - }; - - template< typename T_Species > - struct GetAttributeShape< - T_Species, - derivedAttributes::Counter - > - { - using type = shapes::Counter; - }; - template< typename T_Species > - struct GetAttributeShape< - T_Species, - derivedAttributes::MacroCounter - > - { - using type = shapes::Counter; - }; - - template< - typename T_Species, - typename T_DerivedAttribute - > - using GetAttributeShape_t = typename GetAttributeShape< - T_Species, - T_DerivedAttribute - >::type; - -} // namespace detail - - /** Solver Operation for Particle to Grid Projections - * - * Derives a scalar field from a particle species at runtime. - * Values are mapped to cells according either according to the - * species' spatial shape or a specifically overwritten (counter) shape - * depending on the implementation of the derived attribute - * - * @tparam T_Species a see picongpu::Particles class with a species definition, - * see see speciesDefinition.param - * - * @tparam T_DerivedAttribute a derived particle attribute from - * picongpu::particles::particleToGrid::derivedAttributes - * - * @typedef defines a FieldTmpOperation class - */ - template< - typename T_Species, - typename T_DerivedAttribute - > - struct CreateFieldTmpOperation - { - using shapeType = detail::GetAttributeShape_t< - T_Species, - T_DerivedAttribute - >; - - using OperationPerFrame = ComputeGridValuePerFrame< - shapeType, - T_DerivedAttribute - >; - using type = FieldTmpOperation< - OperationPerFrame, - T_Species - >; - }; - template< - typename T_Species, - typename T_DerivedAttribute - > - using CreateFieldTmpOperation_t = typename CreateFieldTmpOperation< - T_Species, - T_DerivedAttribute - >::type; - - /** Create a list solvers for derived fields for eligible species - * - * Returns a list of FieldTmpOperation classes. 
- * - * @tparam T_SeqSpecies a sequence of particle species to check if they are - * eligible to derive the attribute T_DerivedAttribute - * from, also allows a single type instead of a sequence - * @tparam T_DerivedAttribute a derived attribute to map to the field grid, - * see defines in - * picongpu::particles::particleToGrid::derivedAttributes - */ - template< - typename T_SeqSpecies, - typename T_DerivedAttribute - > - struct CreateEligible - { - // wrap single arguments to sequence - using SeqSpecies = typename pmacc::ToSeq< T_SeqSpecies >::type; - using DerivedAttribute = T_DerivedAttribute; - - using type = typename traits::GenerateSolversIfSpeciesEligible< - CreateFieldTmpOperation< - bmpl::_1, - DerivedAttribute - >, - SeqSpecies, - DerivedAttribute - >::type; - }; - - template< - typename T_SeqSpecies, - typename T_DerivedAttribute - > - using CreateEligible_t = typename CreateEligible< - T_SeqSpecies, - T_DerivedAttribute - >::type; - -} // namespace particleToGrid -} // namespace particles + template + class ComputeGridValuePerFrame + { + public: + using AssignmentFunction = typename T_ParticleShape::ChargeAssignment; + static constexpr int supp = AssignmentFunction::support; + + static constexpr int lowerMargin = supp / 2; + static constexpr int upperMargin = (supp + 1) / 2; + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + HDINLINE ComputeGridValuePerFrame() + { + } + + /** return unit for this solver + * + * @return solver unit + */ + HDINLINE float1_64 getUnit() const; + + /** return powers of the 7 base measures for this solver + * + * characterizing the unit of the result of the solver in SI + * (length L, mass M, time T, electric current I, + * thermodynamic temperature theta, amount of substance N, + * luminous intensity J) */ + HINLINE std::vector getUnitDimension() const; + + /** return name of the this solver + * @return name of solver + */ + HINLINE static 
std::string getName(); + + template + DINLINE void operator()( + T_Acc const& acc, + FrameType& frame, + const int localIdx, + const TVecSuperCell superCell, + BoxTmp& tmpBox); + }; + + namespace detail + { + /** Most derived fields just operate on the particle shape + * + * But some "debug" diagnostics just need the nearest cell, e.g. for + * counting, so we specialize such options here. + */ + template + struct GetAttributeShape + { + using type = typename GetShape::type; + }; + + template + struct GetAttributeShape + { + using type = shapes::Counter; + }; + template + struct GetAttributeShape + { + using type = shapes::Counter; + }; + + template + using GetAttributeShape_t = typename GetAttributeShape::type; + + } // namespace detail + + /** Solver Operation for Particle to Grid Projections + * + * Derives a scalar field from a particle species at runtime. + * Values are mapped to cells according either according to the + * species' spatial shape or a specifically overwritten (counter) shape + * depending on the implementation of the derived attribute + * + * @tparam T_Species a see picongpu::Particles class with a species definition, + * see see speciesDefinition.param + * + * @tparam T_DerivedAttribute a derived particle attribute from + * picongpu::particles::particleToGrid::derivedAttributes + * + * @typedef defines a FieldTmpOperation class + */ + template + struct CreateFieldTmpOperation + { + using shapeType = detail::GetAttributeShape_t; + + using OperationPerFrame = ComputeGridValuePerFrame; + using type = FieldTmpOperation; + }; + template + using CreateFieldTmpOperation_t = typename CreateFieldTmpOperation::type; + + /** Create a list solvers for derived fields for eligible species + * + * Returns a list of FieldTmpOperation classes. 
+ * + * @tparam T_SeqSpecies a sequence of particle species to check if they are + * eligible to derive the attribute T_DerivedAttribute + * from, also allows a single type instead of a sequence + * @tparam T_DerivedAttribute a derived attribute to map to the field grid, + * see defines in + * picongpu::particles::particleToGrid::derivedAttributes + */ + template + struct CreateEligible + { + // wrap single arguments to sequence + using SeqSpecies = typename pmacc::ToSeq::type; + using DerivedAttribute = T_DerivedAttribute; + + using type = typename traits::GenerateSolversIfSpeciesEligible< + CreateFieldTmpOperation, + SeqSpecies, + DerivedAttribute>::type; + }; + + template + using CreateEligible_t = typename CreateEligible::type; + + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp b/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp index 67f64cce58..ee73d64af2 100644 --- a/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp +++ b/include/picongpu/particles/particleToGrid/ComputeGridValuePerFrame.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -33,103 +33,96 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ - -template -HDINLINE float1_64 -ComputeGridValuePerFrame::getUnit() const -{ - return T_DerivedAttribute().getUnit(); -} - -template -HINLINE std::vector -ComputeGridValuePerFrame::getUnitDimension() const -{ - return T_DerivedAttribute().getUnitDimension(); -} - -template -HINLINE std::string -ComputeGridValuePerFrame::getName() -{ - return T_DerivedAttribute::getName(); -} - -template -template -DINLINE void -ComputeGridValuePerFrame::operator() -( - T_Acc const & acc, - FrameType& frame, - const int localIdx, - const TVecSuperCell superCell, - BoxTmp& tmpBox -) -{ - /* \todo in the future and if useful, the functor can be a parameter */ - T_DerivedAttribute particleAttribute; - - auto particle = frame[localIdx]; - - /* particle attributes: in-cell position and generic, derived attribute */ - const floatD_X pos = particle[position_]; - const auto particleAttr = particleAttribute( particle ); - - /** Shift to the cell the particle belongs to - * range of particleCell: [DataSpace::create(0), TVecSuperCell] - */ - const int particleCellIdx = particle[localCellIdx_]; - const DataSpace particleCell( - DataSpaceOperations::map( superCell, particleCellIdx ) - ); - auto fieldTmpShiftToParticle = tmpBox.shift(particleCell); - - /* loop around the particle's cell (according to shape) */ - const DataSpace lowMargin(LowerMargin().toRT()); - const DataSpace upMargin(UpperMargin().toRT()); - - const DataSpace marginSpace(upMargin + lowMargin + 1); - - const int numWriteCells = marginSpace.productOfComponents(); - - for (int i = 0; i < numWriteCells; ++i) + namespace particles { - /** for the current cell i the multi dimensional index currentCell is only positive: - * allowed range = [DataSpace::create(0), LowerMargin+UpperMargin] - */ - const DataSpace currentCell = DataSpaceOperations::map(marginSpace, i); - - /** calculate the offset between the current cell i with simDim index 
currentCell - * and the cell of the particle (particleCell) in cells - */ - const DataSpace offsetParticleCellToCurrentCell = currentCell - lowMargin; - - /** assign particle contribution component-wise to the lower left corner of - * the cell i - * \todo take care of non-yee cells - */ - float_X assign( 1.0 ); - for (uint32_t d = 0; d < simDim; ++d) - assign *= AssignmentFunction()(float_X(offsetParticleCellToCurrentCell[d]) - pos[d]); - - /** add contribution of the particle times the generic attribute - * to cell i - * note: the .x() is used because FieldTmp is a scalar field with only - * one "x" component - */ - atomicAdd( - &(fieldTmpShiftToParticle(offsetParticleCellToCurrentCell).x()), - assign * particleAttr, - ::alpaka::hierarchy::Threads{} - ); - } -} - -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + template + HDINLINE float1_64 ComputeGridValuePerFrame::getUnit() const + { + return T_DerivedAttribute().getUnit(); + } + + template + HINLINE std::vector ComputeGridValuePerFrame:: + getUnitDimension() const + { + return T_DerivedAttribute().getUnitDimension(); + } + + template + HINLINE std::string ComputeGridValuePerFrame::getName() + { + return T_DerivedAttribute::getName(); + } + + template + template + DINLINE void ComputeGridValuePerFrame::operator()( + T_Acc const& acc, + FrameType& frame, + const int localIdx, + const TVecSuperCell superCell, + BoxTmp& tmpBox) + { + /* \todo in the future and if useful, the functor can be a parameter */ + T_DerivedAttribute particleAttribute; + + auto particle = frame[localIdx]; + + /* particle attributes: in-cell position and generic, derived attribute */ + const floatD_X pos = particle[position_]; + const auto particleAttr = particleAttribute(particle); + + /** Shift to the cell the particle belongs to + * range of particleCell: [DataSpace::create(0), TVecSuperCell] + */ + const int particleCellIdx = particle[localCellIdx_]; + const DataSpace particleCell( + 
DataSpaceOperations::map(superCell, particleCellIdx)); + auto fieldTmpShiftToParticle = tmpBox.shift(particleCell); + + /* loop around the particle's cell (according to shape) */ + const DataSpace lowMargin(LowerMargin().toRT()); + const DataSpace upMargin(UpperMargin().toRT()); + + const DataSpace marginSpace(upMargin + lowMargin + 1); + + const int numWriteCells = marginSpace.productOfComponents(); + + for(int i = 0; i < numWriteCells; ++i) + { + /** for the current cell i the multi dimensional index currentCell is only positive: + * allowed range = [DataSpace::create(0), LowerMargin+UpperMargin] + */ + const DataSpace currentCell = DataSpaceOperations::map(marginSpace, i); + + /** calculate the offset between the current cell i with simDim index currentCell + * and the cell of the particle (particleCell) in cells + */ + const DataSpace offsetParticleCellToCurrentCell = currentCell - lowMargin; + + /** assign particle contribution component-wise to the lower left corner of + * the cell i + * \todo take care of non-yee cells + */ + float_X assign(1.0); + for(uint32_t d = 0; d < simDim; ++d) + assign *= AssignmentFunction()(float_X(offsetParticleCellToCurrentCell[d]) - pos[d]); + + /** add contribution of the particle times the generic attribute + * to cell i + * note: the .x() is used because FieldTmp is a scalar field with only + * one "x" component + */ + cupla::atomicAdd( + acc, + &(fieldTmpShiftToParticle(offsetParticleCellToCurrentCell).x()), + assign * particleAttr, + ::alpaka::hierarchy::Threads{}); + } + } + + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.def b/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.def index c0b728b2ef..2741f64192 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.def +++ 
b/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -30,86 +30,70 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Density of Bound Electrons Operation for Particle to Grid Projections - * - * Derives a scalar density field from a particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - * - * @note only makes sense for partially ionized ions - */ - struct BoundElectronDensity + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector< float_64 > - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * Density is in inverse cubic meter: m^-3 - * -> L^-3 - */ - std::vector< float_64 > unitDimension( 7, 0.0 ); - unitDimension.at( SIBaseUnits::length ) = -3.0; + namespace derivedAttributes + { + /** Density of Bound Electrons Operation for Particle to Grid Projections + * + * Derives a scalar density field from a particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. + * + * @note only makes sense for partially ionized ions + */ + struct BoundElectronDensity + { + HDINLINE float1_64 getUnit() const; - return unitDimension; - } + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * Density is in inverse cubic meter: m^-3 + * -> L^-3 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = -3.0; - static HINLINE - std::string - getName() - { - return "boundElectronDensity"; - } + return unitDimension; + } - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. 
- * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * @tparam T_Particle particle in the frame - * @param particle particle in the frame - * - * @return new attribute for the particle (type @see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid + static HINLINE std::string getName() + { + return "boundElectronDensity"; + } -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::BoundElectronDensity - > - { - using FrameType = typename T_Species::FrameType; + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). 
+ * + * @tparam T_Particle particle in the frame + * @param particle particle in the frame + * + * @return new attribute for the particle (type @see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits + { + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<>, - boundElectrons - >; + using RequiredIdentifiers = MakeSeq_t, boundElectrons>; - using type = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - }; -} // namespace traits -} // namespace particles + using type = typename pmacc::traits::HasIdentifiers::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.hpp index 199d5af94b..8721f3caa2 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/BoundElectronDensity.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -25,35 +25,32 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - BoundElectronDensity::getUnit() const + namespace particles { - constexpr float_64 UNIT_VOLUME = UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH; - return particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE / UNIT_VOLUME; - } - - template< class T_Particle > - DINLINE float_X - BoundElectronDensity::operator()( T_Particle& particle ) const - { - // read existing attributes - float_X const weighting = particle[ weighting_ ]; - float_X const boundElectrons = particle[ boundElectrons_ ]; - - // calculate new attribute - float_X const boundElectronDensity = weighting * boundElectrons / - ( particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * CELL_VOLUME ); - - return boundElectronDensity; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 BoundElectronDensity::getUnit() const + { + constexpr float_64 UNIT_VOLUME = UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH; + return particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE / UNIT_VOLUME; + } + + template + DINLINE float_X BoundElectronDensity::operator()(T_Particle& particle) const + { + // read existing attributes + float_X const weighting = particle[weighting_]; + float_X const boundElectrons = particle[boundElectrons_]; + + // calculate new attribute + float_X const boundElectronDensity = weighting * boundElectrons + / (particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * CELL_VOLUME); + + return boundElectronDensity; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.def b/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.def index 8340d9299e..d6d2a0a7ac 100644 --- 
a/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -33,99 +33,79 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Charge Density Operation for Particle to Grid Projections - * - * Derives a scalar charge density field from a particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - * - * @note for species that do not change their charge state, this is identical - * to the density times the (constant) particles' charge, - * @see CreateDensityOperation - */ - struct ChargeDensity + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * ChargeDensity is in Coulomb / cubic meter: Q / m^3 = A * s / m^3 - * -> L^-3 * T * I - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = -3.0; - unitDimension.at(SIBaseUnits::time) = 1.0; - unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; - - return unitDimension; - } - - HINLINE static - std::string - getName() + namespace derivedAttributes + { + /** Charge Density Operation for Particle to Grid Projections + * + * Derives a scalar charge density field from a particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. 
+ * + * @note for species that do not change their charge state, this is identical + * to the density times the (constant) particles' charge, + * @see CreateDensityOperation + */ + struct ChargeDensity + { + HDINLINE float1_64 getUnit() const; + + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * ChargeDensity is in Coulomb / cubic meter: Q / m^3 = A * s / m^3 + * -> L^-3 * T * I + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = -3.0; + unitDimension.at(SIBaseUnits::time) = 1.0; + unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; + + return unitDimension; + } + + HINLINE static std::string getName() + { + return "chargeDensity"; + } + + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). + * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits { - return "chargeDensity"; - } - - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). 
- * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid - -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::ChargeDensity - > - { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<> - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; + + using RequiredIdentifiers = MakeSeq_t>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.hpp index 63c6db51f6..e91f506c0c 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/ChargeDensity.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. 
* @@ -26,35 +26,32 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - ChargeDensity::getUnit() const - { - const float_64 UNIT_VOLUME = (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); - return UNIT_CHARGE / UNIT_VOLUME; - } - - template< class T_Particle > - DINLINE float_X - ChargeDensity::operator()( T_Particle& particle ) const + namespace particles { - /* read existing attributes */ - const float_X weighting = particle[weighting_]; - const float_X charge = attribute::getCharge( weighting, particle ); - - /* calculate new attribute */ - const float_X particleChargeDensity = charge / CELL_VOLUME; - - /* return attribute */ - return particleChargeDensity; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 ChargeDensity::getUnit() const + { + const float_64 UNIT_VOLUME = (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); + return UNIT_CHARGE / UNIT_VOLUME; + } + + template + DINLINE float_X ChargeDensity::operator()(T_Particle& particle) const + { + /* read existing attributes */ + const float_X weighting = particle[weighting_]; + const float_X charge = attribute::getCharge(weighting, particle); + + /* calculate new attribute */ + const float_X particleChargeDensity = charge / CELL_VOLUME; + + /* return attribute */ + return particleChargeDensity; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.def b/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.def index 1d3f8e78ac..7130f8213f 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene 
Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -29,88 +29,74 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Real-Particle Counter Operation for Particle to Grid Projections - * - * Derives a scalar field with real particle numbers per cell from a particle - * species at runtime. - * Each macro particle's weighting is assigned straight to the cell it belongs - * to, which is in most cases a floor operation in space (and not necessarily - * the "nearest" cell-origin). - * - * @note Use this only for debug purposes, since the deposition "shape" is - * non-physical (inconsistent with charge & momentum-conserving shapes). - * Be aware that this is NOT the same as NGP (0. order shape) assignment - * in a staggered grid. - */ - struct Counter + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * Counter is unitless - */ - std::vector unitDimension( 7, 0.0 ); + namespace derivedAttributes + { + /** Real-Particle Counter Operation for Particle to Grid Projections + * + * Derives a scalar field with real particle numbers per cell from a particle + * species at runtime. + * Each macro particle's weighting is assigned straight to the cell it belongs + * to, which is in most cases a floor operation in space (and not necessarily + * the "nearest" cell-origin). + * + * @note Use this only for debug purposes, since the deposition "shape" is + * non-physical (inconsistent with charge & momentum-conserving shapes). + * Be aware that this is NOT the same as NGP (0. order shape) assignment + * in a staggered grid. 
+ */ + struct Counter + { + HDINLINE float1_64 getUnit() const; - return unitDimension; - } + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * Counter is unitless + */ + std::vector unitDimension(7, 0.0); - HINLINE static - std::string - getName() - { - return "particleCounter"; - } + return unitDimension; + } - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid + HINLINE static std::string getName() + { + return "particleCounter"; + } -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::Counter - > - { - using FrameType = typename T_Species::FrameType; + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). 
+ * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits + { + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; - using RequiredIdentifiers = MakeSeq_t< - weighting - >; + using RequiredIdentifiers = MakeSeq_t; - using type = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - }; -} // namespace traits -} // namespace particles + using type = typename pmacc::traits::HasIdentifiers::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.hpp index 453511d980..8a0949aad8 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/Counter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. 
* @@ -26,34 +26,30 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - Counter::getUnit() const - { - return particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE; - } - - template< class T_Particle > - DINLINE float_X - Counter::operator()( T_Particle& particle ) const + namespace particles { - /* read existing attributes */ - const float_X weighting = particle[weighting_]; - - /* calculate new attribute */ - const float_X particleCounter = weighting / - particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE; - - /* return attribute */ - return particleCounter; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 Counter::getUnit() const + { + return particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE; + } + + template + DINLINE float_X Counter::operator()(T_Particle& particle) const + { + /* read existing attributes */ + const float_X weighting = particle[weighting_]; + + /* calculate new attribute */ + const float_X particleCounter = weighting / particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE; + + /* return attribute */ + return particleCounter; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/Density.def b/include/picongpu/particles/particleToGrid/derivedAttributes/Density.def index f2682d0b0a..e040cb9d05 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/Density.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/Density.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -30,83 +30,68 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Density Operation for Particle to Grid Projections - * - * Derives a scalar density field from a particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - */ - struct Density + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * Density is in inverse cubic meter: m^-3 - * -> L^-3 - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = -3.0; + namespace derivedAttributes + { + /** Density Operation for Particle to Grid Projections + * + * Derives a scalar density field from a particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. + */ + struct Density + { + HDINLINE float1_64 getUnit() const; - return unitDimension; - } + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * Density is in inverse cubic meter: m^-3 + * -> L^-3 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = -3.0; - HINLINE static - std::string - getName() - { - return "density"; - } + return unitDimension; + } - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). 
- * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid + HINLINE static std::string getName() + { + return "density"; + } -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::Density - > - { - using FrameType = typename T_Species::FrameType; + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). + * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits + { + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<> - >; + using RequiredIdentifiers = MakeSeq_t>; - using type = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - }; -} // namespace traits -} // namespace particles + using type = typename pmacc::traits::HasIdentifiers::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/Density.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/Density.hpp index 4cc88fa7d6..b8501fc6ef 100644 --- 
a/include/picongpu/particles/particleToGrid/derivedAttributes/Density.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/Density.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -26,35 +26,32 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - Density::getUnit() const - { - const float_64 UNIT_VOLUME = (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); - return particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE / UNIT_VOLUME; - } - - template< class T_Particle > - DINLINE float_X - Density::operator()( T_Particle& particle ) const + namespace particles { - /* read existing attributes */ - const float_X weighting = particle[weighting_]; - - /* calculate new attribute */ - const float_X particleDensity = weighting / - ( particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * CELL_VOLUME ); - - /* return attribute */ - return particleDensity; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 Density::getUnit() const + { + const float_64 UNIT_VOLUME = (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); + return particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE / UNIT_VOLUME; + } + + template + DINLINE float_X Density::operator()(T_Particle& particle) const + { + /* read existing attributes */ + const float_X weighting = particle[weighting_]; + + /* calculate new attribute */ + const float_X particleDensity + = weighting / (particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * CELL_VOLUME); + + /* return attribute */ + return particleDensity; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.def 
b/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.def index ab25f4267c..d64d0fa48e 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl, Richard Pausch +/* Copyright 2015-2021 Axel Huebl, Richard Pausch * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.hpp index cd396fdc1b..a1a605d8e0 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl, Richard Pausch +/* Copyright 2015-2021 Axel Huebl, Richard Pausch * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.def b/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.def index 42128a0f6e..eb1565e6f1 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -33,100 +33,79 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Kinetic Energy Operation for Particle to Grid Projections - * - * Derives a scalar field for summed kinetic particle energy from a particle - * species at runtime. - * Each value is mapped per cell according to the species' spatial shape. 
- * - * @note this is the same as @see CreateEnergyDensityOperation times the cell - * volume - */ - struct Energy + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * Energy is in Joule: J = kg * m^2 / s^2 - * -> L^2 * M * T^-2 - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 2.0; - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -2.0; - - return unitDimension; - } - - HINLINE static - std::string - getName() + namespace derivedAttributes + { + /** Kinetic Energy Operation for Particle to Grid Projections + * + * Derives a scalar field for summed kinetic particle energy from a particle + * species at runtime. + * Each value is mapped per cell according to the species' spatial shape. + * + * @note this is the same as @see CreateEnergyDensityOperation times the cell + * volume + */ + struct Energy + { + HDINLINE float1_64 getUnit() const; + + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * Energy is in Joule: J = kg * m^2 / s^2 + * -> L^2 * M * T^-2 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = 2.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -2.0; + + return unitDimension; + } + + HINLINE static std::string getName() + { + return "particleEnergy"; + } + + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). 
+ * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits { - return "particleEnergy"; - } - - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid - -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::Energy - > - { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<>, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; + + using RequiredIdentifiers = MakeSeq_t, momentum>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } 
// namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.hpp index ae36c2d87c..fbad56efd8 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/Energy.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -27,31 +27,28 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - Energy::getUnit() const + namespace particles { - return UNIT_ENERGY; - } - - template< class T_Particle > - DINLINE float_X - Energy::operator()( T_Particle& particle ) const - { - /* read existing attributes */ - const float_X weighting = particle[weighting_]; - const float3_X mom = particle[momentum_]; - const float_X mass = attribute::getMass( weighting, particle ); - - return KinEnergy<>()( mom, mass ); - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 Energy::getUnit() const + { + return UNIT_ENERGY; + } + + template + DINLINE float_X Energy::operator()(T_Particle& particle) const + { + /* read existing attributes */ + const float_X weighting = particle[weighting_]; + const float3_X mom = particle[momentum_]; + const float_X mass = attribute::getMass(weighting, particle); + + return KinEnergy<>()(mom, mass); + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.def b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.def index 0a92d1a156..098048b0f6 
100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -33,102 +33,81 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Energy Density Operation for Particle to Grid Projections - * - * Derives a scalar field for average kinetic particle energy per cell times the - * particle density from a particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - * - * @note this is the same as the sum of kinetic particle energy - * divided by a constant for the cell volume - * @see CreateEnergyOperation - */ - struct EnergyDensity + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * EnergyDensity is in Joule / cubic meter: J / m^3 = kg * m^2 / s^2 / m^3 - * = kg / (s^2 * m) - * -> L^-1 * M * T^-2 - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = -1.0; - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -2.0; - - return unitDimension; - } - - HINLINE static - std::string - getName() + namespace derivedAttributes + { + /** Energy Density Operation for Particle to Grid Projections + * + * Derives a scalar field for average kinetic particle energy per cell times the + * particle density from a particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. 
+ * + * @note this is the same as the sum of kinetic particle energy + * divided by a constant for the cell volume + * @see CreateEnergyOperation + */ + struct EnergyDensity + { + HDINLINE float1_64 getUnit() const; + + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * EnergyDensity is in Joule / cubic meter: J / m^3 = kg * m^2 / s^2 / m^3 + * = kg / (s^2 * m) + * -> L^-1 * M * T^-2 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = -1.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -2.0; + + return unitDimension; + } + + HINLINE static std::string getName() + { + return "energyDensity"; + } + + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). + * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits { - return "energyDensity"; - } - - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). 
- * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid - -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::EnergyDensity - > - { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<>, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; + + using RequiredIdentifiers = MakeSeq_t, momentum>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.hpp index d0b336f569..456d029de8 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensity.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Heiko Burau +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Heiko Burau * * This file is part of PIConGPU. 
* @@ -27,34 +27,31 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - EnergyDensity::getUnit() const - { - constexpr float_64 UNIT_VOLUME = (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); - return UNIT_ENERGY / UNIT_VOLUME; - } - - template< class T_Particle > - DINLINE float_X - EnergyDensity::operator()( T_Particle& particle ) const + namespace particles { - /* read existing attributes */ - const float_X weighting = particle[weighting_]; - const float3_X mom = particle[momentum_]; - const float_X mass = attribute::getMass( weighting, particle ); - - constexpr float_X INV_CELL_VOLUME = float_X(1.0) / CELL_VOLUME; - - return KinEnergy<>()( mom, mass ) * INV_CELL_VOLUME; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 EnergyDensity::getUnit() const + { + constexpr float_64 UNIT_VOLUME = (UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); + return UNIT_ENERGY / UNIT_VOLUME; + } + + template + DINLINE float_X EnergyDensity::operator()(T_Particle& particle) const + { + /* read existing attributes */ + const float_X weighting = particle[weighting_]; + const float3_X mom = particle[momentum_]; + const float_X mass = attribute::getMass(weighting, particle); + + constexpr float_X INV_CELL_VOLUME = float_X(1.0) / CELL_VOLUME; + + return KinEnergy<>()(mom, mass) * INV_CELL_VOLUME; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.def b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.def index 8c9df4c6a6..f97240c191 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.def +++ 
b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Marco Garten +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Marco Garten * * This file is part of PIConGPU. * @@ -33,70 +33,60 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Energy Density Operation with Maxmimum Energy Cut-Off for Particle to Grid Projections - * - * Derives a scalar field for average kinetic particle energy per cell times the - * particle density from a particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - * - * @note Only energies below a user-definable cut-off energy are taken for - * calculation! - * - * @tparam T_ParamClass parameter class containing the maximum energy cutoff - * - * @note T_ParamClass requires the member `constexpr float_X cutoffMaxEnergy`. - */ - template< typename T_ParamClass > - struct EnergyDensityCutoff : public EnergyDensity + namespace particles { - - HINLINE static - std::string - getName() + namespace particleToGrid { - return "energyDensityCutoff"; - } + namespace derivedAttributes + { + /** Energy Density Operation with Maxmimum Energy Cut-Off for Particle to Grid Projections + * + * Derives a scalar field for average kinetic particle energy per cell times the + * particle density from a particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. + * + * @note Only energies below a user-definable cut-off energy are taken for + * calculation! + * + * @tparam T_ParamClass parameter class containing the maximum energy cutoff + * + * @note T_ParamClass requires the member `constexpr float_X cutoffMaxEnergy`. 
+ */ + template + struct EnergyDensityCutoff : public EnergyDensity + { + HINLINE static std::string getName() + { + return "energyDensityCutoff"; + } - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * @tparam T_Particle particle in the frame - * @param particle particle in the frame - * - * @return new attribute for the particle (type @see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). 
+ * + * @tparam T_Particle particle in the frame + * @param particle particle in the frame + * + * @return new attribute for the particle (type @see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid -namespace traits -{ - template< - typename T_Species, - typename T_ParamClass - > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::EnergyDensityCutoff< T_ParamClass > - > : public SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::EnergyDensity - > - { - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver< + T_Species, + particleToGrid::derivedAttributes::EnergyDensityCutoff> + : public SpeciesEligibleForSolver + { + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.hpp index b32a0a73d3..080071dfa7 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/EnergyDensityCutoff.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Heiko Burau, Marco Garten +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Heiko Burau, Marco Garten * * This file is part of PIConGPU. 
* @@ -26,46 +26,38 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - template< class T_ParamClass > - template< class T_Particle > - DINLINE float_X - EnergyDensityCutoff< T_ParamClass >::operator()( T_Particle& particle ) const + namespace particles { - using ParamClass = T_ParamClass; - - /* read existing attributes */ - float_X const weighting = particle[ weighting_ ]; - float3_X const mom = particle[ momentum_ ]; - float_X const mass = attribute::getMass( - weighting, - particle - ); - - constexpr float_X INV_CELL_VOLUME = float_X( 1.0 ) / CELL_VOLUME; - - /* value for energy cut-off */ - float_X const cutoffMaxEnergy = ParamClass::cutoffMaxEnergy; - float_X const cutoff = cutoffMaxEnergy / UNIT_ENERGY * weighting; - - float_X const kinEnergy = KinEnergy< >( )( - mom, - mass - ); - - float_X result( 0. ); - if( kinEnergy < cutoff ) - result = kinEnergy * INV_CELL_VOLUME; - - return result; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + template + template + DINLINE float_X EnergyDensityCutoff::operator()(T_Particle& particle) const + { + using ParamClass = T_ParamClass; + + /* read existing attributes */ + float_X const weighting = particle[weighting_]; + float3_X const mom = particle[momentum_]; + float_X const mass = attribute::getMass(weighting, particle); + + constexpr float_X INV_CELL_VOLUME = float_X(1.0) / CELL_VOLUME; + + /* value for energy cut-off */ + float_X const cutoffMaxEnergy = ParamClass::cutoffMaxEnergy; + float_X const cutoff = cutoffMaxEnergy / UNIT_ENERGY * weighting; + + float_X const kinEnergy = KinEnergy<>()(mom, mass); + + float_X result(0.); + if(kinEnergy < cutoff) + result = kinEnergy * INV_CELL_VOLUME; + + return result; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git 
a/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.def b/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.def index a1e7dee591..b7ed695a3e 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.def @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -33,103 +33,77 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Radiated Larmor Power Operation for Particle to Grid Projections - * - * Derives a scalar field with the radiated power according to the Larmor - * formula from a particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - */ - struct LarmorPower + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * LarmorEnergy is in Joule: J = kg * m^2 / s^2 - * -> L^2 * M * T^-2 - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 2.0; - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -2.0; - - return unitDimension; - } - - HINLINE static - std::string - getName() + namespace derivedAttributes + { + /** Radiated Larmor Power Operation for Particle to Grid Projections + * + * Derives a scalar field with the radiated power according to the Larmor + * formula from a particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. 
+ */ + struct LarmorPower + { + HDINLINE float1_64 getUnit() const; + + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * LarmorEnergy is in Joule: J = kg * m^2 / s^2 + * -> L^2 * M * T^-2 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = 2.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -2.0; + + return unitDimension; + } + + HINLINE static std::string getName() + { + return "larmorPower"; + } + + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). + * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits { - return "larmorPower"; - } - - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). 
- * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid - -namespace traits -{ - template< typename T_Species > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::LarmorPower - > - { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<>, - momentum, - momentumPrev1 - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasMass = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - using SpeciesHasCharge = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasMass, - SpeciesHasCharge - >; - }; -} // namespace traits -} // namespace particles + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; + + using RequiredIdentifiers = MakeSeq_t, momentum, momentumPrev1>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using SpeciesHasMass = typename pmacc::traits::HasFlag>::type; + using SpeciesHasCharge = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.hpp index 93a0a8da33..2c44e91314 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/LarmorPower.hpp @@ 
-1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -27,58 +27,53 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - LarmorPower::getUnit() const - { - return UNIT_ENERGY; - } - - template< class T_Particle > - DINLINE float_X - LarmorPower::operator()( T_Particle& particle ) const + namespace particles { + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 LarmorPower::getUnit() const + { + return UNIT_ENERGY; + } - constexpr bool hasMomentumPrev1 = pmacc::traits::HasIdentifier< - typename T_Particle::FrameType, - momentumPrev1 - >::type::value; - PMACC_CASSERT_MSG_TYPE( species_must_have_the_attribute_momentumPrev1, T_Particle, hasMomentumPrev1 ); + template + DINLINE float_X LarmorPower::operator()(T_Particle& particle) const + { + constexpr bool hasMomentumPrev1 + = pmacc::traits::HasIdentifier::type::value; + PMACC_CASSERT_MSG_TYPE( + species_must_have_the_attribute_momentumPrev1, + T_Particle, + hasMomentumPrev1); - /* read existing attributes */ - const float3_X mom = particle[momentum_]; - const float3_X mom_mt1 = particle[momentumPrev1_]; - const float_X weighting = particle[weighting_]; - const float_X charge = attribute::getCharge( weighting, particle ); - const float_X mass = attribute::getMass( weighting, particle ); + /* read existing attributes */ + const float3_X mom = particle[momentum_]; + const float3_X mom_mt1 = particle[momentumPrev1_]; + const float_X weighting = particle[weighting_]; + const float_X charge = attribute::getCharge(weighting, particle); + const float_X mass = attribute::getMass(weighting, particle); - /* calculate new attribute */ - Gamma calcGamma; - const typename Gamma::valueType gamma = calcGamma( mom, mass ); - const float_X gamma2 = gamma * gamma; - const float_X c2 = SPEED_OF_LIGHT 
* SPEED_OF_LIGHT; + /* calculate new attribute */ + Gamma calcGamma; + const typename Gamma::valueType gamma = calcGamma(mom, mass); + const float_X gamma2 = gamma * gamma; + const float_X c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - const float3_X mom_dt = (mom - mom_mt1) / float_X(DELTA_T); - const float_X el_factor = charge * charge - / (float_X(6.0) * PI * EPS0 * - c2 * SPEED_OF_LIGHT * mass * mass) * gamma2 * gamma2; - const float_X momentumToBetaConvert = float_X(1.0)/ (mass * SPEED_OF_LIGHT * gamma); - const float_X larmorPower = el_factor - * ( math::abs2(mom_dt) - - momentumToBetaConvert * momentumToBetaConvert - * math::abs2(math::cross(mom, mom_dt)) - ); + const float3_X mom_dt = (mom - mom_mt1) / float_X(DELTA_T); + const float_X el_factor = charge * charge + / (float_X(6.0) * PI * EPS0 * c2 * SPEED_OF_LIGHT * mass * mass) * gamma2 * gamma2; + const float_X momentumToBetaConvert = float_X(1.0) / (mass * SPEED_OF_LIGHT * gamma); + const float_X larmorPower = el_factor + * (pmacc::math::abs2(mom_dt) + - momentumToBetaConvert * momentumToBetaConvert + * pmacc::math::abs2(pmacc::math::cross(mom, mom_dt))); - /* return attribute */ - return larmorPower; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + /* return attribute */ + return larmorPower; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.def b/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.def index 496ed7cad9..30ee6cd20a 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.def @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -29,64 +29,58 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Macro-Particle Counter Operation for Particle to Grid Projections - * - * Derives a scalar field with number of macro particles per cell from a particle - * species at runtime. - * Each macro particle is counted straight to the cell it belongs to, which is - * in most cases as floor operation in space (and not necessarily the "nearest" - * cell-origin). - * - * @note Use this only for debug purposes, e.g. to validate particle memory. - */ - struct MacroCounter + namespace particles { - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * Counter is unitless - */ - std::vector unitDimension( 7, 0.0 ); + namespace derivedAttributes + { + /** Macro-Particle Counter Operation for Particle to Grid Projections + * + * Derives a scalar field with number of macro particles per cell from a particle + * species at runtime. + * Each macro particle is counted straight to the cell it belongs to, which is + * in most cases as floor operation in space (and not necessarily the "nearest" + * cell-origin). + * + * @note Use this only for debug purposes, e.g. to validate particle memory. + */ + struct MacroCounter + { + HDINLINE float1_64 getUnit() const; - return unitDimension; - } + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * Counter is unitless + */ + std::vector unitDimension(7, 0.0); - HINLINE static - std::string - getName() - { - return "macroParticleCounter"; - } + return unitDimension; + } + + HINLINE static std::string getName() + { + return "macroParticleCounter"; + } - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. 
- * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). + * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.hpp index 5a03a97c33..53048a1681 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/MacroCounter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -26,27 +26,24 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - HDINLINE float1_64 - MacroCounter::getUnit() const + namespace particles { - return 1.0; - } + namespace particleToGrid + { + namespace derivedAttributes + { + HDINLINE float1_64 MacroCounter::getUnit() const + { + return 1.0; + } - template< class T_Particle > - DINLINE float_X - MacroCounter::operator()( T_Particle& particle ) const - { - /* return attribute */ - return 1.0; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + template + DINLINE float_X MacroCounter::operator()(T_Particle& particle) const + { + /* return attribute */ + return 1.0; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.def b/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.def index 2b5c0d450d..0f3bc83747 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -35,117 +35,96 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Current Density Operation for Particle to Grid Projections - * - * Calculate the on-charge current density in a selected direction. - * Derives a scalar field with density * charge * velocity_component from a - * particle species at runtime. - * Each value is mapped per cell according to the species' spatial shape. - * - * @note Mainly useful for debug purposes, e.g. when implementing a new current - * solver. 
- * - * @tparam T_direction perpendicular direction x=0, y=1, z=2 - */ - template< size_t T_direction > - struct MidCurrentDensityComponent + namespace particles { - PMACC_CASSERT_MSG( Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, ((T_direction)>=0) ); - PMACC_CASSERT_MSG( Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, ((T_direction)<3) ); - - HDINLINE float1_64 - getUnit() const; - - HINLINE std::vector - getUnitDimension() const + namespace particleToGrid { - /* L, M, T, I, theta, N, J - * - * MidCurrentDensity is in Ampere / square meters: A / m^2 - * charge density: Coulomb / m^3 - * velocity: m / s - * current density = charge density * velocity - * -> L^-2 * I - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = -2.0; - unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; - - return unitDimension; - } - - HINLINE static - std::string - getName() + namespace derivedAttributes + { + /** Current Density Operation for Particle to Grid Projections + * + * Calculate the on-charge current density in a selected direction. + * Derives a scalar field with density * charge * velocity_component from a + * particle species at runtime. + * Each value is mapped per cell according to the species' spatial shape. + * + * @note Mainly useful for debug purposes, e.g. when implementing a new current + * solver. 
+ * + * @tparam T_direction perpendicular direction x=0, y=1, z=2 + */ + template + struct MidCurrentDensityComponent + { + PMACC_CASSERT_MSG( + Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, + ((T_direction) >= 0)); + PMACC_CASSERT_MSG( + Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, + ((T_direction) < 3)); + + HDINLINE float1_64 getUnit() const; + + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * MidCurrentDensity is in Ampere / square meters: A / m^2 + * charge density: Coulomb / m^3 + * velocity: m / s + * current density = charge density * velocity + * -> L^-2 * I + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = -2.0; + unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; + + return unitDimension; + } + + HINLINE static std::string getName() + { + auto const componentNames = plugins::misc::getComponentNames(3); + return "midCurrentDensity/" + componentNames[T_direction]; + } + + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). + * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid + + namespace traits { - auto const componentNames = plugins::misc::getComponentNames( 3 ); - return "midCurrentDensity/" + componentNames[T_direction]; - } - - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. 
- * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid - -namespace traits -{ - template< - typename T_Species, - size_t T_direction - > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::MidCurrentDensityComponent< T_direction > - > - { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<>, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasMass = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - using SpeciesHasCharge = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasMass, - SpeciesHasCharge - >; - }; -} // namespace traits -} // namespace particles + template + struct SpeciesEligibleForSolver< + T_Species, + particleToGrid::derivedAttributes::MidCurrentDensityComponent> + { + using FrameType = typename T_Species::FrameType; + + using RequiredIdentifiers = MakeSeq_t, momentum>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using SpeciesHasMass = typename pmacc::traits::HasFlag>::type; + using SpeciesHasCharge = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.hpp 
b/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.hpp index b964000bde..dfd8166a67 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/MidCurrentDensityComponent.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -26,46 +26,42 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - template< size_t T_direction> - HDINLINE float1_64 - MidCurrentDensityComponent::getUnit() const + namespace particles { - const float_64 UNIT_AREA = UNIT_LENGTH * UNIT_LENGTH; - return UNIT_CHARGE / ( UNIT_TIME * UNIT_AREA ); - } + namespace particleToGrid + { + namespace derivedAttributes + { + template + HDINLINE float1_64 MidCurrentDensityComponent::getUnit() const + { + const float_64 UNIT_AREA = UNIT_LENGTH * UNIT_LENGTH; + return UNIT_CHARGE / (UNIT_TIME * UNIT_AREA); + } - template< size_t T_direction> - template< class T_Particle > - DINLINE float_X - MidCurrentDensityComponent::operator()( T_Particle& particle ) const - { - /* read existing attributes */ - const float_X weighting = particle[weighting_]; - const float_X charge = attribute::getCharge( weighting, particle ); - const float3_X mom = particle[momentum_]; - const float_X momCom = mom[T_direction]; - const float_X mass = attribute::getMass( weighting, particle ); + template + template + DINLINE float_X MidCurrentDensityComponent::operator()(T_Particle& particle) const + { + /* read existing attributes */ + const float_X weighting = particle[weighting_]; + const float_X charge = attribute::getCharge(weighting, particle); + const float3_X mom = particle[momentum_]; + const float_X momCom = mom[T_direction]; + const float_X mass = attribute::getMass(weighting, particle); - /* calculate new attribute */ - Gamma calcGamma; - const 
typename Gamma::valueType gamma = calcGamma( mom, mass ); + /* calculate new attribute */ + Gamma calcGamma; + const typename Gamma::valueType gamma = calcGamma(mom, mass); - /* calculate new attribute */ - const float_X particleCurrentDensity = - charge / CELL_VOLUME * /* rho */ - momCom / ( gamma * mass ); /* v_component */ + /* calculate new attribute */ + const float_X particleCurrentDensity = charge / CELL_VOLUME * /* rho */ + momCom / (gamma * mass); /* v_component */ - /* return attribute */ - return particleCurrentDensity; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + /* return attribute */ + return particleCurrentDensity; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.def b/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.def index 7bfd2d06d0..f9d640eb2c 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.def +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -31,93 +31,82 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - /** Momentum Ratio Operation for Particle to Grid Projections - * - * Calculate the ratio of momentum in a selected direction to total momentum. - * Derives a scalar field with ratio between a selected momentum component and - * total momentum from a particle species at runtime. Results are in the range - * [ -1. : 1. ], resting particles are set to 0. - * Each value is mapped per cell according to the species' spatial shape. 
- * - * @param T_direction perpendicular direction x=0, y=1, z=2 - */ - template< size_t T_direction > - struct MomentumComponent + namespace particles { - PMACC_CASSERT_MSG( Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, ((T_direction)>=0) ); - PMACC_CASSERT_MSG( Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, ((T_direction)<3) ); + namespace particleToGrid + { + namespace derivedAttributes + { + /** Momentum Ratio Operation for Particle to Grid Projections + * + * Calculate the ratio of momentum in a selected direction to total momentum. + * Derives a scalar field with ratio between a selected momentum component and + * total momentum from a particle species at runtime. Results are in the range + * [ -1. : 1. ], resting particles are set to 0. + * Each value is mapped per cell according to the species' spatial shape. + * + * @param T_direction perpendicular direction x=0, y=1, z=2 + */ + template + struct MomentumComponent + { + PMACC_CASSERT_MSG( + Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, + ((T_direction) >= 0)); + PMACC_CASSERT_MSG( + Valid_directions_are_0_to_2_for_X_to_Z__in_fileOutput_param, + ((T_direction) < 3)); - HDINLINE float1_64 - getUnit() const; + HDINLINE float1_64 getUnit() const; - HINLINE std::vector - getUnitDimension() const - { - /* L, M, T, I, theta, N, J - * - * The ratio between momentum in a certain direction direction to - * total momentum is unitless. - */ - std::vector unitDimension( 7, 0.0 ); + HINLINE std::vector getUnitDimension() const + { + /* L, M, T, I, theta, N, J + * + * The ratio between momentum in a certain direction direction to + * total momentum is unitless. 
+ */ + std::vector unitDimension(7, 0.0); - return unitDimension; - } + return unitDimension; + } - HINLINE static - std::string - getName() - { - return "particleMomentumComponent"; - } + HINLINE static std::string getName() + { + return "particleMomentumComponent"; + } - /** Calculate a new attribute per particle - * - * Returns a new (on-the-fly calculated) attribute of a particle - * that can then be mapped to the cells the particle contributes to. - * This method is called on a per-thread basis (each thread of a block - * handles a particle of a frame). - * - * \tparam T_Particle particle in the frame - * \param particle particle in the frame - * - * \return new attribute for the particle (type \see T_AttributeType) - */ - template< class T_Particle > - DINLINE float_X - operator()( T_Particle& particle ) const; - }; -} // namespace derivedAttributes -} // namespace particleToGrid + /** Calculate a new attribute per particle + * + * Returns a new (on-the-fly calculated) attribute of a particle + * that can then be mapped to the cells the particle contributes to. + * This method is called on a per-thread basis (each thread of a block + * handles a particle of a frame). 
+ * + * \tparam T_Particle particle in the frame + * \param particle particle in the frame + * + * \return new attribute for the particle (type \see T_AttributeType) + */ + template + DINLINE float_X operator()(T_Particle& particle) const; + }; + } // namespace derivedAttributes + } // namespace particleToGrid -namespace traits -{ - template< - typename T_Species, - size_t T_direction - > - struct SpeciesEligibleForSolver< - T_Species, - particleToGrid::derivedAttributes::MomentumComponent< T_direction > - > - { - using FrameType = typename T_Species::FrameType; + namespace traits + { + template + struct SpeciesEligibleForSolver< + T_Species, + particleToGrid::derivedAttributes::MomentumComponent> + { + using FrameType = typename T_Species::FrameType; - using RequiredIdentifiers = MakeSeq_t< - position<>, - momentum - >; + using RequiredIdentifiers = MakeSeq_t, momentum>; - using type = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - }; -} // namespace traits -} // namespace particles + using type = typename pmacc::traits::HasIdentifiers::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.hpp b/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.hpp index 652fede7a5..94ecba2a78 100644 --- a/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.hpp +++ b/include/picongpu/particles/particleToGrid/derivedAttributes/MomentumComponent.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -26,42 +26,37 @@ namespace picongpu { -namespace particles -{ -namespace particleToGrid -{ -namespace derivedAttributes -{ - - template< size_t T_direction> - HDINLINE float1_64 - MomentumComponent::getUnit() const + namespace particles { - return 1.0; - } - - template< size_t T_direction> - template< class T_Particle > - DINLINE float_X - MomentumComponent::operator()( T_Particle& particle ) const - { - // read existing attributes - const float3_X mom = particle[momentum_]; - - // calculate new attribute: |p| and p.[x|y|z] - const float_X momAbs = math::abs(mom); - const float_X momCom = mom[T_direction]; - - // total momentum == 0 then perpendicular measure shall be zero, too - // values: [-1.:1.] - const float_X momComOverTotal = (momAbs > float_X(0.)) ? - momCom / momAbs : - float_X(0.); - - // return attribute - return momComOverTotal; - } -} // namespace derivedAttributes -} // namespace particleToGrid -} // namespace particles + namespace particleToGrid + { + namespace derivedAttributes + { + template + HDINLINE float1_64 MomentumComponent::getUnit() const + { + return 1.0; + } + + template + template + DINLINE float_X MomentumComponent::operator()(T_Particle& particle) const + { + // read existing attributes + const float3_X mom = particle[momentum_]; + + // calculate new attribute: |p| and p.[x|y|z] + const float_X momAbs = math::abs(mom); + const float_X momCom = mom[T_direction]; + + // total momentum == 0 then perpendicular measure shall be zero, too + // values: [-1.:1.] + const float_X momComOverTotal = (momAbs > float_X(0.)) ? 
momCom / momAbs : float_X(0.); + + // return attribute + return momComOverTotal; + } + } // namespace derivedAttributes + } // namespace particleToGrid + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/pusher/Traits.hpp b/include/picongpu/particles/pusher/Traits.hpp new file mode 100644 index 0000000000..76696fcf66 --- /dev/null +++ b/include/picongpu/particles/pusher/Traits.hpp @@ -0,0 +1,50 @@ +/* Copyright 2020-2021 Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/particles/pusher/particlePusherComposite.hpp" + +#include + +#include + + +namespace picongpu +{ + namespace particles + { + namespace pusher + { + /** Check if pusher type is composite (use several underlying pushers) + * + * The only composite pusher types are children of + * particlePusherComposite::Push template classes + * + * @tparam T_Pusher pusher type + * @treturn ::type std::true_type or std::false_type + */ + template + struct IsComposite : public pmacc::traits::IsBaseTemplateOf_t + { + }; + + } // namespace pusher + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherAcceleration.hpp b/include/picongpu/particles/pusher/particlePusherAcceleration.hpp index 8bd5fe001a..8254a84a40 100644 --- a/include/picongpu/particles/pusher/particlePusherAcceleration.hpp +++ b/include/picongpu/particles/pusher/particlePusherAcceleration.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, * Richard Pausch, Klaus Steiniger * * This file is part of PIConGPU. 
@@ -27,75 +27,71 @@ namespace picongpu { -namespace particlePusherAcceleration -{ - -struct UnitlessParam : public particlePusherAccelerationParam -{ - /** Normalize input values from `pusher.param` to PIC units */ - static constexpr float_X AMPLITUDEx = float_X(AMPLITUDEx_SI / UNIT_EFIELD); // unit: Volt / meter - static constexpr float_X AMPLITUDEy = float_X(AMPLITUDEy_SI / UNIT_EFIELD); // unit: Volt / meter - static constexpr float_X AMPLITUDEz = float_X(AMPLITUDEz_SI / UNIT_EFIELD); // unit: Volt / meter - - static constexpr float_X ACCELERATION_TIME = float_X(ACCELERATION_TIME_SI / UNIT_TIME); // unit: second - -}; - -template -struct Push -{ - /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation - * for particle positions outside the super cell in one push - */ - using LowerMargin = pmacc::math::CT::make_Int::type; - using UpperMargin = pmacc::math::CT::make_Int::type; - - template< typename T_FunctorFieldE, typename T_FunctorFieldB, typename T_Particle, typename T_Pos > - HDINLINE void operator()( - const T_FunctorFieldB, - const T_FunctorFieldE, - T_Particle & particle, - T_Pos & pos, - const uint32_t currentStep - ) + namespace particlePusherAcceleration { - using UnitlessParam = ::picongpu::particlePusherAcceleration::UnitlessParam; - - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( weighting, particle ); - float_X const charge = attribute::getCharge( weighting, particle ); - - using MomType = momentum::type; - MomType new_mom = particle[ momentum_ ]; - - const float_X deltaT = DELTA_T; - - // normalize input SI values to - const float3_X eField(UnitlessParam::AMPLITUDEx, UnitlessParam::AMPLITUDEy, UnitlessParam::AMPLITUDEz); - - /* ToDo: Refactor to ensure a smooth and slow increase of eField with time - * which may help to reduce radiation due to acceleration, if present. 
- */ - if ( currentStep * DELTA_T <= UnitlessParam::ACCELERATION_TIME ) - new_mom += charge * eField * deltaT; - - particle[ momentum_ ] = new_mom; - - Velocity velocity; - const float3_X vel = velocity( new_mom, mass ); - - for( uint32_t d = 0; d < simDim; ++d ) + struct UnitlessParam : public particlePusherAccelerationParam { - pos[d] += ( vel[d] * deltaT ) / cellSize[d]; - } + /** Normalize input values from `pusher.param` to PIC units */ + static constexpr float_X AMPLITUDEx = float_X(AMPLITUDEx_SI / UNIT_EFIELD); // unit: Volt / meter + static constexpr float_X AMPLITUDEy = float_X(AMPLITUDEy_SI / UNIT_EFIELD); // unit: Volt / meter + static constexpr float_X AMPLITUDEz = float_X(AMPLITUDEz_SI / UNIT_EFIELD); // unit: Volt / meter - } + static constexpr float_X ACCELERATION_TIME = float_X(ACCELERATION_TIME_SI / UNIT_TIME); // unit: second + }; - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "Acceleration" ); - return propList; - } -}; -} // namespace particlePusherAcceleration + template + struct Push + { + /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = pmacc::math::CT::make_Int::type; + using UpperMargin = pmacc::math::CT::make_Int::type; + + template + HDINLINE void operator()( + const T_FunctorFieldB, + const T_FunctorFieldE, + T_Particle& particle, + T_Pos& pos, + const uint32_t currentStep) + { + using UnitlessParam = ::picongpu::particlePusherAcceleration::UnitlessParam; + + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + float_X const charge = attribute::getCharge(weighting, particle); + + using MomType = momentum::type; + MomType new_mom = particle[momentum_]; + + const float_X deltaT = DELTA_T; + + // normalize input SI values to + const float3_X eField(UnitlessParam::AMPLITUDEx, 
UnitlessParam::AMPLITUDEy, UnitlessParam::AMPLITUDEz); + + /* ToDo: Refactor to ensure a smooth and slow increase of eField with time + * which may help to reduce radiation due to acceleration, if present. + */ + if(currentStep * DELTA_T <= UnitlessParam::ACCELERATION_TIME) + new_mom += charge * eField * deltaT; + + particle[momentum_] = new_mom; + + Velocity velocity; + const float3_X vel = velocity(new_mom, mass); + + for(uint32_t d = 0; d < simDim; ++d) + { + pos[d] += (vel[d] * deltaT) / cellSize[d]; + } + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Acceleration"); + return propList; + } + }; + } // namespace particlePusherAcceleration } // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherAxel.hpp b/include/picongpu/particles/pusher/particlePusherAxel.hpp index c3c410b917..cec0a358ee 100644 --- a/include/picongpu/particles/pusher/particlePusherAxel.hpp +++ b/include/picongpu/particles/pusher/particlePusherAxel.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Benjamin Worpitz * * This file is part of PIConGPU. @@ -27,7 +27,7 @@ // That is a sum over two out of 3 coordinates, as described in the script // above. (See Ref.!) 
-#define FOR_JK_NOT_I(I,J,K,code) (code(I,J,K)) + (code(I, K, J)) +#define FOR_JK_NOT_I(I, J, K, code) (code(I, J, K)) + (code(I, K, J)) #include @@ -35,15 +35,14 @@ namespace picongpu { namespace particlePusherAxel { - template struct Push { /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation * for particle positions outside the super cell in one push */ - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; enum coords { @@ -52,80 +51,83 @@ namespace picongpu z = 2 }; - HDINLINE float_X levichivita( const unsigned int i, const unsigned int j, const unsigned int k ) + HDINLINE float_X levichivita(const unsigned int i, const unsigned int j, const unsigned int k) { - if( i == j || j == k || i == k ) return float_X(0.0); + if(i == j || j == k || i == k) + return float_X(0.0); - if( i == x && j == y ) return float_X(1.0); - if( i == z && j == x ) return float_X(1.0); - if( i == y && j == z ) return float_X(1.0); + if(i == x && j == y) + return float_X(1.0); + if(i == z && j == x) + return float_X(1.0); + if(i == y && j == z) + return float_X(1.0); return float_X(-1.0); } - template< - typename T_FunctorFieldE, - typename T_FunctorFieldB, - typename T_Particle, - typename T_Pos - > + template HDINLINE void operator()( const T_FunctorFieldB functorBField, /* at t=0 */ const T_FunctorFieldE functorEField, /* at t=0 */ - T_Particle & particle, - T_Pos & pos, /* at t=0 */ - const uint32_t - ) + T_Particle& particle, + T_Pos& pos, /* at t=0 */ + const uint32_t) { - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( weighting, particle ); - float_X const charge = attribute::getCharge( weighting, particle ); + float_X const weighting = particle[weighting_]; + float_X const mass = 
attribute::getMass(weighting, particle); + float_X const charge = attribute::getCharge(weighting, particle); using MomType = momentum::type; - MomType mom = particle[ momentum_ ]; + MomType mom = particle[momentum_]; - auto bField = functorBField(pos); - auto eField = functorEField(pos); + auto bField = functorBField(pos); + auto eField = functorEField(pos); Gamma gammaCalc; Velocity velocityCalc; const float_X epsilon = 1.0e-6; const float_X deltaT = DELTA_T; - //const float3_X velocity_atMinusHalf = velocity(mom, mass); - const float_X gamma = gammaCalc( mom, mass ); + // const float3_X velocity_atMinusHalf = velocity(mom, mass); + const float_X gamma = gammaCalc(mom, mass); const MomType mom_old = mom; - const float_X B2 = math::abs2( bField ); - const float_X B = math::abs( bField ); + const float_X B2 = pmacc::math::abs2(bField); + const float_X B = math::abs(bField); - if( B2 > epsilon ) + if(B2 > epsilon) { trigo_X sinres; trigo_X cosres; trigo_X arg = B * charge * deltaT / gamma; - math::sincos( arg, sinres, cosres ); + pmacc::math::sincos(arg, sinres, cosres); - mom.x() = bField.x() * bField.x() * ( eField.x() * charge * deltaT + mom_old.x() ); - mom.y() = bField.y() * bField.y() * ( eField.y() * charge * deltaT + mom_old.y() ); - mom.z() = bField.z() * bField.z() * ( eField.z() * charge * deltaT + mom_old.z() ); + mom.x() = bField.x() * bField.x() * (eField.x() * charge * deltaT + mom_old.x()); + mom.y() = bField.y() * bField.y() * (eField.y() * charge * deltaT + mom_old.y()); + mom.z() = bField.z() * bField.z() * (eField.z() * charge * deltaT + mom_old.z()); -#define SUM_PLINE1(I,J,K) bField.J() * ( -levichivita(I,J,K) * gamma * eField.K() + bField.I() * ( eField.J() * charge * deltaT + mom_old.J() ) ) -#define SUM_PLINE2(I,J,K) -bField.J() * ( -levichivita(I,J,K) * gamma * eField.K() + bField.I() * mom_old.J() - bField.J() * mom_old.I() ) -#define SUM_PLINE3(I,J,K) bField.J() * bField.J() * gamma * eField.I() - bField.I() * bField.J() * gamma * 
eField.J() + levichivita(I,J,K) * mom_old.J() * bField.K() * B2 +#define SUM_PLINE1(I, J, K) \ + bField.J() \ + * (-levichivita(I, J, K) * gamma * eField.K() + bField.I() * (eField.J() * charge * deltaT + mom_old.J())) +#define SUM_PLINE2(I, J, K) \ + -bField.J() * (-levichivita(I, J, K) * gamma * eField.K() + bField.I() * mom_old.J() - bField.J() * mom_old.I()) +#define SUM_PLINE3(I, J, K) \ + bField.J() * bField.J() * gamma* eField.I() - bField.I() * bField.J() * gamma* eField.J() \ + + levichivita(I, J, K) * mom_old.J() * bField.K() * B2 - mom.x() += FOR_JK_NOT_I( x, y, z, SUM_PLINE1 ); - mom.x() += float_X(cosres ) * ( FOR_JK_NOT_I( x, y, z, SUM_PLINE2 ) ); - mom.x() += float_X(sinres ) / B * ( FOR_JK_NOT_I( x, y, z, SUM_PLINE3 ) ); + mom.x() += FOR_JK_NOT_I(x, y, z, SUM_PLINE1); + mom.x() += float_X(cosres) * (FOR_JK_NOT_I(x, y, z, SUM_PLINE2)); + mom.x() += float_X(sinres) / B * (FOR_JK_NOT_I(x, y, z, SUM_PLINE3)); - mom.y() += FOR_JK_NOT_I( y, z, x, SUM_PLINE1 ); - mom.y() += float_X(cosres ) * ( FOR_JK_NOT_I( y, z, x, SUM_PLINE2 ) ); - mom.y() += float_X(sinres ) / B * ( FOR_JK_NOT_I( y, z, x, SUM_PLINE3 ) ); + mom.y() += FOR_JK_NOT_I(y, z, x, SUM_PLINE1); + mom.y() += float_X(cosres) * (FOR_JK_NOT_I(y, z, x, SUM_PLINE2)); + mom.y() += float_X(sinres) / B * (FOR_JK_NOT_I(y, z, x, SUM_PLINE3)); - mom.z() += FOR_JK_NOT_I( z, x, y, SUM_PLINE1 ); - mom.z() += float_X(cosres ) * ( FOR_JK_NOT_I( z, x, y, SUM_PLINE2 ) ); - mom.z() += float_X(sinres ) / B * ( FOR_JK_NOT_I( z, x, y, SUM_PLINE3 ) ); + mom.z() += FOR_JK_NOT_I(z, x, y, SUM_PLINE1); + mom.z() += float_X(cosres) * (FOR_JK_NOT_I(z, x, y, SUM_PLINE2)); + mom.z() += float_X(sinres) / B * (FOR_JK_NOT_I(z, x, y, SUM_PLINE3)); mom *= float_X(1.0) / B2; } @@ -134,58 +136,79 @@ namespace picongpu mom += eField * charge * deltaT; } - particle[ momentum_ ] = mom; + particle[momentum_] = mom; float3_X dr(float3_X::create(0.0)); // old spacial change calculation: linear step - if( TrajectoryInterpolation == LINEAR ) 
+ if(TrajectoryInterpolation == LINEAR) { - const float3_X vel = velocityCalc( mom, mass ); - dr = float3_X( vel.x() * deltaT / CELL_WIDTH, - vel.y() * deltaT / CELL_HEIGHT, - vel.z() * deltaT / CELL_DEPTH ); + const float3_X vel = velocityCalc(mom, mass); + dr = float3_X( + vel.x() * deltaT / CELL_WIDTH, + vel.y() * deltaT / CELL_HEIGHT, + vel.z() * deltaT / CELL_DEPTH); } // new spacial change calculation - if( TrajectoryInterpolation == NONLINEAR ) + if(TrajectoryInterpolation == NONLINEAR) { - const float3_X vel_old = velocityCalc( mom_old, mass ); + const float3_X vel_old = velocityCalc(mom_old, mass); const float_X QoM = charge / mass; const float_X B4 = B2 * B2; float3_X r = pos; - if( B4 > epsilon ) + if(B4 > epsilon) { trigo_X sinres; trigo_X cosres; trigo_X arg = B * QoM * deltaT / SPEED_OF_LIGHT; - math::sincos( arg, sinres, cosres ); + pmacc::math::sincos(arg, sinres, cosres); r.x() = bField.x() * bField.x() * bField.x() * bField.x() * QoM - * ( eField.x() * QoM * deltaT * deltaT + 2.0f * ( deltaT * vel_old.x() + pos.x() ) ); - -#define SUM_RLINE1(I,J,K) 2.0 * bField.J() * bField.J() * bField.J() * bField.J() * QoM * pos.x() \ - + 2.0 * bField.J() * bField.J() * bField.K() * bField.K() * QoM * pos.x() \ - + bField.J() * bField.J() * bField.J() * ( -levichivita(I,J,K) * 2.0 * SPEED_OF_LIGHT * ( eField.K() * QoM * deltaT + vel_old.K() ) + bField.I() * QoM * deltaT * ( eField.J() * QoM * deltaT + 2.0 * vel_old.J() ) ) \ - + bField.J() * bField.J() * ( 2.0 * SPEED_OF_LIGHT * SPEED_OF_LIGHT * eField.I() + bField.I() * bField.I() * QoM * ( eField.I() * QoM * deltaT * deltaT + 2.0 * deltaT * vel_old.I() + 4.0 * pos.I() ) + levichivita(I,J,K) * 2.0 * SPEED_OF_LIGHT * bField.K() * vel_old.J() + bField.K() * QoM * ( levichivita(I,J,K) * 2.0 * eField.J() * SPEED_OF_LIGHT * deltaT + bField.I() * bField.K() * QoM * deltaT * deltaT ) ) \ - + bField.I() * bField.J() * ( bField.I() * bField.I() * QoM * deltaT * ( eField.J() * QoM * deltaT + 2.0 * vel_old.J() ) - 
levichivita(I,J,K) * 2.0 * bField.I() * SPEED_OF_LIGHT * ( eField.K() * QoM * deltaT + vel_old.K() ) - 2.0 * SPEED_OF_LIGHT * SPEED_OF_LIGHT * eField.J() ) - -#define SUM_RLINE2(I,J,K) - bField.J() * ( SPEED_OF_LIGHT * eField.I() * bField.J() - levichivita(I,J,K) * bField.J() * bField.J() * vel_old.K() - bField.I() * SPEED_OF_LIGHT * eField.J() - levichivita(I,J,K) * bField.J() * vel_old.K() * ( bField.I() * bField.I() + bField.K() *bField.K() ) ) - -#define SUM_RLINE3(I,J,K) levichivita(I,J,K) * bField.J() * ( SPEED_OF_LIGHT * eField.K() + levichivita(I,J,K) * ( bField.J() * vel_old.I() - bField.I() * vel_old.J() ) ) - - r.x() += FOR_JK_NOT_I( x, y, z, SUM_RLINE1 ); - r.x() += float_X(cosres ) * 2.0 * SPEED_OF_LIGHT * ( FOR_JK_NOT_I( x, y, z, SUM_RLINE2 ) ); - r.x() += float_X(sinres ) * 2.0 * SPEED_OF_LIGHT * B * ( FOR_JK_NOT_I( x, y, z, SUM_RLINE3 ) ); - - r.y() += FOR_JK_NOT_I( y, z, x, SUM_RLINE1 ); - r.y() += float_X(cosres ) * 2.0 * SPEED_OF_LIGHT * ( FOR_JK_NOT_I( y, z, x, SUM_RLINE2 ) ); - r.y() += float_X(sinres ) * 2.0 * SPEED_OF_LIGHT * B * ( FOR_JK_NOT_I( y, z, x, SUM_RLINE3 ) ); - - r.z() += FOR_JK_NOT_I( z, x, y, SUM_RLINE1 ); - r.z() += float_X(cosres ) * 2.0 * SPEED_OF_LIGHT * ( FOR_JK_NOT_I( z, x, y, SUM_RLINE2 ) ); - r.z() += float_X(sinres ) * 2.0 * SPEED_OF_LIGHT * B * ( FOR_JK_NOT_I( z, x, y, SUM_RLINE3 ) ); + * (eField.x() * QoM * deltaT * deltaT + 2.0f * (deltaT * vel_old.x() + pos.x())); + +#define SUM_RLINE1(I, J, K) \ + 2.0 * bField.J() * bField.J() * bField.J() * bField.J() * QoM* pos.x() \ + + 2.0 * bField.J() * bField.J() * bField.K() * bField.K() * QoM* pos.x() \ + + bField.J() * bField.J() * bField.J() \ + * (-levichivita(I, J, K) * 2.0 * SPEED_OF_LIGHT * (eField.K() * QoM * deltaT + vel_old.K()) \ + + bField.I() * QoM * deltaT * (eField.J() * QoM * deltaT + 2.0 * vel_old.J())) \ + + bField.J() * bField.J() \ + * (2.0 * SPEED_OF_LIGHT * SPEED_OF_LIGHT * eField.I() \ + + bField.I() * bField.I() * QoM \ + * (eField.I() * QoM * deltaT * 
deltaT + 2.0 * deltaT * vel_old.I() + 4.0 * pos.I()) \ + + levichivita(I, J, K) * 2.0 * SPEED_OF_LIGHT * bField.K() * vel_old.J() \ + + bField.K() * QoM \ + * (levichivita(I, J, K) * 2.0 * eField.J() * SPEED_OF_LIGHT * deltaT \ + + bField.I() * bField.K() * QoM * deltaT * deltaT)) \ + + bField.I() * bField.J() \ + * (bField.I() * bField.I() * QoM * deltaT * (eField.J() * QoM * deltaT + 2.0 * vel_old.J()) \ + - levichivita(I, J, K) * 2.0 * bField.I() * SPEED_OF_LIGHT * (eField.K() * QoM * deltaT + vel_old.K()) \ + - 2.0 * SPEED_OF_LIGHT * SPEED_OF_LIGHT * eField.J()) + +#define SUM_RLINE2(I, J, K) \ + -bField.J() \ + * (SPEED_OF_LIGHT * eField.I() * bField.J() - levichivita(I, J, K) * bField.J() * bField.J() * vel_old.K() \ + - bField.I() * SPEED_OF_LIGHT * eField.J() \ + - levichivita(I, J, K) * bField.J() * vel_old.K() * (bField.I() * bField.I() + bField.K() * bField.K())) + +#define SUM_RLINE3(I, J, K) \ + levichivita(I, J, K) * bField.J() \ + * (SPEED_OF_LIGHT * eField.K() \ + + levichivita(I, J, K) * (bField.J() * vel_old.I() - bField.I() * vel_old.J())) + + r.x() += FOR_JK_NOT_I(x, y, z, SUM_RLINE1); + r.x() += float_X(cosres) * 2.0 * SPEED_OF_LIGHT * (FOR_JK_NOT_I(x, y, z, SUM_RLINE2)); + r.x() += float_X(sinres) * 2.0 * SPEED_OF_LIGHT * B * (FOR_JK_NOT_I(x, y, z, SUM_RLINE3)); + + r.y() += FOR_JK_NOT_I(y, z, x, SUM_RLINE1); + r.y() += float_X(cosres) * 2.0 * SPEED_OF_LIGHT * (FOR_JK_NOT_I(y, z, x, SUM_RLINE2)); + r.y() += float_X(sinres) * 2.0 * SPEED_OF_LIGHT * B * (FOR_JK_NOT_I(y, z, x, SUM_RLINE3)); + + r.z() += FOR_JK_NOT_I(z, x, y, SUM_RLINE1); + r.z() += float_X(cosres) * 2.0 * SPEED_OF_LIGHT * (FOR_JK_NOT_I(z, x, y, SUM_RLINE2)); + r.z() += float_X(sinres) * 2.0 * SPEED_OF_LIGHT * B * (FOR_JK_NOT_I(z, x, y, SUM_RLINE3)); r *= float_X(0.5) / B4 / QoM; } @@ -196,7 +219,6 @@ namespace picongpu dr = r - pos; dr *= float3_X::create(1.0) / cellSize; - } pos += dr; @@ -204,12 +226,10 @@ namespace picongpu static pmacc::traits::StringProperty 
getStringProperties() { - pmacc::traits::StringProperty propList( "name", "other" ); + pmacc::traits::StringProperty propList("name", "other"); propList["param"] = "semi analytical, Axel Huebl (2011)"; return propList; } }; - } //namespace -} - - + } // namespace particlePusherAxel +} // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherBoris.hpp b/include/picongpu/particles/pusher/particlePusherBoris.hpp index 69736c9b85..46c22e63b9 100644 --- a/include/picongpu/particles/pusher/particlePusherBoris.hpp +++ b/include/picongpu/particles/pusher/particlePusherBoris.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -26,70 +26,67 @@ namespace picongpu { -namespace particlePusherBoris -{ - -template -struct Push -{ - /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation - * for particle positions outside the super cell in one push - */ - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; - - template< typename T_FunctorFieldE, typename T_FunctorFieldB, typename T_Particle, typename T_Pos > - HDINLINE void operator()( - const T_FunctorFieldB functorBField, - const T_FunctorFieldE functorEField, - T_Particle & particle, - T_Pos & pos, - const uint32_t - ) + namespace particlePusherBoris { - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( weighting, particle ); - float_X const charge = attribute::getCharge( weighting, particle ); - - using MomType = momentum::type; - MomType const mom = particle[ momentum_ ]; - - auto bField = functorBField(pos); - auto eField = functorEField(pos); - - const float_X QoM = charge / mass; - - const float_X deltaT = DELTA_T; - - const MomType mom_minus = mom + float_X(0.5) * charge * eField * deltaT; - - Gamma 
gamma; - const float_X gamma_reci = float_X(1.0) / gamma(mom_minus, mass); - const float3_X t = float_X(0.5) * QoM * bField * gamma_reci * deltaT; - auto s = float_X(2.0) * t * (float_X(1.0) / (float_X(1.0) + math::abs2(t))); - - const MomType mom_prime = mom_minus + math::cross(mom_minus, t); - const MomType mom_plus = mom_minus + math::cross(mom_prime, s); - - const MomType new_mom = mom_plus + float_X(0.5) * charge * eField * deltaT; - - particle[ momentum_ ] = new_mom; - - Velocity velocity; - const float3_X vel = velocity(new_mom, mass); - - for(uint32_t d=0;d + struct Push { - pos[d] += (vel[d] * deltaT) / cellSize[d]; - } - - } - - static pmacc::traits::StringProperty getStringProperties() - { - pmacc::traits::StringProperty propList( "name", "Boris" ); - return propList; - } -}; -} // namespace particlePusherBoris + /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + template + HDINLINE void operator()( + const T_FunctorFieldB functorBField, + const T_FunctorFieldE functorEField, + T_Particle& particle, + T_Pos& pos, + const uint32_t) + { + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + float_X const charge = attribute::getCharge(weighting, particle); + + using MomType = momentum::type; + MomType const mom = particle[momentum_]; + + auto bField = functorBField(pos); + auto eField = functorEField(pos); + + const float_X QoM = charge / mass; + + const float_X deltaT = DELTA_T; + + const MomType mom_minus = mom + float_X(0.5) * charge * eField * deltaT; + + Gamma gamma; + const float_X gamma_reci = float_X(1.0) / gamma(mom_minus, mass); + const float3_X t = float_X(0.5) * QoM * bField * gamma_reci * deltaT; + auto s = float_X(2.0) * t * (float_X(1.0) 
/ (float_X(1.0) + pmacc::math::abs2(t))); + + const MomType mom_prime = mom_minus + pmacc::math::cross(mom_minus, t); + const MomType mom_plus = mom_minus + pmacc::math::cross(mom_prime, s); + + const MomType new_mom = mom_plus + float_X(0.5) * charge * eField * deltaT; + + particle[momentum_] = new_mom; + + Velocity velocity; + const float3_X vel = velocity(new_mom, mass); + + for(uint32_t d = 0; d < simDim; ++d) + { + pos[d] += (vel[d] * deltaT) / cellSize[d]; + } + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "Boris"); + return propList; + } + }; + } // namespace particlePusherBoris } // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherComposite.hpp b/include/picongpu/particles/pusher/particlePusherComposite.hpp new file mode 100644 index 0000000000..04e6a43f98 --- /dev/null +++ b/include/picongpu/particles/pusher/particlePusherComposite.hpp @@ -0,0 +1,139 @@ +/* Copyright 2020-2021 Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +#include + +#include +#include + + +namespace picongpu +{ + namespace particlePusherComposite + { + /** Concept for an activation functor for a composite pusher + * + * This concept defines an interface for the corresponding template + * argument. 
This class is not supposed to be used directly. + * However, a helper activator class to be reused is provided below. + */ + struct ActivationFunctor + { + /** Return a 1-based index of which pusher of the composite to use + * + * Return value out of the range [1, #pushers] means no pusher to be used. + * + * @param currentStep current time iteration + */ + HDINLINE uint32_t operator()(uint32_t const currentStep) const; + }; + + /** Helper activation functor for a composite of two pushers + * + * Uses the first pusher for currentStep < T_switchTimeStep and the second + * one otherwise. + */ + template + struct BinarySwitchActivationFunctor + { + HDINLINE constexpr uint32_t operator()(uint32_t const currentStep) const + { + return currentStep < T_switchTimeStep ? 1 : 2; + } + }; + + /** Composite of two particle pushers, each implementing the pusher concept. + * + * The decision which pusher to use is made by the activation functor. + * The composite pushers implement the pusher concept themselves, however + * for performance reasons special treatment is recommended during the + * particle push simulation stage. + * + * @tparam T_FirstPusher first pusher type + * @tparam T_SecondPusher second pusher type + * @tparam T_ActivationFunctor activation functor to decide which pusher to use, + * implements the ActivationFunctor concept + */ + template + struct Push + : public T_FirstPusher + , T_SecondPusher + { + using FirstPusher = T_FirstPusher; + using SecondPusher = T_SecondPusher; + using ActivationFunctor = T_ActivationFunctor; + + /* These are done logically correct, but should not be used directly for + * the particle push stage. 
+ */ + using LowerMargin = typename pmacc::math::CT::max< + typename traits::GetLowerMargin::type, + typename traits::GetLowerMargin::type>::type; + using UpperMargin = typename pmacc::math::CT:: + max::type, typename GetUpperMargin::type>::type; + + /** Get active pusher 1-based index + * + * Result other than 1 or 2 means no pusher should be used + * + * @param currentStep current time iteration + */ + static HDINLINE uint32_t activePusherIdx(uint32_t const currentStep) + { + return ActivationFunctor{}(currentStep); + } + + /** Push one particle, this is compatibility-only + * + * Should not be used for the particle push stage due to shared memory + * and register consumption. + */ + template + HDINLINE void operator()( + T_FunctorFieldB const functorBField, + T_FunctorFieldE const functorEField, + T_Particle& particle, + T_Pos& pos, + uint32_t const currentStep) const + { + auto const pusherIdx = activePusherIdx(currentStep); + if(pusherIdx == 1) + FirstPusher::operator()(functorBField, functorEField, particle, pos, currentStep); + else if(pusherIdx == 2) + SecondPusher::operator()(functorBField, functorEField, particle, pos, currentStep); + } + + static pmacc::traits::StringProperty getStringProperties() + { + auto firstProperty = FirstPusher::getStringProperties(); + auto secondProperty = SecondPusher::getStringProperties(); + pmacc::traits::StringProperty propList( + "name", + std::string("Composite of ") + firstProperty["name"].value + " and " + + secondProperty["name"].value); + return propList; + } + }; + + } // namespace particlePusherComposite +} // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherFree.hpp b/include/picongpu/particles/pusher/particlePusherFree.hpp index 38a28f1ed4..9103a2aa23 100644 --- a/include/picongpu/particles/pusher/particlePusherFree.hpp +++ b/include/picongpu/particles/pusher/particlePusherFree.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 
Axel Huebl, Heiko Burau, Rene Widera, * Richard Pausch * * This file is part of PIConGPU. @@ -34,29 +34,28 @@ namespace picongpu /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation * for particle positions outside the super cell in one push */ - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; - template< typename T_FunctorFieldE, typename T_FunctorFieldB, typename T_Particle, typename T_Pos > + template HDINLINE void operator()( const T_FunctorFieldB functorBField, const T_FunctorFieldE functorEField, - T_Particle & particle, - T_Pos & pos, - const uint32_t - ) + T_Particle& particle, + T_Pos& pos, + const uint32_t) { - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( weighting, particle ); + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); using MomType = momentum::type; - MomType const mom = particle[ momentum_ ]; + MomType const mom = particle[momentum_]; Velocity velocity; const MomType vel = velocity(mom, mass); - for(uint32_t d=0;d. + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/traits/attribute/GetMass.hpp" +#include "picongpu/traits/attribute/GetCharge.hpp" + + +namespace picongpu +{ + namespace particlePusherHigueraCary + { + /** Implementation of the Higuera-Cary pusher as presented in doi:10.1063/1.4979989. + * + * A correction is applied to the given formulas as documented by the WarpX team: + * (https://github.com/ECP-WarpX/WarpX/issues/320). + * + * Note, while Higuera and Ripperda present the formulas for the quantity u = gamma * v, + * PIConGPU uses the real momentum p = gamma * m * v = u * m for calculations. 
+ * Here, all auxiliary quantities are equal to those used in Ripperda's article. + * + * Further references: + * [Higuera's article on arxiv](https://arxiv.org/abs/1701.05605) + * [Riperda's comparison of relativistic particle integrators](https://doi.org/10.3847/1538-4365/aab114) + * + * @tparam Velocity functor to compute the velocity of a particle with momentum p and mass m + * @tparam Gamma functor to compute the Lorentz factor (= Energy/mc^2) of a particle with momentum p and mass m + */ + template + struct Push + { + /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + template + HDINLINE void operator()( + T_FunctorFieldB const functorBField, + T_FunctorFieldE const functorEField, + T_Particle& particle, + T_Pos& pos, + uint32_t const) + { + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + float_X const charge = attribute::getCharge(weighting, particle); + + using MomType = momentum::type; + MomType const mom = particle[momentum_]; + + auto bField = functorBField(pos); + auto eField = functorEField(pos); + + float_X const deltaT = DELTA_T; + + + Gamma gamma; + + /* Momentum update + * Notation is according to Ripperda's paper + */ + // First half electric field acceleration + namespace sqrt_HC = sqrt_HigueraCary; + + sqrt_HC::float3_X const mom_minus + = precisionCast(mom + float_X(0.5) * charge * eField * deltaT); + + // Auxiliary quantitites + sqrt_HC::float_X const gamma_minus = gamma(mom_minus, mass); + + sqrt_HC::float3_X const tau + = precisionCast(float_X(0.5) * bField * charge * deltaT / mass); + + sqrt_HC::float_X const sigma = pmacc::math::abs2(gamma_minus) - pmacc::math::abs2(tau); + + sqrt_HC::float_X const u_star + = 
pmacc::math::dot(mom_minus, tau) / precisionCast(mass * SPEED_OF_LIGHT); + + sqrt_HC::float_X const gamma_plus = math::sqrt( + sqrt_HC::float_X(0.5) + * (sigma + + math::sqrt( + pmacc::math::abs2(sigma) + + sqrt_HC::float_X(4.0) * (pmacc::math::abs2(tau) + pmacc::math::abs2(u_star))))); + + sqrt_HC::float3_X const t_vector = tau / gamma_plus; + + sqrt_HC::float_X const s + = sqrt_HC::float_X(1.0) / (sqrt_HC::float_X(1.0) + pmacc::math::abs2(t_vector)); + + // Rotation step + sqrt_HC::float3_X const mom_plus = s + * (mom_minus + pmacc::math::dot(mom_minus, t_vector) * t_vector + + pmacc::math::cross(mom_minus, t_vector)); + + // Second half electric field acceleration (Note correction mom_minus -> mom_plus here compared to + // Ripperda) + MomType const mom_diff1 = float_X(0.5) * charge * eField * deltaT; + MomType const mom_diff2 = precisionCast(pmacc::math::cross(mom_plus, t_vector)); + MomType const mom_diff = mom_diff1 + mom_diff2; + + MomType const new_mom = precisionCast(mom_plus) + mom_diff; + + particle[momentum_] = new_mom; + + // Position update + Velocity velocity; + + float3_X const vel = velocity(new_mom, mass); + + for(uint32_t d = 0; d < simDim; ++d) + { + pos[d] += (vel[d] * deltaT) / cellSize[d]; + } + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "other:Higuera-Cary"); + return propList; + } + }; + + } // namespace particlePusherHigueraCary +} // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherPhoton.hpp b/include/picongpu/particles/pusher/particlePusherPhoton.hpp index 3de76d8eae..89cec3dba3 100644 --- a/include/picongpu/particles/pusher/particlePusherPhoton.hpp +++ b/include/picongpu/particles/pusher/particlePusherPhoton.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, * Alexander Grund, Richard Pausch * * This file is part of PIConGPU. 
@@ -33,25 +33,24 @@ namespace picongpu /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation * for particle positions outside the super cell in one push */ - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; - template< typename T_FunctorFieldE, typename T_FunctorFieldB, typename T_Particle, typename T_Pos > + template HDINLINE void operator()( const T_FunctorFieldB functorBField, const T_FunctorFieldE functorEField, - T_Particle & particle, - T_Pos & pos, - const uint32_t - ) + T_Particle& particle, + T_Pos& pos, + const uint32_t) { using MomType = momentum::type; - MomType const mom = particle[ momentum_ ]; + MomType const mom = particle[momentum_]; - const float_X mom_abs = math::abs( mom ); - const MomType vel = mom * ( SPEED_OF_LIGHT / mom_abs ); + const float_X mom_abs = math::abs(mom); + const MomType vel = mom * (SPEED_OF_LIGHT / mom_abs); - for(uint32_t d=0;d - struct Push + namespace particlePusherProbe { - using ActualPush = T_ActualPush; - - /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation - * for particle positions outside the super cell in one push + /** Probe electro-magnetic fields and store the result with a particle + * + * @tparam T_ValueFunctor pmacc::nvidia::functors::*, binary functor + * handling how to store the obtained field on the particle, + * default is assigning a new value + * @tparam T_ActualPush allows to perform a real particle push after + * probing the electro-magnetic field (e.g. 
to let a probe + * particle stream with a moving window or to define a tracer + * particle species that records its fields), + * default is void and means no push (just a static probe) */ - using LowerMargin = typename ActualPush::LowerMargin; - using UpperMargin = typename ActualPush::UpperMargin; - - template< - typename T_FunctorFieldE, - typename T_FunctorFieldB, - typename T_Particle, - typename T_Pos - > - HDINLINE void - operator()( - T_FunctorFieldB const functorBField, - T_FunctorFieldE const functorEField, - T_Particle & particle, - T_Pos & pos, - uint32_t const currentStep - ) + template + struct Push { - T_ValueFunctor valueFunctor; - valueFunctor( - particle[ probeB_ ], - functorBField( pos ) - ); - valueFunctor( - particle[ probeE_ ], - functorEField( pos ) - ); + using ActualPush = T_ActualPush; - ActualPush actualPush; - actualPush( - functorBField, - functorEField, - particle, - pos, - currentStep - ); - } + /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = typename ActualPush::LowerMargin; + using UpperMargin = typename ActualPush::UpperMargin; - static - pmacc::traits::StringProperty - getStringProperties() - { - pmacc::traits::GetStringProperties< ActualPush > propList; - propList[ "param" ] = "moving probe"; - return propList; - } - }; + template + HDINLINE void operator()( + T_FunctorFieldB const functorBField, + T_FunctorFieldE const functorEField, + T_Particle& particle, + T_Pos& pos, + uint32_t const currentStep) + { + T_ValueFunctor valueFunctor; + valueFunctor(particle[probeB_], functorBField(pos)); + valueFunctor(particle[probeE_], functorEField(pos)); - template< typename T_ValueFunctor > - struct Push< - T_ValueFunctor, - void - > - { - /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation - * for particle positions outside the super cell in one push - 
*/ - using LowerMargin = typename pmacc::math::CT::make_Int< - simDim, - 0 - >::type; - using UpperMargin = typename pmacc::math::CT::make_Int< - simDim, - 0 - >::type; + ActualPush actualPush; + actualPush(functorBField, functorEField, particle, pos, currentStep); + } - template< - typename T_FunctorFieldE, - typename T_FunctorFieldB, - typename T_Particle, - typename T_Pos - > - HDINLINE void - operator()( - T_FunctorFieldB const functorBField, - T_FunctorFieldE const functorEField, - T_Particle & particle, - T_Pos & pos, - uint32_t const - ) - { - T_ValueFunctor valueFunctor; - valueFunctor( - particle[ probeB_ ], - functorBField( pos ) - ); - valueFunctor( - particle[ probeE_ ], - functorEField( pos ) - ); - } + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::GetStringProperties propList; + propList["param"] = "moving probe"; + return propList; + } + }; - static - pmacc::traits::StringProperty - getStringProperties() + template + struct Push { - pmacc::traits::StringProperty propList( - "name", - "other" - ); - propList[ "param" ] = "static probe"; - return propList; - } - }; -} // namespace particlePusherProbe + /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + + template + HDINLINE void operator()( + T_FunctorFieldB const functorBField, + T_FunctorFieldE const functorEField, + T_Particle& particle, + T_Pos& pos, + uint32_t const) + { + T_ValueFunctor valueFunctor; + valueFunctor(particle[probeB_], functorBField(pos)); + valueFunctor(particle[probeE_], functorEField(pos)); + } + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "other"); + propList["param"] = "static probe"; + return propList; + } + }; + } // namespace 
particlePusherProbe } // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherReducedLandauLifshitz.hpp b/include/picongpu/particles/pusher/particlePusherReducedLandauLifshitz.hpp index 8ee17d51d2..920b56869c 100644 --- a/include/picongpu/particles/pusher/particlePusherReducedLandauLifshitz.hpp +++ b/include/picongpu/particles/pusher/particlePusherReducedLandauLifshitz.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -29,182 +29,207 @@ namespace picongpu { -namespace particlePusherReducedLandauLifshitz -{ -/* This pusher uses the Lorentz force and a reduced - * Landau Lifshitz term to push particles based on the - * Runge Kutta solver 4th order. It takes into account - * the energy loss due to radiation. - * - * More details on this approach can be found in - * Marija Vranic's paper: Classical Radiation Reaction - * in Particle-In-Cell Simulations - * http://arxiv.org/abs/1502.02432 - */ -template -struct Push -{ - /* this is an optional extention for sub-sampling pushes that enables grid to particle interpolation - * for particle positions outside the super cell in one push - */ - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; - - template< - typename T_FunctorFieldE, - typename T_FunctorFieldB, - typename T_Particle, - typename T_Pos - > - HDINLINE void operator()( - const T_FunctorFieldB functorBField, /* at t=0 */ - const T_FunctorFieldE functorEField, /* at t=0 */ - T_Particle & particle, - T_Pos & pos, /* at t=0 */ - const uint32_t - ) - { - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( weighting, particle ); - float_X const charge = attribute::getCharge( weighting, particle ); - - using TypeBFieldFunctor = T_FunctorFieldB; - using 
TypeEFieldFunctor = T_FunctorFieldE; - using TypePosition = position_pic::type; - using TypeMomentum = momentum::type; - using TypeMass = float_X; - using TypeCharge = float_X; - using TypeWeighting = weighting::type; - - TypeMomentum mom = particle[ momentum_ ]; - - const float_X deltaT = DELTA_T; - const uint32_t dimMomentum = GetNComponents::value; - // the transver data type adjust to 3D3V, 2D3V, 2D2V, ... - using VariableType = pmacc::math::Vector< picongpu::float_X, simDim + dimMomentum >; - VariableType var; - - // transfer position - for(uint32_t i=0; i; - DiffEqType diffEq(functorEField, functorBField, mass, charge, weighting); - - VariableType varNew = pmacc::math::RungeKutta4()(diffEq, var, float_X(0.0), deltaT); - - // transfer position - for(uint32_t i=0; i - struct DiffEquation - { - - // alias for types to follow coding guide line - using VariableType = T_Var; - using TimeType = T_Time; - using EFieldFuncType = T_FieldEFunc; - using BFieldFuncType = T_FieldBFunc; - using PositionType = T_Pos; - using MomentumType = T_Mom; - using MassType = T_Mass; - using ChargeType = T_Charge; - using WeightingType = T_Weighting; - using VelocityType = T_Velocity; - using GammaType = T_Gamma; - - - HDINLINE DiffEquation(EFieldFuncType funcE, BFieldFuncType funcB, MassType m, ChargeType q, WeightingType w) - : fieldEFunc(funcE), fieldBFunc(funcB), mass(m), charge(q), weighting(w) - { } - - HDINLINE VariableType operator()(TimeType time, VariableType var) const + namespace particlePusherReducedLandauLifshitz { - PositionType pos; - PositionType posInterpolation; - MomentumType mom; - // transfer position - for(uint32_t i=0; i::value; - for(uint32_t i=0; i + struct Push + { + /* this is an optional extention for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; + 
+ template + HDINLINE void operator()( + const T_FunctorFieldB functorBField, /* at t=0 */ + const T_FunctorFieldE functorEField, /* at t=0 */ + T_Particle& particle, + T_Pos& pos, /* at t=0 */ + const uint32_t) + { + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + float_X const charge = attribute::getCharge(weighting, particle); + + using TypeBFieldFunctor = T_FunctorFieldB; + using TypeEFieldFunctor = T_FunctorFieldE; + using TypePosition = position_pic::type; + using TypeMomentum = momentum::type; + using TypeMass = float_X; + using TypeCharge = float_X; + using TypeWeighting = weighting::type; + + TypeMomentum mom = particle[momentum_]; + + const float_X deltaT = DELTA_T; + const uint32_t dimMomentum = GetNComponents::value; + // the transver data type adjust to 3D3V, 2D3V, 2D2V, ... + using VariableType = pmacc::math::Vector; + VariableType var; + + // transfer position + for(uint32_t i = 0; i < picongpu::simDim; ++i) + var[i] = pos[i]; + + // transfer momentum + for(uint32_t i = 0; i < dimMomentum; ++i) + var[simDim + i] = mom[i]; + + using DiffEqType = DiffEquation< + VariableType, + float_X, + TypeEFieldFunctor, + TypeBFieldFunctor, + TypePosition, + TypeMomentum, + TypeMass, + TypeCharge, + TypeWeighting, + Velocity, + Gamma>; + DiffEqType diffEq(functorEField, functorBField, mass, charge, weighting); + + VariableType varNew = pmacc::math::RungeKutta4()(diffEq, var, float_X(0.0), deltaT); + + // transfer position + for(uint32_t i = 0; i < picongpu::simDim; ++i) + pos[i] = varNew[i]; + + // transfer momentum + for(uint32_t i = 0; i < dimMomentum; ++i) + mom[i] = varNew[simDim + i]; + + particle[momentum_] = mom; + } + + template< + typename T_Var, + typename T_Time, + typename T_FieldEFunc, + typename T_FieldBFunc, + typename T_Pos, + typename T_Mom, + typename T_Mass, + typename T_Charge, + typename T_Weighting, + typename T_Velocity, + typename T_Gamma> + struct DiffEquation + { + // alias 
for types to follow coding guide line + using VariableType = T_Var; + using TimeType = T_Time; + using EFieldFuncType = T_FieldEFunc; + using BFieldFuncType = T_FieldBFunc; + using PositionType = T_Pos; + using MomentumType = T_Mom; + using MassType = T_Mass; + using ChargeType = T_Charge; + using WeightingType = T_Weighting; + using VelocityType = T_Velocity; + using GammaType = T_Gamma; + + + HDINLINE DiffEquation( + EFieldFuncType funcE, + BFieldFuncType funcB, + MassType m, + ChargeType q, + WeightingType w) + : fieldEFunc(funcE) + , fieldBFunc(funcB) + , mass(m) + , charge(q) + , weighting(w) + { + } + + HDINLINE VariableType operator()(TimeType time, VariableType var) const + { + PositionType pos; + PositionType posInterpolation; + MomentumType mom; + // transfer position + for(uint32_t i = 0; i < picongpu::simDim; ++i) + { + posInterpolation[i] = var[i]; + pos[i] = var[i] * cellSize[i]; + } + + auto fieldE = fieldEFunc( + posInterpolation, + picongpu::particles::interpolationMemoryPolicy::ShiftToValidRange()); + auto fieldB = fieldBFunc( + posInterpolation, + picongpu::particles::interpolationMemoryPolicy::ShiftToValidRange()); + + // transfer momentum + const uint32_t dimMomentum = GetNComponents::value; + for(uint32_t i = 0; i < dimMomentum; ++i) + mom[i] = var[simDim + i]; + + VelocityType velocityCalc; + GammaType gammaCalc; + const float_X c = SPEED_OF_LIGHT; + const float3_X velocity = velocityCalc(mom, mass); + const float_X gamma = gammaCalc(mom, mass); + const float_X conversionMomentum2Beta = 1.0 / (gamma * mass * c); + + const float_X c2 = c * c; + const float_X charge2 = charge * charge; + const float3_X beta = velocity / c; + + const float_X prefactorRR = 2. / 3. * charge2 * charge2 / (4. 
* PI * EPS0 * mass * mass * c2 * c2); + const float3_X lorentz = fieldE + conversionMomentum2Beta * c * pmacc::math::cross(mom, fieldB); + const float_X fieldETimesBeta = pmacc::math::dot(fieldE, mom) * conversionMomentum2Beta; + const float3_X radReactionVec = c + * (pmacc::math::cross(fieldE, fieldB) + + c * conversionMomentum2Beta + * pmacc::math::cross(fieldB, pmacc::math::cross(fieldB, mom))) + + conversionMomentum2Beta * fieldE * pmacc::math::dot(mom, fieldE) + - gamma * gamma * conversionMomentum2Beta + * (mom * (pmacc::math::dot(lorentz, lorentz) - fieldETimesBeta * fieldETimesBeta)); + + const float3_X diffMom = charge * lorentz + (prefactorRR / weighting) * radReactionVec; + const float3_X diffPos = velocity; + + VariableType returnVar; + for(uint32_t i = 0; i < picongpu::simDim; ++i) + returnVar[i] = diffPos[i] / cellSize[i]; + + for(uint32_t i = 0; i < dimMomentum; ++i) + returnVar[simDim + i] = diffMom[i]; + + return returnVar; + } + + + private: + EFieldFuncType fieldEFunc; /* functor E field interpolation */ + BFieldFuncType fieldBFunc; /* functor B field interpolation */ + MassType mass; /* mass of the macro particle */ + ChargeType charge; /* charge of the macro particle */ + WeightingType weighting; /* weighting of the macro particle */ + }; + + static pmacc::traits::StringProperty getStringProperties() + { + pmacc::traits::StringProperty propList("name", "other"); + propList["param"] = "reduced Landau-Lifshitz pusher via RK4 and " + "classical radiation reaction, Marija Vranic (2015)"; + return propList; + } + }; + } // namespace particlePusherReducedLandauLifshitz +} // namespace picongpu diff --git a/include/picongpu/particles/pusher/particlePusherVay.hpp b/include/picongpu/particles/pusher/particlePusherVay.hpp index e5c22f90bd..b262a2f844 100644 --- a/include/picongpu/particles/pusher/particlePusherVay.hpp +++ b/include/picongpu/particles/pusher/particlePusherVay.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene 
Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -26,90 +26,87 @@ namespace picongpu { -namespace particlePusherVay -{ + namespace particlePusherVay + { + template + struct Push + { + /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation + * for particle positions outside the super cell in one push + */ + using LowerMargin = typename pmacc::math::CT::make_Int::type; + using UpperMargin = typename pmacc::math::CT::make_Int::type; -template -struct Push -{ - /* this is an optional extension for sub-sampling pushes that enables grid to particle interpolation - * for particle positions outside the super cell in one push - */ - using LowerMargin = typename pmacc::math::CT::make_Int::type; - using UpperMargin = typename pmacc::math::CT::make_Int::type; + template + HDINLINE void operator()( + const T_FunctorFieldB functorBField, /* at t=0 */ + const T_FunctorFieldE functorEField, /* at t=0 */ + T_Particle& particle, + T_Pos& pos, /* at t=0 */ + const uint32_t) + { + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + float_X const charge = attribute::getCharge(weighting, particle); - template< - typename T_FunctorFieldE, - typename T_FunctorFieldB, - typename T_Particle, - typename T_Pos - > - HDINLINE void operator()( - const T_FunctorFieldB functorBField, /* at t=0 */ - const T_FunctorFieldE functorEField, /* at t=0 */ - T_Particle & particle, - T_Pos & pos, /* at t=0 */ - const uint32_t - ) - { - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( weighting, particle ); - float_X const charge = attribute::getCharge( weighting, particle ); + using MomType = momentum::type; + MomType const mom = particle[momentum_]; - using MomType = momentum::type; - MomType const mom = particle[ momentum_ ]; + auto bField = functorBField(pos); + 
auto eField = functorEField(pos); + /* + time index in paper is reduced by a half: i=0 --> i=-1/2 so that momenta are + at half time steps and fields and locations are at full time steps - auto bField = functorBField(pos); - auto eField = functorEField(pos); - /* - time index in paper is reduced by a half: i=0 --> i=-1/2 so that momenta are - at half time steps and fields and locations are at full time steps + Here the real (PIConGPU) momentum (p) is used, not the momentum from the Vay paper (u) + p = m_0 * u + */ + const float_X deltaT = DELTA_T; + const float_X factor = 0.5 * charge * deltaT; + Gamma gamma; + Velocity velocity; - Here the real (PIConGPU) momentum (p) is used, not the momentum from the Vay paper (u) - p = m_0 * u - */ - const float_X deltaT = DELTA_T; - const float_X factor = 0.5 * charge * deltaT; - Gamma gamma; - Velocity velocity; + // first step in Vay paper: + const float3_X velocity_atMinusHalf = velocity(mom, mass); + // mom /(mass*mass + abs2(mom)/(SPEED_OF_LIGHT*SPEED_OF_LIGHT)); + const MomType momentum_atZero + = mom + factor * (eField + pmacc::math::cross(velocity_atMinusHalf, bField)); - // first step in Vay paper: - const float3_X velocity_atMinusHalf = velocity(mom, mass); - //mom /(mass*mass + abs2(mom)/(SPEED_OF_LIGHT*SPEED_OF_LIGHT)); - const MomType momentum_atZero = mom + factor * (eField + math::cross(velocity_atMinusHalf, bField)); + // second step in Vay paper: + const MomType momentum_prime = momentum_atZero + factor * eField; + const float_X gamma_prime = gamma(momentum_prime, mass); - // second step in Vay paper: - const MomType momentum_prime = momentum_atZero + factor * eField; - const float_X gamma_prime = gamma(momentum_prime, mass); - //algorithms::math::sqrt(1.0 + abs2(momentum_prime*(1.0/(mass * SPEED_OF_LIGHT)))); - const sqrt_Vay::float3_X tau(factor / mass * bField); - const sqrt_Vay::float_X u_star = math::dot( precisionCast(momentum_prime), tau ) / precisionCast( SPEED_OF_LIGHT * mass ); - const 
sqrt_Vay::float_X sigma = gamma_prime * gamma_prime - math::abs2( tau ); - const sqrt_Vay::float_X gamma_atPlusHalf = math::sqrt( sqrt_Vay::float_X(0.5) * - ( sigma + - math::sqrt( sigma * sigma + - sqrt_Vay::float_X(4.0) * ( math::abs2( tau ) + u_star * u_star ) ) - ) - ); - const float3_X t(tau * (float_X(1.0) / gamma_atPlusHalf)); - const float_X s = float_X(1.0) / (float_X(1.0) + math::abs2(t)); - const MomType momentum_atPlusHalf = s * (momentum_prime + math::dot(momentum_prime, t) * t + math::cross(momentum_prime, t)); + const sqrt_Vay::float3_X tau(factor / mass * bField); + const sqrt_Vay::float_X u_star + = pmacc::math::dot(precisionCast(momentum_prime), tau) + / precisionCast(SPEED_OF_LIGHT * mass); + const sqrt_Vay::float_X sigma = gamma_prime * gamma_prime - pmacc::math::abs2(tau); + const sqrt_Vay::float_X gamma_atPlusHalf = math::sqrt( + sqrt_Vay::float_X(0.5) + * (sigma + + math::sqrt( + sigma * sigma + sqrt_Vay::float_X(4.0) * (pmacc::math::abs2(tau) + u_star * u_star)))); + const float3_X t(tau * (float_X(1.0) / gamma_atPlusHalf)); + const float_X s = float_X(1.0) / (float_X(1.0) + pmacc::math::abs2(t)); + const MomType momentum_atPlusHalf = s + * (momentum_prime + pmacc::math::dot(momentum_prime, t) * t + + pmacc::math::cross(momentum_prime, t)); - particle[ momentum_ ] = momentum_atPlusHalf; + particle[momentum_] = momentum_atPlusHalf; - const float3_X vel = velocity(momentum_atPlusHalf, mass); + const float3_X vel = velocity(momentum_atPlusHalf, mass); - for(uint32_t d=0;d -struct CIC -{ - /** - * width of the support of this form_factor. This is the area where the function - * is non-zero. 
- */ - static constexpr int support = 2; -}; -}//namespace shared_CIC - -struct CIC : public shared_CIC::CIC +namespace picongpu { - using CloudShape = picongpu::particles::shapes::NGP; - - struct ChargeAssignment : public shared_CIC::CIC + namespace particles { - - HDINLINE float_X operator()( float_X const x ) + namespace shapes { - /* - - * | 1-|x| if |x|<1 - * W(x)=<| - * | 0 otherwise - * - + namespace detail + { + struct CIC + { + /** Support of the assignment function in cells + * + * Specifies width of the area where the function can be non-zero. + * Is the same for all directions + */ + static constexpr uint32_t support = 2; + }; + + } // namespace detail + + /** Cloud-in-cell particle shape + * + * Cloud density form: piecewise constant + * Assignment function: first order B-spline */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_1 = abs_x < 1.0_X; - float_X const onSupport = 1.0_X - abs_x; - - float_X result( 0.0 ); - if( below_1 ) - result = onSupport; - - return result; - } - }; - - struct ChargeAssignmentOnSupport : public shared_CIC::CIC - { - - /** form factor of this particle shape. - * \param x has to be within [-support/2, support/2] - */ - HDINLINE float_X operator()( float_X const x ) - { - /* - * W(x)=1-|x| - */ - return 1.0_X - algorithms::math::abs( x ); - } - - }; - -}; - -} // namespace shapes -} // namespace particles + struct CIC + { + //! 
Order of the assignment function spline + static constexpr uint32_t assignmentFunctionOrder = detail::CIC::support - 1u; + + struct ChargeAssignment : public detail::CIC + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 1-|x| if |x|<1 + * W(x)=<| + * | 0 otherwise + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_1 = abs_x < 1.0_X; + float_X const onSupport = 1.0_X - abs_x; + + float_X result(0.0); + if(below_1) + result = onSupport; + + return result; + } + }; + + struct ChargeAssignmentOnSupport : public detail::CIC + { + /** form factor of this particle shape. + * \param x has to be within [-support/2, support/2] + */ + HDINLINE float_X operator()(float_X const x) + { + /* + * W(x)=1-|x| + */ + return 1.0_X - math::abs(x); + } + }; + }; + + } // namespace shapes + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/shapes/Counter.hpp b/include/picongpu/particles/shapes/Counter.hpp index 8b0f7a5208..cf20fc19e3 100644 --- a/include/picongpu/particles/shapes/Counter.hpp +++ b/include/picongpu/particles/shapes/Counter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -17,70 +17,83 @@ * If not, see . */ - #pragma once #include "picongpu/simulation_defines.hpp" -namespace picongpu -{ -namespace particles -{ -namespace shapes -{ +#include - namespace shared_Counter - { - struct Counter - { - /** - * width of the support of this form_factor. This is the area where the function - * is non-zero. - */ - static constexpr int support = 0; - }; - - } // namespace shared_Counter - - struct Counter : public shared_Counter::Counter +namespace picongpu +{ + namespace particles { - - struct ChargeAssignment : public shared_Counter::Counter + namespace shapes { - - HDINLINE float_X operator()( float_X const x ) + namespace detail { - /* - - * | -1 if -1. 
*/ - #pragma once #include "picongpu/simulation_defines.hpp" -namespace picongpu -{ -namespace particles -{ -namespace shapes -{ +#include - namespace shared_NGP - { - struct NGP - { - /** - * width of the support of this form_factor. This is the area where the function - * is non-zero. - */ - static constexpr int support = 1; - }; - - } // namespace shared_NGP - - struct NGP : public shared_NGP::NGP +namespace picongpu +{ + namespace particles { - - struct ChargeAssignment : public shared_NGP::NGP + namespace shapes { - - HDINLINE float_X operator()( float_X const x ) + namespace detail { - /* - - * | 1 if -1/2<=x<1/2 - * W(x)=<| - * | 0 otherwise - * - - */ - - bool const below_half = -0.5_X <= x && x < 0.5_X; - - return float_X( below_half ); - } - }; - - struct ChargeAssignmentOnSupport : public shared_NGP::NGP - { - - /** form factor of this particle shape. - * \param x has to be within [-support/2, support/2) + struct NGP + { + /** Support of the assignment function in cells + * + * Specifies width of the area where the function can be non-zero. + * Is the same for all directions + */ + static constexpr uint32_t support = 1; + }; + + } // namespace detail + + /** Nearest grid point particle shape + * + * Cloud density form: delta function + * Assignment function: zero order B-spline */ - HDINLINE float_X operator()( float_X const ) + struct NGP { - /* - * W(x)=1 - */ - return 1.0_X; - } - - }; - - }; - -} // namespace shapes -} // namespace particles + //! Order of the assignment function spline + static constexpr uint32_t assignmentFunctionOrder = detail::NGP::support - 1u; + + struct ChargeAssignment : public detail::NGP + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 1 if -1/2<=x<1/2 + * W(x)=<| + * | 0 otherwise + * - + */ + + bool const below_half = -0.5_X <= x && x < 0.5_X; + + return float_X(below_half); + } + }; + + struct ChargeAssignmentOnSupport : public detail::NGP + { + /** form factor of this particle shape. 
+ * \param x has to be within [-support/2, support/2) + */ + HDINLINE float_X operator()(float_X const) + { + /* + * W(x)=1 + */ + return 1.0_X; + } + }; + }; + + } // namespace shapes + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/shapes/P4S.hpp b/include/picongpu/particles/shapes/P4S.hpp deleted file mode 100644 index 772d301a1b..0000000000 --- a/include/picongpu/particles/shapes/P4S.hpp +++ /dev/null @@ -1,155 +0,0 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" - -namespace picongpu -{ -namespace particles -{ -namespace shapes -{ - -namespace shared_P4S -{ - -struct P4S -{ - static constexpr int support = 5; - - HDINLINE static float_X ff_1st_radius( float_X const x ) - { - /* - * W(x)= 115/192 - 5/8 * x^2 + 1/4 * x^4 - * = 115/192 + x^2 * (-5/8 + 1/4 * x^2) - */ - float_X const square_x = x * x; - return 115._X / 192._X + square_x * ( - -5._X / 8._X + - 1.0_X / 4.0_X * square_x - ); - } - - HDINLINE static float_X ff_2nd_radius( float_X const x ) - { - /* - * W(x)= 1/96 * (55 + 20 * x - 120 * x^2 + 80 * x^3 - 16 * x^4) - * = 1/96 * (55 + 4 * x * (5 - 2 * x * (15 + 2 * x * (-5 + x)))) - */ - return 1._X / 96._X * ( - 55._X + 4._X * x * ( - 5._X - 2._X * x * ( - 15._X + 2._X * x * ( - -5._X + x - ) - ) - ) - ); - } - - HDINLINE static float_X ff_3rd_radius( float_X const x ) - { - /* - * W(x)=1/384 * (5 - 2*x)^4 - */ - float_X const tmp = 5._X - 2._X * x; - float_X const square_tmp = tmp * tmp; - float_X const biquadratic_tmp = square_tmp * square_tmp; - - return 1._X / 384._X * biquadratic_tmp; - } -}; - -} //namespace shared_P4S - -/** particle assignment shape `piecewise biquadratic spline` - */ -struct P4S : public shared_P4S::P4S -{ - using CloudShape = picongpu::particles::shapes::PCS; - - struct ChargeAssignmentOnSupport : public shared_P4S::P4S - { - - HDINLINE float_X operator()( float_X const x ) - { - /* - - * | 115/192 + x^2 * (-5/8 + 1/4 * x^2) if -1/2 < x < 1/2 - * W(x)=<| - * | 1/96 * (55 + 4 * x * (5 - 2 * x * (15 + 2 * x * (-5 + x)))) if 1/2 <= |x| < 3/2 - * | - * | 1/384 * (5 - 2 * x)^4 if 3/2 <= |x| < 5/2 - * - - */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_2nd_radius = abs_x < 1.5_X; - bool const below_1st_radius = abs_x < 0.5_X; - - float_X const rad1 = ff_1st_radius( abs_x ); - float_X const rad2 = ff_2nd_radius( abs_x ); - float_X const rad3 = ff_3rd_radius( abs_x ); - - float_X result = rad3; - if( 
below_1st_radius ) - result = rad1; - else if( below_2nd_radius ) - result = rad2; - - return result; - } - - }; - - struct ChargeAssignment : public shared_P4S::P4S - { - - HDINLINE float_X operator()( float_X const x ) - { - - /* - - * | 115/192 + x^2 * (-5/8 + 1/4 * x^2) if -1/2 < x < 1/2 - * W(x)=<| - * | 1/96 * (55 + 4 * x * (5 - 2 * x * (15 + 2 * x * (-5 + x)))) if 1/2 <= |x| < 3/2 - * | - * | 1/384 * (5 - 2*x)^4 if 3/2 <= |x| < 5/2 - * | - * | 0 otherwise - * - - */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_max = abs_x < 2.5_X; - - float_X const onSupport = ChargeAssignmentOnSupport()( abs_x ); - - float_X result( 0.0 ); - if( below_max ) - result = onSupport; - - return result; - } - }; -}; - -} // namespace shapes -} //namespace particles -} //namespace picongpu diff --git a/include/picongpu/particles/shapes/PCS.hpp b/include/picongpu/particles/shapes/PCS.hpp index d0dec31b94..0450c4eac0 100644 --- a/include/picongpu/particles/shapes/PCS.hpp +++ b/include/picongpu/particles/shapes/PCS.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl, Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. 
* @@ -21,110 +21,131 @@ #include "picongpu/simulation_defines.hpp" -namespace picongpu -{ -namespace particles -{ -namespace shapes -{ - -namespace shared_PCS -{ -struct PCS -{ - static constexpr int support = 4; - +#include - HDINLINE static float_X ff_1st_radius( float_X const x ) - { - /* - * W(x)=1/6*(4 - 6*x^2 + 3*|x|^3) - */ - float_X const square_x = x * x; - float_X const triple_x = square_x * x; - return 1.0_X / 6.0_X * ( 4.0_X - 6.0_X * square_x + 3.0_X * triple_x ); - } - - HDINLINE static float_X ff_2nd_radius( float_X const x ) - { - /* - * W(x)=1/6*(2 - |x|)^3 - */ - float_X const tmp = 2.0_X - x; - float_X const triple_tmp = tmp * tmp * tmp; - return 1.0_X / 6.0_X * triple_tmp; - } -}; - -} //namespace shared_PCS -struct PCS : public shared_PCS::PCS +namespace picongpu { - using CloudShape = picongpu::particles::shapes::TSC; - - struct ChargeAssignment : public shared_PCS::PCS - { - - HDINLINE float_X operator()( float_X const x ) - { - /* - - * | 1/6*(4 - 6*x^2 + 3*|x|^3) if 0<=|x|<1 - * W(x)=<| 1/6*(2 - |x|)^3 if 1<=|x|<2 - * | 0 otherwise - * - - */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_1 = abs_x < 1.0_X; - bool const below_2 = abs_x < 2.0_X; - - float_X const rad1 = ff_1st_radius( abs_x ); - float_X const rad2 = ff_2nd_radius( abs_x ); - - float_X result( 0.0 ); - if( below_1 ) - result = rad1; - else if( below_2 ) - result = rad2; - - return result; - } - }; - - struct ChargeAssignmentOnSupport : public shared_PCS::PCS + namespace particles { - - HDINLINE float_X operator()( float_X const x ) + namespace shapes { - /* - - * | 1/6*(4 - 6*x^2 + 3*|x|^3) if 0<=|x|<1 - * W(x)=<| - * | 1/6*(2 - |x|)^3 if 1<=|x|<2 - * - + namespace detail + { + struct PCS + { + /** Support of the assignment function in cells + * + * Specifies width of the area where the function can be non-zero. 
+ * Is the same for all directions + */ + static constexpr uint32_t support = 5; + + HDINLINE static float_X ff_1st_radius(float_X const x) + { + /* + * W(x)= 115/192 - 5/8 * x^2 + 1/4 * x^4 + * = 115/192 + x^2 * (-5/8 + 1/4 * x^2) + */ + float_X const square_x = x * x; + return 115._X / 192._X + square_x * (-5._X / 8._X + 1.0_X / 4.0_X * square_x); + } + + HDINLINE static float_X ff_2nd_radius(float_X const x) + { + /* + * W(x)= 1/96 * (55 + 20 * x - 120 * x^2 + 80 * x^3 - 16 * x^4) + * = 1/96 * (55 + 4 * x * (5 - 2 * x * (15 + 2 * x * (-5 + x)))) + */ + return 1._X / 96._X + * (55._X + 4._X * x * (5._X - 2._X * x * (15._X + 2._X * x * (-5._X + x)))); + } + + HDINLINE static float_X ff_3rd_radius(float_X const x) + { + /* + * W(x)=1/384 * (5 - 2*x)^4 + */ + float_X const tmp = 5._X - 2._X * x; + float_X const square_tmp = tmp * tmp; + float_X const biquadratic_tmp = square_tmp * square_tmp; + + return 1._X / 384._X * biquadratic_tmp; + } + }; + + } // namespace detail + + /** Piecewise cubic cloud particle shape + * + * Cloud density form: piecewise cubic B-Spline + * Assignment function: piecewise quartic B-spline */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_1 = abs_x < 1.0_X; - float_X const rad1 = ff_1st_radius( abs_x ); - float_X const rad2 = ff_2nd_radius( abs_x ); - - float_X result = rad2; - if( below_1 ) - result = rad1; - - return result; - - /* Semantics: - if( abs_x < 1.0_X ) - return ff_1st_radius( abs_x ); - return ff_2nd_radius( abs_x ); - */ - } - - }; - -}; - -} // namespace shapes -} // namespace particles + struct PCS + { + //! 
Order of the assignment function spline + static constexpr uint32_t assignmentFunctionOrder = detail::PCS::support - 1u; + + struct ChargeAssignmentOnSupport : public detail::PCS + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 115/192 + x^2 * (-5/8 + 1/4 * x^2) if -1/2 < x < 1/2 + * W(x)=<| + * | 1/96 * (55 + 4 * x * (5 - 2 * x * (15 + 2 * x * (-5 + x)))) if 1/2 <= |x| < 3/2 + * | + * | 1/384 * (5 - 2 * x)^4 if 3/2 <= |x| < 5/2 + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_2nd_radius = abs_x < 1.5_X; + bool const below_1st_radius = abs_x < 0.5_X; + + float_X const rad1 = ff_1st_radius(abs_x); + float_X const rad2 = ff_2nd_radius(abs_x); + float_X const rad3 = ff_3rd_radius(abs_x); + + float_X result = rad3; + if(below_1st_radius) + result = rad1; + else if(below_2nd_radius) + result = rad2; + + return result; + } + }; + + struct ChargeAssignment : public detail::PCS + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 115/192 + x^2 * (-5/8 + 1/4 * x^2) if -1/2 < x < 1/2 + * W(x)=<| + * | 1/96 * (55 + 4 * x * (5 - 2 * x * (15 + 2 * x * (-5 + x)))) if 1/2 <= |x| < 3/2 + * | + * | 1/384 * (5 - 2*x)^4 if 3/2 <= |x| < 5/2 + * | + * | 0 otherwise + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_max = abs_x < 2.5_X; + + float_X const onSupport = ChargeAssignmentOnSupport()(abs_x); + + float_X result(0.0); + if(below_max) + result = onSupport; + + return result; + } + }; + }; + + } // namespace shapes + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/shapes/PQS.hpp b/include/picongpu/particles/shapes/PQS.hpp new file mode 100644 index 0000000000..8780e95900 --- /dev/null +++ b/include/picongpu/particles/shapes/PQS.hpp @@ -0,0 +1,138 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov, Klaus Steiniger + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +#include + + +namespace picongpu +{ + namespace particles + { + namespace shapes + { + namespace detail + { + struct PQS + { + /** Support of the assignment function in cells + * + * Specifies width of the area where the function can be non-zero. + * Is the same for all directions + */ + static constexpr uint32_t support = 4; + + HDINLINE static float_X ff_1st_radius(float_X const x) + { + /* + * W(x)=1/6*(4 - 6*x^2 + 3*|x|^3) + */ + float_X const square_x = x * x; + float_X const triple_x = square_x * x; + return 1.0_X / 6.0_X * (4.0_X - 6.0_X * square_x + 3.0_X * triple_x); + } + + HDINLINE static float_X ff_2nd_radius(float_X const x) + { + /* + * W(x)=1/6*(2 - |x|)^3 + */ + float_X const tmp = 2.0_X - x; + float_X const triple_tmp = tmp * tmp * tmp; + return 1.0_X / 6.0_X * triple_tmp; + } + }; + + } // namespace detail + + /** Piecewise quadratic cloud particle shape + * + * Cloud density form: piecewise quadratic B-spline + * Assignment function: piecewise cubic B-spline + */ + struct PQS + { + //! 
Order of the assignment function spline + static constexpr uint32_t assignmentFunctionOrder = detail::PQS::support - 1u; + + struct ChargeAssignment : public detail::PQS + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 1/6*(4 - 6*x^2 + 3*|x|^3) if 0<=|x|<1 + * W(x)=<| 1/6*(2 - |x|)^3 if 1<=|x|<2 + * | 0 otherwise + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_1 = abs_x < 1.0_X; + bool const below_2 = abs_x < 2.0_X; + + float_X const rad1 = ff_1st_radius(abs_x); + float_X const rad2 = ff_2nd_radius(abs_x); + + float_X result(0.0); + if(below_1) + result = rad1; + else if(below_2) + result = rad2; + + return result; + } + }; + + struct ChargeAssignmentOnSupport : public detail::PQS + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 1/6*(4 - 6*x^2 + 3*|x|^3) if 0<=|x|<1 + * W(x)=<| + * | 1/6*(2 - |x|)^3 if 1<=|x|<2 + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_1 = abs_x < 1.0_X; + float_X const rad1 = ff_1st_radius(abs_x); + float_X const rad2 = ff_2nd_radius(abs_x); + + float_X result = rad2; + if(below_1) + result = rad1; + + return result; + + /* Semantics: + if( abs_x < 1.0_X ) + return ff_1st_radius( abs_x ); + return ff_2nd_radius( abs_x ); + */ + } + }; + }; + + } // namespace shapes + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/particles/shapes/TSC.hpp b/include/picongpu/particles/shapes/TSC.hpp index cfd6e1d83b..3067713744 100644 --- a/include/picongpu/particles/shapes/TSC.hpp +++ b/include/picongpu/particles/shapes/TSC.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -22,113 +22,115 @@ #include "picongpu/simulation_defines.hpp" -namespace picongpu -{ -namespace particles -{ -namespace shapes -{ - -namespace shared_TSC -{ - -struct TSC -{ - /** - * width of the support of this form_factor. 
This is the area where the function - * is non-zero. - */ - static constexpr int support = 3; +#include - HDINLINE static float_X ff_1st_radius( float_X const x ) - { - /* - * W(x)=3/4 - x^2 - */ - float_X const square_x = x * x; - return 0.75_X - square_x; - } - - HDINLINE static float_X ff_2nd_radius( float_X const x ) - { - /* - * W(x)=1/2*(3/2 - |x|)^2 - */ - float_X const tmp = 3.0_X / 2.0_X - x; - float_X const square_tmp = tmp * tmp; - return 0.5_X * square_tmp; - } -}; - -} //namespace shared_TSC - -struct TSC : public shared_TSC::TSC +namespace picongpu { - using CloudShape = picongpu::particles::shapes::CIC; - - struct ChargeAssignment : public shared_TSC::TSC - { - - HDINLINE float_X operator()( float_X const x ) - { - /* - - * | 3/4 - x^2 if |x|<1/2 - * W(x)=<| 1/2*(3/2 - |x|)^2 if 1/2<=|x|<3/2 - * | 0 otherwise - * - - */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_05 = abs_x < 0.5_X; - bool const below_1_5 = abs_x < 1.5_X; - - float_X const rad1 = ff_1st_radius( abs_x ); - float_X const rad2 = ff_2nd_radius( abs_x ); - - float_X result( 0.0 ); - if( below_05 ) - result = rad1; - else if( below_1_5 ) - result = rad2; - - return result; - - } - }; - - struct ChargeAssignmentOnSupport : public shared_TSC::TSC + namespace particles { - - /** form factor of this particle shape. - * \param x has to be within [-support/2, support/2] - */ - HDINLINE float_X operator()( float_X const x ) + namespace shapes { - /* - - * | 3/4 - x^2 if |x|<1/2 - * W(x)=<| - * | 1/2*(3/2 - |x|)^2 if 1/2<=|x|<3/2 - * - + namespace detail + { + struct TSC + { + /** Support of the assignment function in cells + * + * Specifies width of the area where the function can be non-zero. 
+ * Is the same for all directions + */ + static constexpr uint32_t support = 3; + + HDINLINE static float_X ff_1st_radius(float_X const x) + { + /* + * W(x)=3/4 - x^2 + */ + float_X const square_x = x * x; + return 0.75_X - square_x; + } + + HDINLINE static float_X ff_2nd_radius(float_X const x) + { + /* + * W(x)=1/2*(3/2 - |x|)^2 + */ + float_X const tmp = 3.0_X / 2.0_X - x; + float_X const square_tmp = tmp * tmp; + return 0.5_X * square_tmp; + } + }; + + } // namespace detail + + /** Triagle-shaped cloud particle shape + * + * Cloud density form: piecewise linear + * Assignment function: second order B-spline */ - float_X const abs_x = algorithms::math::abs( x ); - - bool const below_05 = abs_x < 0.5_X; - - float_X const rad1 = ff_1st_radius( abs_x ); - float_X const rad2 = ff_2nd_radius( abs_x ); - - float_X result = rad2; - if( below_05 ) - result = rad1; - - return result; - } - - }; - -}; - -} // namespace shapes -} // namespace partciles + struct TSC + { + //! Order of the assignment function spline + static constexpr uint32_t assignmentFunctionOrder = detail::TSC::support - 1u; + + struct ChargeAssignment : public detail::TSC + { + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 3/4 - x^2 if |x|<1/2 + * W(x)=<| 1/2*(3/2 - |x|)^2 if 1/2<=|x|<3/2 + * | 0 otherwise + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_05 = abs_x < 0.5_X; + bool const below_1_5 = abs_x < 1.5_X; + + float_X const rad1 = ff_1st_radius(abs_x); + float_X const rad2 = ff_2nd_radius(abs_x); + + float_X result(0.0); + if(below_05) + result = rad1; + else if(below_1_5) + result = rad2; + + return result; + } + }; + + struct ChargeAssignmentOnSupport : public detail::TSC + { + /** form factor of this particle shape. 
+ * \param x has to be within [-support/2, support/2] + */ + HDINLINE float_X operator()(float_X const x) + { + /* - + * | 3/4 - x^2 if |x|<1/2 + * W(x)=<| + * | 1/2*(3/2 - |x|)^2 if 1/2<=|x|<3/2 + * - + */ + float_X const abs_x = math::abs(x); + + bool const below_05 = abs_x < 0.5_X; + + float_X const rad1 = ff_1st_radius(abs_x); + float_X const rad2 = ff_2nd_radius(abs_x); + + float_X result = rad2; + if(below_05) + result = rad1; + + return result; + } + }; + }; + + } // namespace shapes + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/OnePositionImpl.def b/include/picongpu/particles/startPosition/OnePositionImpl.def index 5f692dc62c..18bba3f4e3 100644 --- a/include/picongpu/particles/startPosition/OnePositionImpl.def +++ b/include/picongpu/particles/startPosition/OnePositionImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl, Rene Widera +/* Copyright 2016-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -27,34 +27,33 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace acc -{ - - /** set the particle attribute position - * - * This functor also sets the macro particle weighting. - */ - template< typename T_ParamClass > - struct OnePositionImpl; - -} // namespace acc - - - /** Set the in cell position - * - * All macro particles are set to the same in cell position defined in - * T_ParamClass. - * - * @tparam T_ParamClass Parameter class with off `InCellOffset` defined as - * CONST_VECTOR of 3 float_X [0.0, 1.0). - */ - template< typename T_ParamClass > - using OnePositionImpl = generic::Free< acc::OnePositionImpl< T_ParamClass > >; - -} // namespace startPosition -} // namespace particles + namespace particles + { + namespace startPosition + { + namespace acc + { + /** set the particle attribute position + * + * This functor also sets the macro particle weighting. 
+ */ + template + struct OnePositionImpl; + + } // namespace acc + + + /** Set the in cell position + * + * All macro particles are set to the same in cell position defined in + * T_ParamClass. + * + * @tparam T_ParamClass Parameter class with off `InCellOffset` defined as + * CONST_VECTOR of 3 float_X [0.0, 1.0). + */ + template + using OnePositionImpl = generic::Free>; + + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/OnePositionImpl.hpp b/include/picongpu/particles/startPosition/OnePositionImpl.hpp index c8e2f715d9..30173cbb2a 100644 --- a/include/picongpu/particles/startPosition/OnePositionImpl.hpp +++ b/include/picongpu/particles/startPosition/OnePositionImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -30,106 +30,81 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace acc -{ -namespace detail -{ - template< bool T_hasWeighting > - struct SetWeighting - { - template< typename T_Particle > - HDINLINE void - operator() - ( - T_Particle & particle, - float_X const weighting - ) - { - particle[ weighting_ ] = weighting; - } - }; - - template<> - struct SetWeighting< false > - { - template< typename T_Particle > - HDINLINE void - operator() - ( - T_Particle &, - float_X const - ) - { - } - }; - -} // namespace detail - - template< typename T_ParamClass > - struct OnePositionImpl + namespace particles { - /** set in-cell position and weighting - * - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator()( - T_Particle & particle, - T_Args && ... 
- ) - { - particle[ position_ ] = T_ParamClass{}.inCellOffset.template shrink< simDim >( ); - - // set the weighting attribute if the particle species has it - bool const hasWeighting = pmacc::traits::HasIdentifier< - typename T_Particle::FrameType, - weighting - >::type::value; - detail::SetWeighting< hasWeighting > setWeighting; - setWeighting( - particle, - m_weighting - ); - } - - template< typename T_Particle > - HDINLINE uint32_t - numberOfMacroParticles( float_X const realParticlesPerCell ) + namespace startPosition { - bool const hasWeighting = pmacc::traits::HasIdentifier< - typename T_Particle::FrameType, - weighting - >::type::value; - - // note: m_weighting member might stay uninitialized! - uint32_t result( T_ParamClass::numParticlesPerCell ); - - if( hasWeighting ) - result = startPosition::detail::WeightMacroParticles{}( - realParticlesPerCell, - T_ParamClass::numParticlesPerCell, - m_weighting - ); - - return result; - } - - private: - float_X m_weighting; - }; - -} // namespace acc -} // namespace startPosition -} // namespace particles + namespace acc + { + namespace detail + { + template + struct SetWeighting + { + template + HDINLINE void operator()(T_Particle& particle, float_X const weighting) + { + particle[weighting_] = weighting; + } + }; + + template<> + struct SetWeighting + { + template + HDINLINE void operator()(T_Particle&, float_X const) + { + } + }; + + } // namespace detail + + template + struct OnePositionImpl + { + /** set in-cell position and weighting + * + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particle particle to be manipulated + * @param ... unused particles + */ + template + HDINLINE void operator()(T_Particle& particle, T_Args&&...) 
+ { + particle[position_] = T_ParamClass{}.inCellOffset.template shrink(); + + // set the weighting attribute if the particle species has it + bool const hasWeighting + = pmacc::traits::HasIdentifier::type::value; + detail::SetWeighting setWeighting; + setWeighting(particle, m_weighting); + } + + template + HDINLINE uint32_t numberOfMacroParticles(float_X const realParticlesPerCell) + { + bool const hasWeighting + = pmacc::traits::HasIdentifier::type::value; + + // note: m_weighting member might stay uninitialized! + uint32_t result(T_ParamClass::numParticlesPerCell); + + if(hasWeighting) + result = startPosition::detail::WeightMacroParticles{}( + realParticlesPerCell, + T_ParamClass::numParticlesPerCell, + m_weighting); + + return result; + } + + private: + float_X m_weighting; + }; + + } // namespace acc + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/QuietImpl.def b/include/picongpu/particles/startPosition/QuietImpl.def index 050f957d10..fcd835d738 100644 --- a/include/picongpu/particles/startPosition/QuietImpl.def +++ b/include/picongpu/particles/startPosition/QuietImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,32 +27,31 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace acc -{ - - /** Set the in cell position (accelerator) - * - * Set the in cell position and the weighting of the macro particle. - */ - template< typename T_ParamClass > - struct QuietImpl; - -} // namespace acc - - - /** Set the in cell position - * - * The position of the particle is chosen depending of the number of - * macro particles within the cell so that the distance to the next particle - * is equal. 
- */ - template< typename T_ParamClass > - using QuietImpl = generic::Free< acc::QuietImpl< T_ParamClass > >; - -} // namespace startPosition -} // namespace particles + namespace particles + { + namespace startPosition + { + namespace acc + { + /** Set the in cell position (accelerator) + * + * Set the in cell position and the weighting of the macro particle. + */ + template + struct QuietImpl; + + } // namespace acc + + + /** Set the in cell position + * + * The position of the particle is chosen depending of the number of + * macro particles within the cell so that the distance to the next particle + * is equal. + */ + template + using QuietImpl = generic::Free>; + + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/QuietImpl.hpp b/include/picongpu/particles/startPosition/QuietImpl.hpp index 9873e34a52..715845f08f 100644 --- a/include/picongpu/particles/startPosition/QuietImpl.hpp +++ b/include/picongpu/particles/startPosition/QuietImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -27,115 +27,99 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace acc -{ - - template< typename T_ParamClass > - struct QuietImpl + namespace particles { - /** set in-cell position and weighting - * - * @warning It is not allowed to call this functor as many times as - * the resulting value of numberOfMacroParticles. - * - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator()( - T_Particle & particle, - T_Args && ... 
- ) + namespace startPosition { - uint32_t maxNumMacroParticles = pmacc::math::CT::volume< - typename T_ParamClass::numParticlesPerDimension - >::type::value; - - /* reset the particle position if the operator is called more times - * than allowed (m_currentMacroParticles underflow protection for) - */ - if( maxNumMacroParticles <= m_currentMacroParticles ) - m_currentMacroParticles = maxNumMacroParticles - 1u; - - // spacing between particles in each direction in the cell - DataSpace< simDim > const numParDirection( T_ParamClass::numParticlesPerDimension::toRT() ); - floatD_X spacing; - for( uint32_t i = 0; i < simDim; ++i ) - spacing[i] = float_X( 1.0 ) / float_X( numParDirection[ i ] ); - - /* coordinate in the local in-cell lattice - * x = [0, numParsPerCell_X-1] - * y = [0, numParsPerCell_Y-1] - * z = [0, numParsPerCell_Z-1] - */ - DataSpace< simDim > inCellCoordinate = DataSpaceOperations< simDim >::map( - numParDirection, - m_currentMacroParticles - ); - - particle[ position_ ] = precisionCast< float_X >( inCellCoordinate ) * spacing + - spacing * float_X( 0.5 ); - particle[ weighting_ ] = m_weighting; - - --m_currentMacroParticles; - - } - - template< typename T_Particle > - HDINLINE uint32_t - numberOfMacroParticles( float_X const realParticlesPerCell ) - { - auto numParInCell = T_ParamClass::numParticlesPerDimension::toRT(); - - m_weighting = float_X( 0.0 ); - uint32_t numMacroParticles = pmacc::math::CT::volume< - typename T_ParamClass::numParticlesPerDimension - >::type::value; - - if( numMacroParticles > 0u ) - m_weighting = realParticlesPerCell / float_X( numMacroParticles ); - - while( - m_weighting < MIN_WEIGHTING && - numMacroParticles > 0u - ) + namespace acc { - /* decrement component with greatest value*/ - uint32_t max_component = 0u; - for( uint32_t i = 1; i < simDim; ++i ) + template + struct QuietImpl { - if( numParInCell[ i ] > numParInCell[ max_component ] ) - max_component = i; - } - numParInCell[ max_component ] -= 1u; - - 
numMacroParticles = numParInCell.productOfComponents( ); - - if( numMacroParticles > 0u ) - m_weighting = realParticlesPerCell / float_X( numMacroParticles ); - else - m_weighting = float_X( 0.0 ); - } - m_currentMacroParticles = numMacroParticles - 1u; - return numMacroParticles; - } - private: - - float_X m_weighting; - uint32_t m_currentMacroParticles; - }; - -} // namespace acc -} // namespace startPosition -} // namespace particles + /** set in-cell position and weighting + * + * @warning It is not allowed to call this functor as many times as + * the resulting value of numberOfMacroParticles. + * + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param particle particle to be manipulated + * @param ... unused particles + */ + template + HDINLINE void operator()(T_Particle& particle, T_Args&&...) + { + uint32_t maxNumMacroParticles + = pmacc::math::CT::volume::type::value; + + /* reset the particle position if the operator is called more times + * than allowed (m_currentMacroParticles underflow protection for) + */ + if(maxNumMacroParticles <= m_currentMacroParticles) + m_currentMacroParticles = maxNumMacroParticles - 1u; + + // spacing between particles in each direction in the cell + DataSpace const numParDirection(T_ParamClass::numParticlesPerDimension::toRT()); + floatD_X spacing; + for(uint32_t i = 0; i < simDim; ++i) + spacing[i] = float_X(1.0) / float_X(numParDirection[i]); + + /* coordinate in the local in-cell lattice + * x = [0, numParsPerCell_X-1] + * y = [0, numParsPerCell_Y-1] + * z = [0, numParsPerCell_Z-1] + */ + DataSpace inCellCoordinate + = DataSpaceOperations::map(numParDirection, m_currentMacroParticles); + + particle[position_] + = precisionCast(inCellCoordinate) * spacing + spacing * float_X(0.5); + particle[weighting_] = m_weighting; + + --m_currentMacroParticles; + } + + template + HDINLINE uint32_t numberOfMacroParticles(float_X const 
realParticlesPerCell) + { + auto numParInCell = T_ParamClass::numParticlesPerDimension::toRT(); + + m_weighting = float_X(0.0); + uint32_t numMacroParticles + = pmacc::math::CT::volume::type::value; + + if(numMacroParticles > 0u) + m_weighting = realParticlesPerCell / float_X(numMacroParticles); + + while(m_weighting < MIN_WEIGHTING && numMacroParticles > 0u) + { + /* decrement component with greatest value*/ + uint32_t max_component = 0u; + for(uint32_t i = 1; i < simDim; ++i) + { + if(numParInCell[i] > numParInCell[max_component]) + max_component = i; + } + numParInCell[max_component] -= 1u; + + numMacroParticles = numParInCell.productOfComponents(); + + if(numMacroParticles > 0u) + m_weighting = realParticlesPerCell / float_X(numMacroParticles); + else + m_weighting = float_X(0.0); + } + m_currentMacroParticles = numMacroParticles - 1u; + return numMacroParticles; + } + + private: + float_X m_weighting; + uint32_t m_currentMacroParticles; + }; + + } // namespace acc + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/RandomImpl.def b/include/picongpu/particles/startPosition/RandomImpl.def index aea44713d3..48a71ba2ca 100644 --- a/include/picongpu/particles/startPosition/RandomImpl.def +++ b/include/picongpu/particles/startPosition/RandomImpl.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -29,34 +29,31 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace acc -{ - - /** set the particle attribute position to in-cell random - * - * The particle attribute position is assigned with a random - * in-cell position. - * This functor also sets the macro particle weighting. 
- */ - template< typename T_ParamClass > - struct RandomImpl; - -} // namespace acc - - - /** Set the in cell position to in-cell random - * - * The new in-cell position is uniformly distributed position between [0.0;1.0). - */ - template< typename T_ParamClass > - using RandomImpl = generic::FreeRng< - acc::RandomImpl< T_ParamClass >, - pmacc::random::distributions::Uniform< float_X > - >; -} // namespace startPosition -} // namespace particles + namespace particles + { + namespace startPosition + { + namespace acc + { + /** set the particle attribute position to in-cell random + * + * The particle attribute position is assigned with a random + * in-cell position. + * This functor also sets the macro particle weighting. + */ + template + struct RandomImpl; + + } // namespace acc + + + /** Set the in cell position to in-cell random + * + * The new in-cell position is uniformly distributed position between [0.0;1.0). + */ + template + using RandomImpl + = generic::FreeRng, pmacc::random::distributions::Uniform>; + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/RandomImpl.hpp b/include/picongpu/particles/startPosition/RandomImpl.hpp index 4727d54aee..88a0df3a66 100644 --- a/include/picongpu/particles/startPosition/RandomImpl.hpp +++ b/include/picongpu/particles/startPosition/RandomImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, * Alexander Grund * * This file is part of PIConGPU. 
@@ -24,68 +24,55 @@ #include "picongpu/particles/startPosition/generic/FreeRng.def" #include "picongpu/particles/startPosition/detail/WeightMacroParticles.hpp" -#include - #include namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace acc -{ - - template< typename T_ParamClass > - struct RandomImpl + namespace particles { - /** set in-cell position and weighting - * - * @tparam T_Rng pmacc::nvidia::rng::RNG, type of the random number generator - * @tparam T_Particle pmacc::Particle, particle type - * @tparam T_Args pmacc::Particle, arbitrary number of particles types - * - * @param rng random number generator - * @param particle particle to be manipulated - * @param ... unused particles - */ - template< - typename T_Rng, - typename T_Particle, - typename ... T_Args - > - HDINLINE void operator()( - T_Rng & rng, - T_Particle & particle, - T_Args && ... - ) + namespace startPosition { - floatD_X tmpPos; + namespace acc + { + template + struct RandomImpl + { + /** set in-cell position and weighting + * + * @tparam T_Rng functor::misc::RngWrapper, type of the random number generator + * @tparam T_Particle pmacc::Particle, particle type + * @tparam T_Args pmacc::Particle, arbitrary number of particles types + * + * @param rng random number generator + * @param particle particle to be manipulated + * @param ... unused particles + */ + template + HDINLINE void operator()(T_Rng& rng, T_Particle& particle, T_Args&&...) 
+ { + floatD_X tmpPos; - for( uint32_t d = 0; d < simDim; ++d ) - tmpPos[ d ] = rng( ); + for(uint32_t d = 0; d < simDim; ++d) + tmpPos[d] = rng(); - particle[ position_ ] = tmpPos; - particle[ weighting_ ] = m_weighting; - } + particle[position_] = tmpPos; + particle[weighting_] = m_weighting; + } - template< typename T_Particle > - HDINLINE uint32_t - numberOfMacroParticles( float_X const realParticlesPerCell ) - { - return startPosition::detail::WeightMacroParticles{}( - realParticlesPerCell, - T_ParamClass::numParticlesPerCell, - m_weighting - ); - } + template + HDINLINE uint32_t numberOfMacroParticles(float_X const realParticlesPerCell) + { + return startPosition::detail::WeightMacroParticles{}( + realParticlesPerCell, + T_ParamClass::numParticlesPerCell, + m_weighting); + } - float_X m_weighting; - }; + float_X m_weighting; + }; -} // namespace acc -} // namespace startPosition -} // namespace particles + } // namespace acc + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/detail/WeightMacroParticles.hpp b/include/picongpu/particles/startPosition/detail/WeightMacroParticles.hpp index a56909e7a4..d4d42de80e 100644 --- a/include/picongpu/particles/startPosition/detail/WeightMacroParticles.hpp +++ b/include/picongpu/particles/startPosition/detail/WeightMacroParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, * Alexander Grund * * This file is part of PIConGPU. @@ -25,60 +25,50 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace detail -{ - - /** Calculate the weighting per macro-particle in a cell - * - * Note: In the density regions where the weighting of macro particles would - * violate the user-specified MIN_WEIGHTING, we reduce the number of - * macro particles per cell to still initialize particles - * (see particle.param). 
- * - * This calculates the number of macro particles and the weighting per macro - * particle with respect to MIN_WEIGHTING. - */ - struct WeightMacroParticles + namespace particles { - /** get number of and the weighting per macro particle(s) - * - * @param realParticlesPerCell number of real particles per cell - * @param macroParticlesPerCell maximum number of macro particles per cell - * @param[out] weighting weighting per macro particle - * @return number of macro particles per cell with respect to - * MIN_WEIGHTING, range: [0;macroParticlesPerCell] - */ - HDINLINE uint32_t - operator()( - float_X const realParticlesPerCell, - uint32_t numMacroParticles, - float_X & weighting - ) const + namespace startPosition { - PMACC_CASSERT_MSG( - __MIN_WEIGHTING_must_be_greater_than_zero, - MIN_WEIGHTING > float_X( 0.0 ) - ); - weighting = float_X( 0.0 ); - float_X const maxParPerCell = realParticlesPerCell / MIN_WEIGHTING; - numMacroParticles = math::float2int_rd( - math::min( - float_X( numMacroParticles ), - maxParPerCell - ) - ); - if( numMacroParticles > 0u ) - weighting = realParticlesPerCell / float_X( numMacroParticles ); + namespace detail + { + /** Calculate the weighting per macro-particle in a cell + * + * Note: In the density regions where the weighting of macro particles would + * violate the user-specified MIN_WEIGHTING, we reduce the number of + * macro particles per cell to still initialize particles + * (see particle.param). + * + * This calculates the number of macro particles and the weighting per macro + * particle with respect to MIN_WEIGHTING. 
+ */ + struct WeightMacroParticles + { + /** get number of and the weighting per macro particle(s) + * + * @param realParticlesPerCell number of real particles per cell + * @param macroParticlesPerCell maximum number of macro particles per cell + * @param[out] weighting weighting per macro particle + * @return number of macro particles per cell with respect to + * MIN_WEIGHTING, range: [0;macroParticlesPerCell] + */ + HDINLINE uint32_t operator()( + float_X const realParticlesPerCell, + uint32_t numMacroParticles, + float_X& weighting) const + { + PMACC_CASSERT_MSG(__MIN_WEIGHTING_must_be_greater_than_zero, MIN_WEIGHTING > float_X(0.0)); + weighting = float_X(0.0); + float_X const maxParPerCell = realParticlesPerCell / MIN_WEIGHTING; + numMacroParticles + = pmacc::math::float2int_rd(math::min(float_X(numMacroParticles), maxParPerCell)); + if(numMacroParticles > 0u) + weighting = realParticlesPerCell / float_X(numMacroParticles); - return numMacroParticles; - } - }; + return numMacroParticles; + } + }; -} // namespace detail -} // namespace startPosition -} // namespace particles + } // namespace detail + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/functors.def b/include/picongpu/particles/startPosition/functors.def index 9a039786e0..2a35631ccd 100644 --- a/include/picongpu/particles/startPosition/functors.def +++ b/include/picongpu/particles/startPosition/functors.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* diff --git a/include/picongpu/particles/startPosition/functors.hpp b/include/picongpu/particles/startPosition/functors.hpp index 4f8bc265db..3a40125462 100644 --- a/include/picongpu/particles/startPosition/functors.hpp +++ b/include/picongpu/particles/startPosition/functors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/particles/startPosition/generic/Free.def b/include/picongpu/particles/startPosition/generic/Free.def index a9cd485a8d..a6cad122a2 100644 --- a/include/picongpu/particles/startPosition/generic/Free.def +++ b/include/picongpu/particles/startPosition/generic/Free.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,23 +22,22 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace generic -{ - - /** call simple free user defined functor - * - * @tparam T_Functor user defined functor - * **optional**: can implement **one** host side constructor - * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` - */ - template< typename T_Functor > - struct Free; + namespace particles + { + namespace startPosition + { + namespace generic + { + /** call simple free user defined functor + * + * @tparam T_Functor user defined functor + * **optional**: can implement **one** host side constructor + * `T_Functor()` or `T_Functor(uint32_t currentTimeStep)` + */ + template + struct Free; -} // namespace generic -} // namespace startPosition -} // namespace particles + } // namespace generic + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/generic/Free.hpp b/include/picongpu/particles/startPosition/generic/Free.hpp index 57139e1db6..93f4761991 100644 --- a/include/picongpu/particles/startPosition/generic/Free.hpp 
+++ b/include/picongpu/particles/startPosition/generic/Free.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -27,141 +27,115 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace generic -{ -namespace acc -{ - /** wrapper for the user functor on the accelerator - * - * @tparam T_Functor user defined functor - */ - template< typename T_Functor > - struct Free : private T_Functor + namespace particles { - - //! type of the user functor - using Functor = T_Functor; - - //! store user functor instance - HDINLINE Free( Functor const & functor ) : - Functor( functor ) - { } - - /** execute the user functor - * - * @tparam T_Args type of the arguments passed to the user functor - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param args arguments passed to the user functor - */ - template< - typename ... T_Args, - typename T_Acc - > - HDINLINE - void operator( )( - T_Acc const &, - T_Args && ... args - ) + namespace startPosition { - Functor::operator( )( args ... ); - } + namespace generic + { + namespace acc + { + /** wrapper for the user functor on the accelerator + * + * @tparam T_Functor user defined functor + */ + template + struct Free : private T_Functor + { + //! type of the user functor + using Functor = T_Functor; - template< typename T_Particle > - HDINLINE uint32_t - numberOfMacroParticles( float_X const realParticlesPerCell ) - { - return Functor::template numberOfMacroParticles< T_Particle >( realParticlesPerCell ); - } - }; -} // namespace acc + //! 
store user functor instance + HDINLINE Free(Functor const& functor) : Functor(functor) + { + } - template< typename T_Functor > - struct Free : protected T_Functor - { + /** execute the user functor + * + * @tparam T_Args type of the arguments passed to the user functor + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param args arguments passed to the user functor + */ + template + HDINLINE void operator()(T_Acc const&, T_Args&&... args) + { + Functor::operator()(args...); + } - using Functor = T_Functor; + template + HDINLINE uint32_t numberOfMacroParticles(float_X const realParticlesPerCell) + { + return Functor::template numberOfMacroParticles(realParticlesPerCell); + } + }; + } // namespace acc - template< typename T_SpeciesType > - struct apply - { - using type = Free; - }; + template + struct Free : protected T_Functor + { + using Functor = T_Functor; - /** constructor - * - * This constructor is only compiled if the user functor has - * a host side constructor with one (uint32_t) argument. - * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param currentStep current simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = Functor > - HINLINE Free( - uint32_t currentStep, - typename std::enable_if< - std::is_constructible< - DeferFunctor, - uint32_t - >::value - >::type* = 0 - ) : Functor( currentStep ) - { - } + template + struct apply + { + using type = Free; + }; - /** constructor - * - * This constructor is only compiled if the user functor has a default constructor. 
- * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param current simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = Functor > - HINLINE Free( - uint32_t, - typename std::enable_if< - std::is_constructible< DeferFunctor >::value - >::type* = 0 - ) : Functor( ) - { - } + /** constructor + * + * This constructor is only compiled if the user functor has + * a host side constructor with one (uint32_t) argument. + * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param currentStep current simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE Free( + uint32_t currentStep, + typename std::enable_if::value>::type* = 0) + : Functor(currentStep) + { + } - /** create device functor - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param offset (in supercells, without any guards) to the - * origin of the local domain - * @param configuration of the worker - */ - template< - typename T, - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE acc::Free< Functor > - operator()( - T_Acc const & acc, - T const &, - T_WorkerCfg const & - ) const - { - return acc::Free< Functor >( *static_cast< Functor const * >( this ) ); - } - }; + /** constructor + * + * This constructor is only compiled if the user functor has a default constructor. 
+ * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param current simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE Free( + uint32_t, + typename std::enable_if::value>::type* = 0) + : Functor() + { + } + + /** create device functor + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param offset (in supercells, without any guards) to the + * origin of the local domain + * @param configuration of the worker + */ + template + HDINLINE acc::Free operator()(T_Acc const& acc, T const&, T_WorkerCfg const&) const + { + return acc::Free(*static_cast(this)); + } + }; -} // namespace generic -} // namespace startPosition -} // namespace particles + } // namespace generic + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/generic/FreeRng.def b/include/picongpu/particles/startPosition/generic/FreeRng.def index c0a0572618..0f2f12793f 100644 --- a/include/picongpu/particles/startPosition/generic/FreeRng.def +++ b/include/picongpu/particles/startPosition/generic/FreeRng.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -27,26 +27,22 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace generic -{ - - /** call simple free user defined functor and provide a random number generator - * - * - * @tparam T_Functor user defined unary functor - * @tparam T_Distribution pmacc::random::distributions, random number distribution - */ - template< - typename T_Functor, - typename T_Distribution - > - struct FreeRng; + namespace particles + { + namespace startPosition + { + namespace generic + { + /** call simple free user defined functor and provide a random number generator + * + * + * @tparam T_Functor user defined unary functor + * @tparam T_Distribution pmacc::random::distributions, random number distribution + */ + template + struct FreeRng; -} // namespace generic -} // namespace startPosition -} // namespace particles + } // namespace generic + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/startPosition/generic/FreeRng.hpp b/include/picongpu/particles/startPosition/generic/FreeRng.hpp index c7a37b952d..11a608c3b0 100644 --- a/include/picongpu/particles/startPosition/generic/FreeRng.hpp +++ b/include/picongpu/particles/startPosition/generic/FreeRng.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Alexander Grund +/* Copyright 2015-2021 Rene Widera, Alexander Grund * * This file is part of PIConGPU. * @@ -30,198 +30,137 @@ namespace picongpu { -namespace particles -{ -namespace startPosition -{ -namespace generic -{ -namespace acc -{ - template< - typename T_Functor, - typename T_RngType - > - struct FreeRng : private T_Functor + namespace particles { - - using Functor = T_Functor; - using RngType = T_RngType; - - HDINLINE FreeRng( - Functor const & functor, - RngType const & rng - ) : - T_Functor( functor ), m_rng( rng ) - { - } - - /** call user functor - * - * The random number generator is initialized with the first call. 
- * - * @tparam T_Particle type of the particle to manipulate - * @tparam T_Args type of the arguments passed to the user functor - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param particle particle which is given to the user functor - * @return void is used to enable the operator if the user functor except two arguments - */ - template< - typename T_Particle, - typename ... T_Args, - typename T_Acc - > - HDINLINE - void operator()( - T_Acc const &, - T_Particle& particle, - T_Args && ... args - ) - { - namespace nvrng = nvidia::rng; - - Functor::operator()( - m_rng, - particle, - args ... - ); - } - - template< typename T_Particle > - HDINLINE uint32_t - numberOfMacroParticles( float_X const realParticlesPerCell ) + namespace startPosition { - return Functor::template numberOfMacroParticles< T_Particle >( realParticlesPerCell ); - } - - private: - - RngType m_rng; - }; -} // namespace acc - - template< - typename T_Functor, - typename T_Distribution - > - struct FreeRng : - protected T_Functor, - private picongpu::particles::functor::misc::Rng< - T_Distribution - > - { - template< typename T_SpeciesType > - struct apply - { - using type = FreeRng; - }; - - using RngGenerator = picongpu::particles::functor::misc::Rng< - T_Distribution - >; - - using RngType = typename RngGenerator::RandomGen; - - using Functor = T_Functor; - using Distribution = T_Distribution; - - /** constructor - * - * This constructor is only compiled if the user functor has - * a host side constructor with one (uint32_t) argument. 
- * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param currentStep current simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = Functor > - HINLINE FreeRng( - uint32_t currentStep, - typename std::enable_if< - std::is_constructible< - DeferFunctor, - uint32_t - >::value - >::type* = 0 - ) : - Functor( currentStep ), - RngGenerator( currentStep ) - { - } - - /** constructor - * - * This constructor is only compiled if the user functor has a default constructor. - * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param currentStep simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = Functor > - HINLINE FreeRng( - uint32_t currentStep, - typename std::enable_if< - std::is_constructible< DeferFunctor >::value - >::type* = 0 - ) : - Functor( ), - RngGenerator( currentStep ) - { - } - - /** create functor for the accelerator - * - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param localSupercellOffset offset (in superCells, without any guards) relative - * to the origin of the local domain - * @param workerCfg configuration of the worker - */ - template< - typename T_WorkerCfg, - typename T_Acc - > - HDINLINE auto - operator()( - T_Acc const & acc, - DataSpace< simDim > const & localSupercellOffset, - T_WorkerCfg const & workerCfg - ) const - -> acc::FreeRng< - Functor, - RngType - > - { - RngType const rng = ( *static_cast< RngGenerator const * >( this ) )( - acc, - localSupercellOffset, - workerCfg - ); - - return acc::FreeRng< - Functor, - RngType - >( - *static_cast< Functor const * >( this ), - rng - ); - } - - 
static - HINLINE std::string - getName( ) - { - return std::string("FreeRNG"); - } - }; - -} // namespace generic -} // namespace startPosition -} // namespace particles + namespace generic + { + namespace acc + { + template + struct FreeRng : private T_Functor + { + using Functor = T_Functor; + using RngType = T_RngType; + + HDINLINE FreeRng(Functor const& functor, RngType const& rng) : T_Functor(functor), m_rng(rng) + { + } + + /** call user functor + * + * The random number generator is initialized with the first call. + * + * @tparam T_Particle type of the particle to manipulate + * @tparam T_Args type of the arguments passed to the user functor + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param particle particle which is given to the user functor + * @return void is used to enable the operator if the user functor except two arguments + */ + template + HDINLINE void operator()(T_Acc const&, T_Particle& particle, T_Args&&... args) + { + Functor::operator()(m_rng, particle, args...); + } + + template + HDINLINE uint32_t numberOfMacroParticles(float_X const realParticlesPerCell) + { + return Functor::template numberOfMacroParticles(realParticlesPerCell); + } + + private: + RngType m_rng; + }; + } // namespace acc + + template + struct FreeRng + : protected T_Functor + , private picongpu::particles::functor::misc::Rng + { + template + struct apply + { + using type = FreeRng; + }; + + using RngGenerator = picongpu::particles::functor::misc::Rng; + + using RngType = typename RngGenerator::RandomGen; + + using Functor = T_Functor; + using Distribution = T_Distribution; + + /** constructor + * + * This constructor is only compiled if the user functor has + * a host side constructor with one (uint32_t) argument. 
+ * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param currentStep current simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE FreeRng( + uint32_t currentStep, + typename std::enable_if::value>::type* = 0) + : Functor(currentStep) + , RngGenerator(currentStep) + { + } + + /** constructor + * + * This constructor is only compiled if the user functor has a default constructor. + * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param currentStep simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE FreeRng( + uint32_t currentStep, + typename std::enable_if::value>::type* = 0) + : Functor() + , RngGenerator(currentStep) + { + } + + /** create functor for the accelerator + * + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param localSupercellOffset offset (in superCells, without any guards) relative + * to the origin of the local domain + * @param workerCfg configuration of the worker + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + DataSpace const& localSupercellOffset, + T_WorkerCfg const& workerCfg) const -> acc::FreeRng + { + RngType const rng + = (*static_cast(this))(acc, localSupercellOffset, workerCfg); + + return acc::FreeRng(*static_cast(this), rng); + } + + static HINLINE std::string getName() + { + return std::string("FreeRNG"); + } + }; + + } // namespace generic + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/synchrotronPhotons/PhotonCreator.def b/include/picongpu/particles/synchrotronPhotons/PhotonCreator.def index 23dc70f441..bc678c73b6 
100644 --- a/include/picongpu/particles/synchrotronPhotons/PhotonCreator.def +++ b/include/picongpu/particles/synchrotronPhotons/PhotonCreator.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -21,27 +21,26 @@ namespace picongpu { -namespace particles -{ -namespace synchrotronPhotons -{ - -/** Functor creating photons from electrons according to synchrotron radiation. - * - * The numerical model is taken from: - * - * Gonoskov, A., et al. "Extended particle-in-cell schemes for physics - * in ultrastrong laser fields: Review and developments." - * Physical Review E 92.2 (2015): 023305. - * - * This functor is called by the general particle creation module. - * - * \tparam T_ElectronSpecies - * \tparam T_PhotonSpecies - */ -template -struct PhotonCreator; + namespace particles + { + namespace synchrotronPhotons + { + /** Functor creating photons from electrons according to synchrotron radiation. + * + * The numerical model is taken from: + * + * Gonoskov, A., et al. "Extended particle-in-cell schemes for physics + * in ultrastrong laser fields: Review and developments." + * Physical Review E 92.2 (2015): 023305. + * + * This functor is called by the general particle creation module. + * + * \tparam T_ElectronSpecies + * \tparam T_PhotonSpecies + */ + template + struct PhotonCreator; -} // namespace synchrotronPhotons -} // namespace particles + } // namespace synchrotronPhotons + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/synchrotronPhotons/PhotonCreator.hpp b/include/picongpu/particles/synchrotronPhotons/PhotonCreator.hpp index aa3ada95bd..5418e4c42b 100644 --- a/include/picongpu/particles/synchrotronPhotons/PhotonCreator.hpp +++ b/include/picongpu/particles/synchrotronPhotons/PhotonCreator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. 
* @@ -23,7 +23,6 @@ #include "SynchrotronFunctions.hpp" #include "picongpu/algorithms/Gamma.hpp" -#include #include #include #include "picongpu/traits/frame/GetMass.hpp" @@ -51,344 +50,303 @@ namespace picongpu { -namespace particles -{ -namespace synchrotronPhotons -{ - -/** Functor creating photons from electrons according to synchrotron radiation. - * - * The numerical model is taken from: - * - * Gonoskov, A., et al. "Extended particle-in-cell schemes for physics - * in ultrastrong laser fields: Review and developments." - * Physical Review E 92.2 (2015): 023305. - * - * This functor is called by the general particle creation module. - * - * \tparam T_ElectronSpecies - * \tparam T_PhotonSpecies - */ -template -struct PhotonCreator -{ - using ElectronSpecies = T_ElectronSpecies; - using PhotonSpecies = T_PhotonSpecies; - - using FrameType = typename ElectronSpecies::FrameType; - - /* specify field to particle interpolation scheme */ - using Field2ParticleInterpolation = typename pmacc::particles::traits::ResolveAliasFromSpecies< - ElectronSpecies, - interpolation<> - >::type; - - /* margins around the supercell for the interpolation of the field on the cells */ - using LowerMargin = typename GetMargin::LowerMargin; - using UpperMargin = typename GetMargin::UpperMargin; - - /* relevant area of a block */ - using BlockArea = SuperCellDescription< - typename MappingDesc::SuperCellSize, - LowerMargin, - UpperMargin - >; - - BlockArea BlockDescription; - - using TVec = MappingDesc::SuperCellSize; - - using ValueType_E = FieldE::ValueType; - using ValueType_B = FieldB::ValueType; - -private: - /* global memory EM-field device databoxes */ - PMACC_ALIGN(eBox, FieldE::DataBoxType); - PMACC_ALIGN(bBox, FieldB::DataBoxType); - /* shared memory EM-field device databoxes */ - PMACC_ALIGN(cachedE, DataBox >); - PMACC_ALIGN(cachedB, DataBox >); - - PMACC_ALIGN(curF_1, SynchrotronFunctions::SyncFuncCursor); - PMACC_ALIGN(curF_2, SynchrotronFunctions::SyncFuncCursor); - - 
PMACC_ALIGN(photon_mom, float3_X); - - /* random number generator */ - using RNGFactory = pmacc::random::RNGProvider; - using Distribution = pmacc::random::distributions::Uniform; - using RandomGen = typename RNGFactory::GetRandomType::type; - RandomGen randomGen; - -public: - /* host constructor initializing member : random number generator */ - PhotonCreator( - const SynchrotronFunctions::SyncFuncCursor& curF_1, - const SynchrotronFunctions::SyncFuncCursor& curF_2) - : curF_1(curF_1), - curF_2(curF_2), - photon_mom(float3_X::create(0)), - randomGen(RNGFactory::createRandom()) - { - DataConnector &dc = Environment<>::get().DataConnector(); - /* initialize pointers on host-side E-(B-)field databoxes */ - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - /* initialize device-side E-(B-)field databoxes */ - eBox = fieldE->getDeviceDataBox(); - bBox = fieldB->getDeviceDataBox(); - } - - /** cache fields used by this functor - * - * @warning this is a collective method and calls synchronize - * - * @tparam T_Acc alpaka accelerator type - * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker - * - * @param acc alpaka accelerator - * @param blockCell relative offset (in cells) to the local domain plus the guarding cells - * @param workerCfg configuration of the worker - */ - template< - typename T_Acc , - typename T_WorkerCfg - > - DINLINE void collectiveInit( - const T_Acc & acc, - const DataSpace& blockCell, - const T_WorkerCfg & workerCfg - ) - { - /* caching of E and B fields */ - cachedB = CachedBox::create< - 0, - ValueType_B - >( - acc, - BlockArea() - ); - cachedE = CachedBox::create< - 1, - ValueType_E - >( - acc, - BlockArea() - ); - - /* instance of nvidia assignment operator */ - nvidia::functors::Assign assign; - /* copy fields from global to shared */ - auto fieldBBlock = bBox.shift(blockCell); - ThreadCollective< - BlockArea, - T_WorkerCfg::numWorkers - 
> collective( workerCfg.getWorkerIdx( ) ); - collective( - acc, - assign, - cachedB, - fieldBBlock - ); - /* copy fields from global to shared */ - auto fieldEBlock = eBox.shift(blockCell); - collective( - acc, - assign, - cachedE, - fieldEBlock - ); - - /* wait for shared memory to be initialized */ - __syncthreads(); - } - - /** Initialization function on device - * - * \brief Cache EM-fields on device - * and initialize possible prerequisites for ionization, like e.g. random number generator. - * - * This function will be called inline on the device which must happen BEFORE threads diverge - * during loop execution. The reason for this is the `__syncthreads()` call which is necessary after - * initializing the E-/B-field shared boxes in shared memory. - */ - template< typename T_Acc > - DINLINE void init( - T_Acc const & acc, - const DataSpace& blockCell, - const int& linearThreadIdx, - const DataSpace& localCellOffset - ) - { - /* initialize random number generator with the local cell index in the simulation */ - this->randomGen.init(localCellOffset); - } - - /** Get the photon emission probability - * - * @param delta normalized (to the electron energy) photon energy - * @param chi quantum-nonlinearity parameter - * @param gamma electron gamma - */ - DINLINE float_X emission_prob( - const float_X delta, - const float_X chi, - const float_X gamma) const + namespace particles { - // catch these special values because otherwise a NaN is returned whereas it should be a zero. 
- if(chi == float_X(0.0) || delta == float_X(0.0) || (float_X(1.0) - delta) == float_X(0.0)) - return float_X(0.0); - - const float_X mass = frame::getMass(); - const float_X charge = frame::getCharge(); - - const float_X sqrtOf3 = 1.7320508075688772; - const float_X factor = DELTA_T * charge*charge * mass * SPEED_OF_LIGHT / (float_X(4.0) * PI * EPS0 * HBAR*HBAR) * - sqrtOf3 / (float_X(2.0) * PI) * chi / gamma; - - if(enableQEDTerm) - { - // quantum - const float_X z = float_X(2.0/3.0) * delta / ((float_X(1.0) - delta) * chi); - - return factor * (float_X(1.0) - delta) / delta * - (this->curF_1[z] + float_X(1.5) * delta * chi * z * this->curF_2[z]); - } - else - { - // classical - const float_X z = float_X(2.0/3.0) * delta / chi; - - return factor / delta * this->curF_1[z]; - } - } - - /** Get the *scaled* photon emission probability - * - * The scaling avoids an infrared divergence. - * - * @param deltaScaled scaled and normalized (to the electron energy) photon energy - * @param chi quantum-nonlinearity parameter - * @param gamma electron gamma - */ - DINLINE float_X emission_prob_scaled( - const float_X deltaScaled, - const float_X chi, - const float_X gamma) const - { - const float_X delta = deltaScaled*deltaScaled*deltaScaled; - return float_X(3.0) * deltaScaled*deltaScaled * emission_prob(delta, chi, gamma); - } - - /** Return the number of target particles to create from each source particle. - * - * Called for each frame of the source species. 
- * - * @param sourceFrame Frame of the source species - * @param localIdx Index of the source particle within frame - * @return number of particle to be created from each source particle - */ - template< typename T_Acc > - DINLINE unsigned int numNewParticles(const T_Acc& acc, FrameType& sourceFrame, int localIdx) - { - using namespace pmacc::algorithms; - - auto particle = sourceFrame[localIdx]; - - /* particle position, used for field-to-particle interpolation */ - const floatD_X pos = particle[position_]; - const int particleCellIdx = particle[localCellIdx_]; - /* multi-dim coordinate of the local cell inside the super cell */ - DataSpace localCell(DataSpaceOperations::template map (particleCellIdx)); - /* interpolation of E-field on the particle position */ - const picongpu::traits::FieldPosition fieldPosE; - ValueType_E fieldE = Field2ParticleInterpolation() - (cachedE.shift(localCell).toCursor(), pos, fieldPosE()); - /* interpolation of B-field on the particle position */ - const picongpu::traits::FieldPosition fieldPosB; - ValueType_B fieldB = Field2ParticleInterpolation() - (cachedB.shift(localCell).toCursor(), pos, fieldPosB()); - - /* All computation below is in the single "real" particle picture. - * The macroparticle weighting factor is reintroduced at the end of this code block. */ - const float3_X mom = particle[momentum_] / particle[weighting_]; - const float_X mom2 = math::dot(mom, mom); - const float3_X mom_norm = mom * math::rsqrt(mom2); - const float_X mass = frame::getMass(); - - const float_X gamma = Gamma<>()(mom, mass); - const float3_X vel = mom / (gamma * mass); // low accuracy? 
- - const float3_X lorentzForceOverCharge = fieldE + math::cross(vel, fieldB); - const float_X lorentzForceOverCharge2 = math::dot(lorentzForceOverCharge, lorentzForceOverCharge); - const float_X fieldE_long = math::dot(mom_norm, fieldE); - - // effective magnetic strength (in cgs) - const float_X H_eff = math::sqrt(lorentzForceOverCharge2 - fieldE_long*fieldE_long); - - const float_X charge = math::abs(frame::getCharge()); - - const float_X c = SPEED_OF_LIGHT; - // Schwinger limit, unit: V/m (in cgs) - const float_X E_S = mass*mass * c*c*c / (charge * HBAR); - // quantum-nonlinearity parameter - const float_X chi = gamma * H_eff / E_S; - - const float_X deltaScaled = this->randomGen(acc); - - const float_X x = emission_prob_scaled(deltaScaled, chi, gamma); - - // raise a warning if the emission probability is too high. - if(picLog::log_level & picLog::CRITICAL::lvl) - { - if(x > float_X(SINGLE_EMISSION_PROB_LIMIT)) - { - const float_X delta = deltaScaled*deltaScaled*deltaScaled; - printf("[SynchrotronPhotons] warning: emission probability is too high: p = %g, at delta = %g, chi = %g, gamma = %g\n", - x, delta, chi, gamma); - } - } - - if(this->randomGen(acc) < x) + namespace synchrotronPhotons { - const float_X delta = deltaScaled*deltaScaled*deltaScaled; - const float_X photonMom_abs = delta * mass*c * gamma; - if(photonMom_abs > SOFT_PHOTONS_CUTOFF_MOM) + /** Functor creating photons from electrons according to synchrotron radiation. + * + * The numerical model is taken from: + * + * Gonoskov, A., et al. "Extended particle-in-cell schemes for physics + * in ultrastrong laser fields: Review and developments." + * Physical Review E 92.2 (2015): 023305. + * + * This functor is called by the general particle creation module. 
+ * + * \tparam T_ElectronSpecies + * \tparam T_PhotonSpecies + */ + template + struct PhotonCreator { - this->photon_mom = mom_norm * photonMom_abs * particle[weighting_]; - return 1; - } - } - - return 0; - } - - /** Functor implementation: setting photon and electron properties - * - * Called once for each single particle creation. - * - * \tparam Electron type of electron which creates the photon - * \tparam Photon type of photon that is created - */ - template - DINLINE void operator()(const T_Acc& acc, Electron& electron, Photon& photon) const - { - namespace parOp = pmacc::particles::operations; - auto destPhoton = - parOp::deselect< - boost::mpl::vector< - multiMask, - momentum - > - >(photon); - parOp::assign( destPhoton, parOp::deselect(electron) ); - - photon[multiMask_] = 1; - photon[momentum_] = this->photon_mom; - electron[momentum_] -= this->photon_mom; - } -}; - -} // namespace synchrotronPhotons -} // namespace particles + using ElectronSpecies = T_ElectronSpecies; + using PhotonSpecies = T_PhotonSpecies; + + using FrameType = typename ElectronSpecies::FrameType; + + /* specify field to particle interpolation scheme */ + using Field2ParticleInterpolation = + typename pmacc::particles::traits::ResolveAliasFromSpecies>::type; + + /* margins around the supercell for the interpolation of the field on the cells */ + using LowerMargin = typename GetMargin::LowerMargin; + using UpperMargin = typename GetMargin::UpperMargin; + + /* relevant area of a block */ + using BlockArea = SuperCellDescription; + + BlockArea BlockDescription; + + using TVec = MappingDesc::SuperCellSize; + + using ValueType_E = FieldE::ValueType; + using ValueType_B = FieldB::ValueType; + + private: + /* global memory EM-field device databoxes */ + PMACC_ALIGN(eBox, FieldE::DataBoxType); + PMACC_ALIGN(bBox, FieldB::DataBoxType); + /* shared memory EM-field device databoxes */ + PMACC_ALIGN(cachedE, DataBox>); + PMACC_ALIGN(cachedB, DataBox>); + + PMACC_ALIGN(curF_1, 
SynchrotronFunctions::SyncFuncCursor); + PMACC_ALIGN(curF_2, SynchrotronFunctions::SyncFuncCursor); + + PMACC_ALIGN(photon_mom, float3_X); + + /* random number generator */ + using RNGFactory = pmacc::random::RNGProvider; + using Distribution = pmacc::random::distributions::Uniform; + using RandomGen = typename RNGFactory::GetRandomType::type; + RandomGen randomGen; + + public: + /* host constructor initializing member : random number generator */ + PhotonCreator( + const SynchrotronFunctions::SyncFuncCursor& curF_1, + const SynchrotronFunctions::SyncFuncCursor& curF_2) + : curF_1(curF_1) + , curF_2(curF_2) + , photon_mom(float3_X::create(0)) + , randomGen(RNGFactory::createRandom()) + { + DataConnector& dc = Environment<>::get().DataConnector(); + /* initialize pointers on host-side E-(B-)field databoxes */ + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + /* initialize device-side E-(B-)field databoxes */ + eBox = fieldE->getDeviceDataBox(); + bBox = fieldB->getDeviceDataBox(); + } + + /** cache fields used by this functor + * + * @warning this is a collective method and calls synchronize + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_WorkerCfg pmacc::mappings::threads::WorkerCfg, configuration of the worker + * + * @param acc alpaka accelerator + * @param blockCell relative offset (in cells) to the local domain plus the guarding cells + * @param workerCfg configuration of the worker + */ + template + DINLINE void collectiveInit( + const T_Acc& acc, + const DataSpace& blockCell, + const T_WorkerCfg& workerCfg) + { + /* caching of E and B fields */ + cachedB = CachedBox::create<0, ValueType_B>(acc, BlockArea()); + cachedE = CachedBox::create<1, ValueType_E>(acc, BlockArea()); + + /* instance of nvidia assignment operator */ + nvidia::functors::Assign assign; + /* copy fields from global to shared */ + auto fieldBBlock = bBox.shift(blockCell); + ThreadCollective collective(workerCfg.getWorkerIdx()); + 
collective(acc, assign, cachedB, fieldBBlock); + /* copy fields from global to shared */ + auto fieldEBlock = eBox.shift(blockCell); + collective(acc, assign, cachedE, fieldEBlock); + + /* wait for shared memory to be initialized */ + cupla::__syncthreads(acc); + } + + /** Initialization function on device + * + * \brief Cache EM-fields on device + * and initialize possible prerequisites for ionization, like e.g. random number generator. + * + * This function will be called inline on the device which must happen BEFORE threads diverge + * during loop execution. The reason for this is the `cupla::__syncthreads( acc )` call which is + * necessary after initializing the E-/B-field shared boxes in shared memory. + */ + template + DINLINE void init( + T_Acc const& acc, + const DataSpace& blockCell, + const int& linearThreadIdx, + const DataSpace& localCellOffset) + { + /* initialize random number generator with the local cell index in the simulation */ + this->randomGen.init(localCellOffset); + } + + /** Get the photon emission probability + * + * @param delta normalized (to the electron energy) photon energy + * @param chi quantum-nonlinearity parameter + * @param gamma electron gamma + */ + DINLINE float_X emission_prob(const float_X delta, const float_X chi, const float_X gamma) const + { + // catch these special values because otherwise a NaN is returned whereas it should be a zero. 
+ if(chi == float_X(0.0) || delta == float_X(0.0) || (float_X(1.0) - delta) == float_X(0.0)) + return float_X(0.0); + + const float_X mass = frame::getMass(); + const float_X charge = frame::getCharge(); + + const float_X sqrtOf3 = 1.7320508075688772; + const float_X factor = DELTA_T * charge * charge * mass * SPEED_OF_LIGHT + / (float_X(4.0) * PI * EPS0 * HBAR * HBAR) * sqrtOf3 / (float_X(2.0) * PI) * chi / gamma; + + if(enableQEDTerm) + { + // quantum + const float_X z = float_X(2.0 / 3.0) * delta / ((float_X(1.0) - delta) * chi); + + return factor * (float_X(1.0) - delta) / delta + * (this->curF_1[z] + float_X(1.5) * delta * chi * z * this->curF_2[z]); + } + else + { + // classical + const float_X z = float_X(2.0 / 3.0) * delta / chi; + + return factor / delta * this->curF_1[z]; + } + } + + /** Get the *scaled* photon emission probability + * + * The scaling avoids an infrared divergence. + * + * @param deltaScaled scaled and normalized (to the electron energy) photon energy + * @param chi quantum-nonlinearity parameter + * @param gamma electron gamma + */ + DINLINE float_X + emission_prob_scaled(const float_X deltaScaled, const float_X chi, const float_X gamma) const + { + const float_X delta = deltaScaled * deltaScaled * deltaScaled; + return float_X(3.0) * deltaScaled * deltaScaled * emission_prob(delta, chi, gamma); + } + + /** Return the number of target particles to create from each source particle. + * + * Called for each frame of the source species. 
+ * + * @param sourceFrame Frame of the source species + * @param localIdx Index of the source particle within frame + * @return number of particle to be created from each source particle + */ + template + DINLINE unsigned int numNewParticles(const T_Acc& acc, FrameType& sourceFrame, int localIdx) + { + using namespace pmacc::algorithms; + + auto particle = sourceFrame[localIdx]; + + /* particle position, used for field-to-particle interpolation */ + const floatD_X pos = particle[position_]; + const int particleCellIdx = particle[localCellIdx_]; + /* multi-dim coordinate of the local cell inside the super cell */ + DataSpace localCell( + DataSpaceOperations::template map(particleCellIdx)); + /* interpolation of E-field on the particle position */ + const picongpu::traits::FieldPosition fieldPosE; + ValueType_E fieldE + = Field2ParticleInterpolation()(cachedE.shift(localCell).toCursor(), pos, fieldPosE()); + /* interpolation of B-field on the particle position */ + const picongpu::traits::FieldPosition fieldPosB; + ValueType_B fieldB + = Field2ParticleInterpolation()(cachedB.shift(localCell).toCursor(), pos, fieldPosB()); + + /* All computation below is in the single "real" particle picture. + * The macroparticle weighting factor is reintroduced at the end of this code block. */ + const float3_X mom = particle[momentum_] / particle[weighting_]; + const float_X mom2 = pmacc::math::dot(mom, mom); + const float3_X mom_norm = mom * math::rsqrt(mom2); + const float_X mass = frame::getMass(); + + const float_X gamma = Gamma<>()(mom, mass); + const float3_X vel = mom / (gamma * mass); // low accuracy? 
+ + const float3_X lorentzForceOverCharge = fieldE + pmacc::math::cross(vel, fieldB); + const float_X lorentzForceOverCharge2 + = pmacc::math::dot(lorentzForceOverCharge, lorentzForceOverCharge); + const float_X fieldE_long = pmacc::math::dot(mom_norm, fieldE); + + // effective magnetic strength (in cgs) + const float_X H_eff = math::sqrt(lorentzForceOverCharge2 - fieldE_long * fieldE_long); + + const float_X charge = math::abs(frame::getCharge()); + + const float_X c = SPEED_OF_LIGHT; + // Schwinger limit, unit: V/m (in cgs) + const float_X E_S = mass * mass * c * c * c / (charge * HBAR); + // quantum-nonlinearity parameter + const float_X chi = gamma * H_eff / E_S; + + const float_X deltaScaled = this->randomGen(acc); + + const float_X x = emission_prob_scaled(deltaScaled, chi, gamma); + + // raise a warning if the emission probability is too high. + if(picLog::log_level & picLog::CRITICAL::lvl) + { + if(x > float_X(SINGLE_EMISSION_PROB_LIMIT)) + { + const float_X delta = deltaScaled * deltaScaled * deltaScaled; + printf( + "[SynchrotronPhotons] warning: emission probability is too high: p = %g, at delta = " + "%g, chi = %g, gamma = %g\n", + x, + delta, + chi, + gamma); + } + } + + if(this->randomGen(acc) < x) + { + const float_X delta = deltaScaled * deltaScaled * deltaScaled; + const float_X photonMom_abs = delta * mass * c * gamma; + if(photonMom_abs > SOFT_PHOTONS_CUTOFF_MOM) + { + this->photon_mom = mom_norm * photonMom_abs * particle[weighting_]; + return 1; + } + } + + return 0; + } + + /** Functor implementation: setting photon and electron properties + * + * Called once for each single particle creation. 
+ * + * \tparam Electron type of electron which creates the photon + * \tparam Photon type of photon that is created + */ + template + DINLINE void operator()(const T_Acc& acc, Electron& electron, Photon& photon) const + { + namespace parOp = pmacc::particles::operations; + auto destPhoton = parOp::deselect>(photon); + parOp::assign(destPhoton, parOp::deselect(electron)); + + photon[multiMask_] = 1; + photon[momentum_] = this->photon_mom; + electron[momentum_] -= this->photon_mom; + } + }; + + } // namespace synchrotronPhotons + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp b/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp index fafab8ca0b..f1aefaefce 100644 --- a/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp +++ b/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -32,97 +32,93 @@ namespace picongpu { -namespace particles -{ -namespace synchrotronPhotons -{ - -namespace detail -{ - -/** Map `x` to the internal lookup table and return the result of the - * first or the second synchrotron function for `x`. - */ -struct MapToLookupTable -{ - using LinInterpCursor = typename ::pmacc::result_of::Functor< - ::pmacc::cursor::tools::LinearInterp, - ::pmacc::cursor::BufferCursor - >::type; - - using type = float_X; - - LinInterpCursor linInterpCursor; - - /** constructor - * - * @param linInterpCursor lookup table of the first or the second - * synchrotron function. 
- */ - HDINLINE MapToLookupTable(LinInterpCursor linInterpCursor) - : linInterpCursor(linInterpCursor) {} - - /** Returns F_1(x) or F_2(x) - - * @param x position of the synchrotron function to be evaluated - */ - HDINLINE float_X operator()(const float_X x) const; -}; - -using SyncFuncCursor = ::pmacc::cursor::Cursor< - MapToLookupTable, - ::pmacc::cursor::PlusNavigator, - float_X ->; - -} // namespace detail - - -/** Lookup table for synchrotron functions. - * - * Provides cursors for the first and the second synchrotron function - */ -class SynchrotronFunctions -{ -public: - using SyncFuncCursor = detail::SyncFuncCursor; -private: - - using MyBuf = boost::shared_ptr >; - MyBuf dBuf_SyncFuncs[2]; // two synchrotron functions - - struct BesselK + namespace particles { - template - void operator()(const T_State &x, T_State &dxdt, T_Time t) const + namespace synchrotronPhotons { - dxdt[0] = boost::math::tr1::cyl_bessel_k(5.0/3.0, t); - } - }; - - /** First synchrotron function - */ - HINLINE float_64 F_1(const float_64 x) const; - /** Second synchrotron function - */ - HINLINE float_64 F_2(const float_64 x) const; - -public: - enum Select - { - first=0, second=1 - }; - - HINLINE void init(); - /** Return a cursor representing a synchrotron function - * - * @param syncFunction first or second synchrotron function - * @see: SynchrotronFunctions::Select - */ - HINLINE SyncFuncCursor getCursor(Select syncFunction) const; - -}; // class SynchrotronFunctions - -} // namespace synchrotronPhotons -} // namespace particles + namespace detail + { + /** Map `x` to the internal lookup table and return the result of the + * first or the second synchrotron function for `x`. 
+ */ + struct MapToLookupTable + { + using LinInterpCursor = typename ::pmacc::result_of::Functor< + ::pmacc::cursor::tools::LinearInterp, + ::pmacc::cursor::BufferCursor>::type; + + using type = float_X; + + LinInterpCursor linInterpCursor; + + /** constructor + * + * @param linInterpCursor lookup table of the first or the second + * synchrotron function. + */ + HDINLINE MapToLookupTable(LinInterpCursor linInterpCursor) : linInterpCursor(linInterpCursor) + { + } + + /** Returns F_1(x) or F_2(x) + + * @param x position of the synchrotron function to be evaluated + */ + HDINLINE float_X operator()(const float_X x) const; + }; + + using SyncFuncCursor + = ::pmacc::cursor::Cursor; + + } // namespace detail + + + /** Lookup table for synchrotron functions. + * + * Provides cursors for the first and the second synchrotron function + */ + class SynchrotronFunctions + { + public: + using SyncFuncCursor = detail::SyncFuncCursor; + + private: + using MyBuf = boost::shared_ptr>; + MyBuf dBuf_SyncFuncs[2]; // two synchrotron functions + + struct BesselK + { + template + void operator()(const T_State& x, T_State& dxdt, T_Time t) const + { + dxdt[0] = boost::math::tr1::cyl_bessel_k(5.0 / 3.0, t); + } + }; + + /** First synchrotron function + */ + HINLINE float_64 F_1(const float_64 x) const; + /** Second synchrotron function + */ + HINLINE float_64 F_2(const float_64 x) const; + + public: + enum Select + { + first = 0, + second = 1 + }; + + HINLINE void init(); + /** Return a cursor representing a synchrotron function + * + * @param syncFunction first or second synchrotron function + * @see: SynchrotronFunctions::Select + */ + HINLINE SyncFuncCursor getCursor(Select syncFunction) const; + + }; // class SynchrotronFunctions + + } // namespace synchrotronPhotons + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.tpp b/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.tpp index 
62b93c2bcd..15a8a0ff90 100644 --- a/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.tpp +++ b/include/picongpu/particles/synchrotronPhotons/SynchrotronFunctions.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -22,127 +22,125 @@ #include "picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp" #include "picongpu/simulation_defines.hpp" #include -#if( BOOST_VERSION == 106400 ) - /* `array_wrapper.hpp` must be included before `integrate.hpp` to avoid - * the error - * `boost/numeric/ublas/matrix.hpp(5977): error: namespace "boost::serialization" has no member "make_array"` - * in boost 1.64.0 - * see boost issue https://svn.boost.org/trac/boost/ticket/12516 - */ -# include +#if(BOOST_VERSION == 106400) +/* `array_wrapper.hpp` must be included before `integrate.hpp` to avoid + * the error + * `boost/numeric/ublas/matrix.hpp(5977): error: namespace "boost::serialization" has no member "make_array"` + * in boost 1.64.0 + * see boost issue https://svn.boost.org/trac/boost/ticket/12516 + */ +# include #endif #include namespace picongpu { -namespace particles -{ -namespace synchrotronPhotons -{ - -namespace detail -{ - -/** Returns F_1(x) or F_2(x) - - * @param x position of the synchrotron function to be evaluated - */ -HDINLINE float_X MapToLookupTable::operator()(const float_X x) const -{ - /* This mapping increases the sample point density for small values of x - * where the synchrotron functions have a divergent slope. Without this mapping - * the emission probabilty of low-energy photons is underestimated. 
- * - * This is the inverse mapping of the mapping in @see:`SynchrotronFunctions::init()` - */ - const float_X x_m = math::pow(x, float_X(1.0/3.0)); - - const float_X cutOff = static_cast(SYNC_FUNCS_CUTOFF); - - if(x_m >= cutOff) - return float_X(0.0); - else - return this->linInterpCursor[x_m / static_cast(SYNC_FUNCS_STEP_WIDTH)]; -} - -} // namespace detail - - -/** First synchrotron function - */ -float_64 SynchrotronFunctions::F_1(const float_64 x) const -{ - if(x == float_64(0.0)) - return float_64(0.0); - - using namespace boost::numeric::odeint; - using state_type = boost::array; - - state_type integral_result = {0.0}; - const float_64 upper_bound(SYNC_FUNCS_F1_INTEGRAL_BOUND); - const float_64 stepwidth(SYNC_FUNCS_BESSEL_INTEGRAL_STEPWIDTH); - integrate(BesselK(), integral_result, x, upper_bound, stepwidth); - - return x * integral_result[0]; -} -/** Second synchrotron function - */ -float_64 SynchrotronFunctions::F_2(const float_64 x) const -{ - if(x == float_64(0.0)) - return float_64(0.0); - - return x * boost::math::tr1::cyl_bessel_k(2.0/3.0, x); -} - - -void SynchrotronFunctions::init() -{ - const uint32_t numSamples = SYNC_FUNCS_NUM_SAMPLES; - - this->dBuf_SyncFuncs[first] = MyBuf(new pmacc::container::DeviceBuffer(numSamples)); - this->dBuf_SyncFuncs[second] = MyBuf(new pmacc::container::DeviceBuffer(numSamples)); - - pmacc::container::HostBuffer hBuf_F_1(numSamples); - pmacc::container::HostBuffer hBuf_F_2(numSamples); - - for(uint32_t sampleIdx = 0u; sampleIdx < numSamples; sampleIdx++) + namespace particles { - const float_64 x_m = float_64(sampleIdx) * SYNC_FUNCS_STEP_WIDTH; - /* This mapping increases the sample point density for small values of x - * where the synchrotron functions have a divergent slope. Without this mapping - * the emission probabilty of low-energy photons is underestimated. 
- */ - const float_64 x = x_m * x_m * x_m; - - hBuf_F_1.origin()[sampleIdx] = static_cast(this->F_1(x)); - hBuf_F_2.origin()[sampleIdx] = static_cast(this->F_2(x)); - } - - *this->dBuf_SyncFuncs[first] = hBuf_F_1; - *this->dBuf_SyncFuncs[second] = hBuf_F_2; -} - -/** Return a cursor representing a synchrotron function - * - * @param syncFunction first or second synchrotron function - * @see: SynchrotronFunctions::Select - */ -SynchrotronFunctions::SyncFuncCursor -SynchrotronFunctions::getCursor(SynchrotronFunctions::Select syncFunction) const -{ - using namespace pmacc; - - detail::MapToLookupTable::LinInterpCursor linInterpCursor = - cursor::tools::LinearInterp()(this->dBuf_SyncFuncs[syncFunction]->origin()); - - return cursor::make_Cursor( - detail::MapToLookupTable(linInterpCursor), - cursor::PlusNavigator(), - float_X(0.0)); -} - -} // namespace synchrotronPhotons -} // namespace particles + namespace synchrotronPhotons + { + namespace detail + { + /** Returns F_1(x) or F_2(x) + + * @param x position of the synchrotron function to be evaluated + */ + HDINLINE float_X MapToLookupTable::operator()(const float_X x) const + { + /* This mapping increases the sample point density for small values of x + * where the synchrotron functions have a divergent slope. Without this mapping + * the emission probabilty of low-energy photons is underestimated. 
+ * + * This is the inverse mapping of the mapping in @see:`SynchrotronFunctions::init()` + */ + const float_X x_m = math::pow(x, float_X(1.0 / 3.0)); + + const float_X cutOff = static_cast(SYNC_FUNCS_CUTOFF); + + if(x_m >= cutOff) + return float_X(0.0); + else + return this->linInterpCursor[x_m / static_cast(SYNC_FUNCS_STEP_WIDTH)]; + } + + } // namespace detail + + + /** First synchrotron function + */ + float_64 SynchrotronFunctions::F_1(const float_64 x) const + { + if(x == float_64(0.0)) + return float_64(0.0); + + using namespace boost::numeric::odeint; + using state_type = boost::array; + + state_type integral_result = {0.0}; + const float_64 upper_bound(SYNC_FUNCS_F1_INTEGRAL_BOUND); + const float_64 stepwidth(SYNC_FUNCS_BESSEL_INTEGRAL_STEPWIDTH); + integrate(BesselK(), integral_result, x, upper_bound, stepwidth); + + return x * integral_result[0]; + } + /** Second synchrotron function + */ + float_64 SynchrotronFunctions::F_2(const float_64 x) const + { + if(x == float_64(0.0)) + return float_64(0.0); + + return x * boost::math::tr1::cyl_bessel_k(2.0 / 3.0, x); + } + + + void SynchrotronFunctions::init() + { + const uint32_t numSamples = SYNC_FUNCS_NUM_SAMPLES; + + this->dBuf_SyncFuncs[first] = MyBuf(new pmacc::container::DeviceBuffer(numSamples)); + this->dBuf_SyncFuncs[second] = MyBuf(new pmacc::container::DeviceBuffer(numSamples)); + + pmacc::container::HostBuffer hBuf_F_1(numSamples); + pmacc::container::HostBuffer hBuf_F_2(numSamples); + + for(uint32_t sampleIdx = 0u; sampleIdx < numSamples; sampleIdx++) + { + const float_64 x_m = float_64(sampleIdx) * SYNC_FUNCS_STEP_WIDTH; + /* This mapping increases the sample point density for small values of x + * where the synchrotron functions have a divergent slope. Without this mapping + * the emission probabilty of low-energy photons is underestimated. 
+ */ + const float_64 x = x_m * x_m * x_m; + + hBuf_F_1.origin()[sampleIdx] = static_cast(this->F_1(x)); + hBuf_F_2.origin()[sampleIdx] = static_cast(this->F_2(x)); + } + + *this->dBuf_SyncFuncs[first] = hBuf_F_1; + *this->dBuf_SyncFuncs[second] = hBuf_F_2; + } + + /** Return a cursor representing a synchrotron function + * + * @param syncFunction first or second synchrotron function + * @see: SynchrotronFunctions::Select + */ + SynchrotronFunctions::SyncFuncCursor SynchrotronFunctions::getCursor( + SynchrotronFunctions::Select syncFunction) const + { + using namespace pmacc; + + detail::MapToLookupTable::LinInterpCursor linInterpCursor + = cursor::tools::LinearInterp()(this->dBuf_SyncFuncs[syncFunction]->origin()); + + return cursor::make_Cursor( + detail::MapToLookupTable(linInterpCursor), + cursor::PlusNavigator(), + float_X(0.0)); + } + + } // namespace synchrotronPhotons + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/traits/GenerateSolversIfSpeciesEligible.hpp b/include/picongpu/particles/traits/GenerateSolversIfSpeciesEligible.hpp index 62da48791a..581655db2e 100644 --- a/include/picongpu/particles/traits/GenerateSolversIfSpeciesEligible.hpp +++ b/include/picongpu/particles/traits/GenerateSolversIfSpeciesEligible.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -30,62 +30,47 @@ namespace picongpu { -namespace particles -{ -namespace traits -{ - /** Return a list of Solvers specialized to all matching species - * - * Solvers can define the trait SpeciesEligibleForSolver to check a - * particle species if it fulfills requirements of the solver. - * - * The compile-time factory here returns a list of particle solvers (of the - * same solver given by T_Solver), but fully specialized with matching - * particle species from a sequence of species (T_SeqSpecies). 
- * - * @tparam T_Solver a particle solver which shall be specialized for all - * eligible particle species - * @tparam T_SeqSpecies a sequence of particle species to check if they are - * eligible to specialize T_Solver, also allows a - * single type instead of a sequence - * @tparam T_Eligible allows to specialize a solver but only if the check - * of the T_Eligible class fulfills the - * SpeciesEligibleForSolver trait, per default the - * T_Solver argument is checked - */ - template< - typename T_Solver, - typename T_SeqSpecies, - typename T_Eligible = T_Solver - > - struct GenerateSolversIfSpeciesEligible + namespace particles { - // wrap single arguments to sequence - using SeqSpecies = typename pmacc::ToSeq< T_SeqSpecies >::type; - // unspecialized solver - using Solver = T_Solver; - - template< typename T_Species > - struct Op : bmpl::apply1< - Solver, - T_Species - > + namespace traits { - }; + /** Return a list of Solvers specialized to all matching species + * + * Solvers can define the trait SpeciesEligibleForSolver to check a + * particle species if it fulfills requirements of the solver. + * + * The compile-time factory here returns a list of particle solvers (of the + * same solver given by T_Solver), but fully specialized with matching + * particle species from a sequence of species (T_SeqSpecies). 
+ * + * @tparam T_Solver a particle solver which shall be specialized for all + * eligible particle species + * @tparam T_SeqSpecies a sequence of particle species to check if they are + * eligible to specialize T_Solver, also allows a + * single type instead of a sequence + * @tparam T_Eligible allows to specialize a solver but only if the check + * of the T_Eligible class fulfills the + * SpeciesEligibleForSolver trait, per default the + * T_Solver argument is checked + */ + template + struct GenerateSolversIfSpeciesEligible + { + // wrap single arguments to sequence + using SeqSpecies = typename pmacc::ToSeq::type; + // unspecialized solver + using Solver = T_Solver; + + template + struct Op : bmpl::apply1 + { + }; - using SeqEligibleSpecies = typename bmpl::copy_if< - SeqSpecies, - particles::traits::SpeciesEligibleForSolver< - bmpl::_1, - T_Eligible - > - >::type; + using SeqEligibleSpecies = typename bmpl:: + copy_if>::type; - using type = typename bmpl::transform< - SeqEligibleSpecies, - Op< bmpl::_1 > - >::type; - }; -} // namespace traits -} // namespace particles + using type = typename bmpl::transform>::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/traits/GetAtomicNumbers.hpp b/include/picongpu/particles/traits/GetAtomicNumbers.hpp index 152c1c40d7..e1966efb0c 100644 --- a/include/picongpu/particles/traits/GetAtomicNumbers.hpp +++ b/include/picongpu/particles/traits/GetAtomicNumbers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten, Rene Widera +/* Copyright 2015-2021 Marco Garten, Rene Widera * * This file is part of PIConGPU. 
* @@ -27,20 +27,20 @@ namespace picongpu { -namespace traits -{ -template -struct GetAtomicNumbers -{ - using FrameType = typename T_Species::FrameType; + namespace traits + { + template + struct GetAtomicNumbers + { + using FrameType = typename T_Species::FrameType; - using hasAtomicNumbers = typename HasFlag >::type; - /* throw static assert if species has no protons or neutrons */ - PMACC_CASSERT_MSG(This_species_has_no_atomic_numbers,hasAtomicNumbers::value==true); + using hasAtomicNumbers = typename HasFlag>::type; + /* throw static assert if species has no protons or neutrons */ + PMACC_CASSERT_MSG(This_species_has_no_atomic_numbers, hasAtomicNumbers::value == true); - using FoundAtomicNumbersAlias = typename GetFlagType >::type; - using type = typename pmacc::traits::Resolve::type; -}; -} //namespace traits + using FoundAtomicNumbersAlias = typename GetFlagType>::type; + using type = typename pmacc::traits::Resolve::type; + }; + } // namespace traits -}// namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetCurrentSolver.hpp b/include/picongpu/particles/traits/GetCurrentSolver.hpp index 9bd2caf73d..dde3027e65 100644 --- a/include/picongpu/particles/traits/GetCurrentSolver.hpp +++ b/include/picongpu/particles/traits/GetCurrentSolver.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,15 +25,14 @@ namespace picongpu { -namespace traits -{ -template -struct GetCurrentSolver -{ - using type = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; -}; -} //namespace traits + namespace traits + { + template + struct GetCurrentSolver + { + using type = typename pmacc::traits::Resolve< + typename GetFlagType>::type>::type; + }; + } // namespace traits -}// namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetDensityRatio.hpp b/include/picongpu/particles/traits/GetDensityRatio.hpp index 76f9ab6624..ede76820da 100644 --- a/include/picongpu/particles/traits/GetDensityRatio.hpp +++ b/include/picongpu/particles/traits/GetDensityRatio.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Richard Pausch +/* Copyright 2015-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -29,38 +29,30 @@ namespace picongpu { -namespace traits -{ - -namespace detail -{ - value_identifier(float_X, DefaultDensityRatio, 1.0); -} // namespace detail - - -/** get density ratio of a species - * - * ratio is set to 1.0 if no alias `densityRatio<>` is defined - * - * @treturn ::type `value_identifier` with the default density - */ -template -struct GetDensityRatio -{ - using FrameType = typename T_Species::FrameType; - typedef typename HasFlag >::type hasDensityRatio; - typedef typename pmacc::traits::Resolve< - typename GetFlagType< - FrameType, densityRatio<> - >::type - >::type DensityRatioOfSpecies; - - typedef typename bmpl::if_< - hasDensityRatio, - DensityRatioOfSpecies, - detail::DefaultDensityRatio - >::type type; -}; - -} // namespace traits + namespace traits + { + namespace detail + { + value_identifier(float_X, DefaultDensityRatio, 1.0); + } // namespace detail + + + /** get density ratio of a species + * + * ratio is set to 1.0 if no alias `densityRatio<>` is defined + * + * @treturn ::type `value_identifier` with the default density + */ + template + struct GetDensityRatio + 
{ + using FrameType = typename T_Species::FrameType; + typedef typename HasFlag>::type hasDensityRatio; + typedef typename pmacc::traits::Resolve>::type>::type + DensityRatioOfSpecies; + + typedef typename bmpl::if_::type type; + }; + + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/particles/traits/GetEffectiveNuclearCharge.hpp b/include/picongpu/particles/traits/GetEffectiveNuclearCharge.hpp index fe673cf671..ab88a6f38c 100644 --- a/include/picongpu/particles/traits/GetEffectiveNuclearCharge.hpp +++ b/include/picongpu/particles/traits/GetEffectiveNuclearCharge.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten, Rene Widera, Axel Huebl +/* Copyright 2015-2021 Marco Garten, Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -28,30 +28,31 @@ namespace picongpu { -namespace traits -{ - template - struct GetEffectiveNuclearCharge + namespace traits { - using SpeciesType = T_Species; - using FrameType = typename SpeciesType::FrameType; + template + struct GetEffectiveNuclearCharge + { + using SpeciesType = T_Species; + using FrameType = typename SpeciesType::FrameType; - using hasEffectiveNuclearCharge = typename HasFlag >::type; - /* throw static assert if species has no predefined effective atomic numbers */ - PMACC_CASSERT_MSG(No_effective_atomic_numbers_are_defined_for_this_species,hasEffectiveNuclearCharge::value==true); + using hasEffectiveNuclearCharge = typename HasFlag>::type; + /* throw static assert if species has no predefined effective atomic numbers */ + PMACC_CASSERT_MSG( + No_effective_atomic_numbers_are_defined_for_this_species, + hasEffectiveNuclearCharge::value == true); - using FoundEffectiveNuclearChargeAlias = typename GetFlagType >::type; - /* Extract vector of effective atomic numbers */ - using type = typename pmacc::traits::Resolve::type; + using FoundEffectiveNuclearChargeAlias = typename GetFlagType>::type; + /* Extract vector of effective atomic numbers */ + using type = typename 
pmacc::traits::Resolve::type; - static constexpr int protonNumber = static_cast(GetAtomicNumbers::type::numberOfProtons); - /* length of the ionization energy vector */ - static constexpr int vecLength = type::dim; - /* assert that the number of arguments in the vector equal the proton number */ - PMACC_CASSERT_MSG( - __The_given_number_of_effective_atomic_numbers_Z_eff_should_be_exactly_the_proton_number_of_the_species__, - vecLength == protonNumber - ); - }; -} // namespace traits + static constexpr int protonNumber = static_cast(GetAtomicNumbers::type::numberOfProtons); + /* length of the ionization energy vector */ + static constexpr int vecLength = type::dim; + /* assert that the number of arguments in the vector equal the proton number */ + PMACC_CASSERT_MSG( + __The_given_number_of_effective_atomic_numbers_Z_eff_should_be_exactly_the_proton_number_of_the_species__, + vecLength == protonNumber); + }; + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/particles/traits/GetExchangeMemCfg.hpp b/include/picongpu/particles/traits/GetExchangeMemCfg.hpp index cac77438e4..47056999ea 100644 --- a/include/picongpu/particles/traits/GetExchangeMemCfg.hpp +++ b/include/picongpu/particles/traits/GetExchangeMemCfg.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -30,41 +30,31 @@ namespace picongpu { -namespace traits -{ - - /** get a memory configuration for species exchange buffer - * - * If exchangeMemCfg is not defined for a species than the default memory - * exchange size from the file memory.param are used. 
- * - * @tparam T_Species picongpu::Particles, type of the species - * @return class with buffer sizes for each direction - */ - template< typename T_Species > - struct GetExchangeMemCfg + namespace traits { - using FrameType = typename T_Species::FrameType; - using hasMemCfg = typename HasFlag< - FrameType, - exchangeMemCfg< > - >::type; - - using type = typename bmpl::if_< - hasMemCfg, - typename pmacc::traits::Resolve< - typename GetFlagType< - FrameType, - exchangeMemCfg< > - >::type - >::type, - ::picongpu::DefaultExchangeMemCfg - >::type; - }; - - //! short hand traits for GetExchangeMemCfg - template< typename T_Species > - using GetExchangeMemCfg_t = typename traits::GetExchangeMemCfg< T_Species >::type; - -} // namespace traits + /** get a memory configuration for species exchange buffer + * + * If exchangeMemCfg is not defined for a species than the default memory + * exchange size from the file memory.param are used. + * + * @tparam T_Species picongpu::Particles, type of the species + * @return class with buffer sizes for each direction + */ + template + struct GetExchangeMemCfg + { + using FrameType = typename T_Species::FrameType; + using hasMemCfg = typename HasFlag>::type; + + using type = typename bmpl::if_< + hasMemCfg, + typename pmacc::traits::Resolve>::type>::type, + ::picongpu::DefaultExchangeMemCfg>::type; + }; + + //! short hand traits for GetExchangeMemCfg + template + using GetExchangeMemCfg_t = typename traits::GetExchangeMemCfg::type; + + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/particles/traits/GetInterpolation.hpp b/include/picongpu/particles/traits/GetInterpolation.hpp index f9c0145688..f77e652ab4 100644 --- a/include/picongpu/particles/traits/GetInterpolation.hpp +++ b/include/picongpu/particles/traits/GetInterpolation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,16 +25,14 @@ namespace picongpu { -namespace traits -{ - -template -struct GetInterpolation -{ - using type = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; -}; -} //namespace traits + namespace traits + { + template + struct GetInterpolation + { + using type = typename pmacc::traits::Resolve< + typename GetFlagType>::type>::type; + }; + } // namespace traits -}// namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetIonizationEnergies.hpp b/include/picongpu/particles/traits/GetIonizationEnergies.hpp index dc8bcc247a..504cfb0f16 100644 --- a/include/picongpu/particles/traits/GetIonizationEnergies.hpp +++ b/include/picongpu/particles/traits/GetIonizationEnergies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Marco Garten, Rene Widera +/* Copyright 2015-2021 Marco Garten, Rene Widera * * This file is part of PIConGPU. * @@ -27,31 +27,32 @@ namespace picongpu { -namespace traits -{ -template -struct GetIonizationEnergies -{ - using SpeciesType = T_Species; - using FrameType = typename SpeciesType::FrameType; + namespace traits + { + template + struct GetIonizationEnergies + { + using SpeciesType = T_Species; + using FrameType = typename SpeciesType::FrameType; - using hasIonizationEnergies = typename HasFlag >::type; - /* throw static assert if species has no protons or neutrons */ - PMACC_CASSERT_MSG(No_ionization_energies_are_defined_for_this_species,hasIonizationEnergies::value==true); + using hasIonizationEnergies = typename HasFlag>::type; + /* throw static assert if species has no protons or neutrons */ + PMACC_CASSERT_MSG( + No_ionization_energies_are_defined_for_this_species, + hasIonizationEnergies::value == true); - using FoundIonizationEnergiesAlias = typename GetFlagType >::type; - /* Extract ionization energy vector from AU namespace */ - using type = typename pmacc::traits::Resolve::type; + using FoundIonizationEnergiesAlias = typename GetFlagType>::type; + /* Extract ionization 
energy vector from AU namespace */ + using type = typename pmacc::traits::Resolve::type; - static constexpr int protonNumber = static_cast(GetAtomicNumbers::type::numberOfProtons); - /* length of the ionization energy vector */ - static constexpr int vecLength = type::dim; - /* assert that the number of arguments in the vector equal the proton number */ - PMACC_CASSERT_MSG( - __The_given_number_of_ionization_energies_should_be_exactly_the_proton_number_of_the_species__, - vecLength == protonNumber - ); -}; -} //namespace traits + static constexpr int protonNumber = static_cast(GetAtomicNumbers::type::numberOfProtons); + /* length of the ionization energy vector */ + static constexpr int vecLength = type::dim; + /* assert that the number of arguments in the vector equal the proton number */ + PMACC_CASSERT_MSG( + __The_given_number_of_ionization_energies_should_be_exactly_the_proton_number_of_the_species__, + vecLength == protonNumber); + }; + } // namespace traits -}// namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetIonizerList.hpp b/include/picongpu/particles/traits/GetIonizerList.hpp index 49f59c3b92..7b38c2aea1 100644 --- a/include/picongpu/particles/traits/GetIonizerList.hpp +++ b/include/picongpu/particles/traits/GetIonizerList.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten, Axel Huebl +/* Copyright 2014-2021 Marco Garten, Axel Huebl * * This file is part of PIConGPU. * @@ -30,43 +30,36 @@ namespace picongpu { -namespace particles -{ -namespace traits -{ - /** Returns a sequence with ionizers for a species - * - * Several ionization methods can be assigned to a species which are called - * consecutively (in the same order as the user inputs them) within a single - * time step. 
- * - * @tparam T_SpeciesType ion species - */ - template< typename T_SpeciesType > - struct GetIonizerList + namespace particles { - using SpeciesType = T_SpeciesType; - using FrameType = typename SpeciesType::FrameType; + namespace traits + { + /** Returns a sequence with ionizers for a species + * + * Several ionization methods can be assigned to a species which are called + * consecutively (in the same order as the user inputs them) within a single + * time step. + * + * @tparam T_SpeciesType ion species + */ + template + struct GetIonizerList + { + using SpeciesType = T_SpeciesType; + using FrameType = typename SpeciesType::FrameType; - // the following line only fetches the alias - using FoundIonizersAlias = typename GetFlagType< - FrameType, - ionizers<> - >::type; + // the following line only fetches the alias + using FoundIonizersAlias = typename GetFlagType>::type; - // this now resolves the alias into the actual object type, a list of ionizers - using FoundIonizerList = typename pmacc::traits::Resolve< FoundIonizersAlias >::type; + // this now resolves the alias into the actual object type, a list of ionizers + using FoundIonizerList = typename pmacc::traits::Resolve::type; - using type = typename pmacc::OperateOnSeq< - FoundIonizerList, - bmpl::apply1< - bmpl::_1, - SpeciesType - >, - pmacc::meta::accessors::Type<> - >::type; - }; + using type = typename pmacc::OperateOnSeq< + FoundIonizerList, + bmpl::apply1, + pmacc::meta::accessors::Type<>>::type; + }; -} // namespace traits -} // namespace particles + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/traits/GetMarginPusher.hpp b/include/picongpu/particles/traits/GetMarginPusher.hpp index e789a33c64..e1ebb80645 100644 --- a/include/picongpu/particles/traits/GetMarginPusher.hpp +++ b/include/picongpu/particles/traits/GetMarginPusher.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Richard Pausch +/* Copyright 2015-2021 Richard Pausch, Sergei 
Bastrakov * * This file is part of PIConGPU. * @@ -27,36 +27,80 @@ namespace picongpu { + namespace traits + { + /** Get margins of a pusher for species + * + * @tparam T_Species particle species type + * @tparam T_GetLowerMargin lower margin for pusher getter type + * @tparam T_GetUpperMargin upper margin for pusher getter type + */ + template< + typename T_Species, + typename T_GetLowerMargin = GetLowerMargin>, + typename T_GetUpperMargin = GetUpperMargin>> + struct GetMarginPusher + { + using AddLowerMargins = pmacc::math::CT::add>, T_GetLowerMargin>; + using LowerMargin = typename bmpl::apply::type; -namespace traits -{ -template -struct GetMarginPusher -{ - using AddLowerMargins = pmacc::math::CT::add< - GetLowerMargin< GetInterpolation< bmpl::_1 > >, - GetLowerMargin< GetPusher< bmpl::_1 > > - >; - using LowerMargin = typename bmpl::apply::type; - - using AddUpperMargins = pmacc::math::CT::add< - GetUpperMargin< GetInterpolation< bmpl::_1 > >, - GetUpperMargin< GetPusher< bmpl::_1 > > - >; - using UpperMargin = typename bmpl::apply::type; -}; - -template -struct GetLowerMarginPusher -{ - using type = typename traits::GetMarginPusher::LowerMargin; -}; + using AddUpperMargins = pmacc::math::CT::add>, T_GetUpperMargin>; + using UpperMargin = typename bmpl::apply::type; + }; -template -struct GetUpperMarginPusher -{ - using type = typename traits::GetMarginPusher::UpperMargin; -}; + /** Get lower margin of a pusher for species + * + * @tparam T_Species particle species type + */ + template + struct GetLowerMarginPusher + { + using type = typename traits::GetMarginPusher::LowerMargin; + }; + + /** Get lower margin of the given pusher for species + * + * Normally, the pusher does not have to be given explicitly. 
+ * However, it is needed for composite pushers + * + * @tparam T_Species particle species type + * @tparam T_Pusher pusher type + */ + template + struct GetLowerMarginForPusher + { + using type = typename traits::GetMarginPusher< + T_Species, + typename GetLowerMargin::type, + typename GetUpperMargin::type>::LowerMargin; + }; + + /** Get upper margin of a pusher for species + * + * @tparam T_Species particle species type + */ + template + struct GetUpperMarginPusher + { + using type = typename traits::GetMarginPusher::UpperMargin; + }; + + /** Get upper margin of the given pusher for species + * + * Normally, the pusher does not have to be given explicitly. + * However, it is needed for composite pushers + * + * @tparam T_Species particle species type + * @tparam T_Pusher pusher type + */ + template + struct GetUpperMarginForPusher + { + using type = typename traits::GetMarginPusher< + T_Species, + typename GetLowerMargin::type, + typename GetUpperMargin::type>::UpperMargin; + }; -}// namespace traits -}// namespace picongpu + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetPhotonCreator.hpp b/include/picongpu/particles/traits/GetPhotonCreator.hpp index 77bd08f287..fc8777fc32 100644 --- a/include/picongpu/particles/traits/GetPhotonCreator.hpp +++ b/include/picongpu/particles/traits/GetPhotonCreator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. 
* @@ -31,44 +31,33 @@ namespace picongpu { -namespace particles -{ -namespace traits -{ - - /** Get the functor to create photons from a species - * - * @tparam T_SpeciesType type or name as boost::mpl::string - */ - template< typename T_SpeciesType > - struct GetPhotonCreator + namespace particles { - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - using FrameType = typename SpeciesType::FrameType; - - // The following line only fetches the alias - using FoundSynchrotronPhotonsAlias = typename GetFlagType< - FrameType, - picongpu::synchrotronPhotons<> - >::type; - - // This now resolves the alias into the actual object type and select the species from the species list - using FoundPhotonSpecies = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - typename pmacc::traits::Resolve< FoundSynchrotronPhotonsAlias >::type - >; - - // This specifies the target species as the second template parameter of the photon creator - using type = synchrotronPhotons::PhotonCreator< - SpeciesType, - FoundPhotonSpecies - >; - - }; - -} // namespace traits -} // namespace particles + namespace traits + { + /** Get the functor to create photons from a species + * + * @tparam T_SpeciesType type or name as boost::mpl::string + */ + template + struct GetPhotonCreator + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + using FrameType = typename SpeciesType::FrameType; + + // The following line only fetches the alias + using FoundSynchrotronPhotonsAlias = + typename GetFlagType>::type; + + // This now resolves the alias into the actual object type and select the species from the species list + using FoundPhotonSpecies = pmacc::particles::meta::FindByNameOrType_t< + VectorAllSpecies, + typename pmacc::traits::Resolve::type>; + + // This specifies the target species as the second template parameter of the photon creator + using type = synchrotronPhotons::PhotonCreator; + }; + + } // namespace traits 
+ } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/traits/GetPusher.hpp b/include/picongpu/particles/traits/GetPusher.hpp index bcfe0d0ab4..e16bb2f50f 100644 --- a/include/picongpu/particles/traits/GetPusher.hpp +++ b/include/picongpu/particles/traits/GetPusher.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch +/* Copyright 2014-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -25,16 +25,14 @@ namespace picongpu { + namespace traits + { + template + struct GetPusher + { + using type = typename pmacc::traits::Resolve< + typename GetFlagType>::type>::type; + }; -namespace traits -{ -template -struct GetPusher -{ - using type = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; -}; - -}// namespace traits -}// namespace picongpu + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetShape.hpp b/include/picongpu/particles/traits/GetShape.hpp index 4a4194a4ba..f448ee0ec4 100644 --- a/include/picongpu/particles/traits/GetShape.hpp +++ b/include/picongpu/particles/traits/GetShape.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,17 +25,15 @@ namespace picongpu { -namespace traits -{ - -template -struct GetShape -{ - using type = typename pmacc::traits::Resolve< - typename GetFlagType >::type - >::type; -}; + namespace traits + { + template + struct GetShape + { + using type = typename pmacc::traits::Resolve< + typename GetFlagType>::type>::type; + }; -} //namespace traits + } // namespace traits -}// namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/particles/traits/GetSpeciesFlagName.hpp b/include/picongpu/particles/traits/GetSpeciesFlagName.hpp index bf808c9b58..9bce118077 100644 --- a/include/picongpu/particles/traits/GetSpeciesFlagName.hpp +++ b/include/picongpu/particles/traits/GetSpeciesFlagName.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -29,49 +29,36 @@ namespace picongpu { -namespace traits -{ - /** Get the GetStringProperties "name" attribute of a Species' Flag - * - * Returns the "name" attribute of a species string attribute list as - * std::string and if not present, returns "none". - */ - template< - typename T_Species, - typename T_Flag, - bool T_hasFlag = HasFlag< - typename T_Species::FrameType, - T_Flag - >::type::value - > - struct - GetSpeciesFlagName + namespace traits { - using SpeciesFlag = typename pmacc::traits::Resolve< - typename GetFlagType< - typename T_Species::FrameType, - T_Flag - >::type - >::type; - - std::string operator()() const + /** Get the GetStringProperties "name" attribute of a Species' Flag + * + * Returns the "name" attribute of a species string attribute list as + * std::string and if not present, returns "none". 
+ */ + template< + typename T_Species, + typename T_Flag, + bool T_hasFlag = HasFlag::type::value> + struct GetSpeciesFlagName { - GetStringProperties< SpeciesFlag > stringProps; - return stringProps["name"].value; - } - }; + using SpeciesFlag = typename pmacc::traits::Resolve< + typename GetFlagType::type>::type; - template< - typename T_Species, - typename T_Flag - > - struct - GetSpeciesFlagName - { - std::string operator()() const + std::string operator()() const + { + GetStringProperties stringProps; + return stringProps["name"].value; + } + }; + + template + struct GetSpeciesFlagName { - return "none"; - } - }; -} // namespace traits + std::string operator()() const + { + return "none"; + } + }; + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/particles/traits/HasIonizersWithRNG.hpp b/include/picongpu/particles/traits/HasIonizersWithRNG.hpp deleted file mode 100644 index c23fff39f5..0000000000 --- a/include/picongpu/particles/traits/HasIonizersWithRNG.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* Copyright 2017-2020 Axel Huebl, Marco Garten - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" - -#include -#include -#include - -#include "picongpu/traits/UsesRNG.hpp" -#include "picongpu/particles/traits/GetIonizerList.hpp" - -#include -#include -#include - - -namespace picongpu -{ -namespace particles -{ -namespace traits -{ - /** Check Ionizers for RNG Need - * - * Checks all species for ionizers and within those if a random number generator is needed. - * Returns a true-valued boost integral constant in ::type if a RNG is needed. - * - * @tparam T_VectorSpecies sequence of (ion) species to check ionizers for - */ - template< typename T_VectorSpecies > - struct HasIonizersWithRNG - { - using VectorSpecies = T_VectorSpecies; - - // make a list of all species that can be ionized - using VectorSpeciesWithIonizer = typename pmacc::particles::traits::FilterByFlag< - VectorSpecies, - ionizers<> - >::type; - - // make a list of all ionizers that will be used by all species - using AllUsedIonizers = typename pmacc::MakeSeqFromNestedSeq< - typename pmacc::OperateOnSeq< - VectorSpeciesWithIonizer, - GetIonizerList< bmpl::_1 > - >::type - >::type; - - /* make a list of `boost::true_type`s and `boost::false_type`s for species that use or do - * not use the RNG during ionization - */ - using AllIonizersUsingRNG = typename pmacc::OperateOnSeq< - AllUsedIonizers, - picongpu::traits::UsesRNG< bmpl::_1 > - >::type; - - // check if at least one RNG is needed - using type = typename boost::mpl::contains< - AllIonizersUsingRNG, - boost::true_type - >::type; - }; - -} // namespace traits -} // namespace particles -} // namespace picongpu diff --git a/include/picongpu/particles/traits/MacroWeighted.hpp b/include/picongpu/particles/traits/MacroWeighted.hpp index efee5f473c..2fe8ba7d63 100644 --- a/include/picongpu/particles/traits/MacroWeighted.hpp +++ b/include/picongpu/particles/traits/MacroWeighted.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part 
of PIConGPU. * @@ -19,36 +19,36 @@ #pragma once -//include "simulation_defines.hpp" +// include "simulation_defines.hpp" #include namespace picongpu { -namespace traits -{ - /** Describe if a particle attribute describes the quantity of a macro - * particle - * - * Depending on the implementation of an attribute, it might be sometimes - * useful to return a quantity regarding one of the underlying real - * particles (false: "this attribute is not weighted accordingly for the - * whole ensemble of particles in its macro particle) or just handle the - * whole macro particle at once - * (true: "this attribute is already weighted"). - * - * This trait defines for each attribute if it needs to be scaled with the - * weighting. *How* the scaling with weighting is applied can be seen in - * \see WeightingPower - * \see http://www.openPMD.org - * \see http://dx.doi.org/10.5281/zenodo.33624 - * \see https://git.io/vwlWa - * - * \tparam T_Identifier any picongpu identifier - * \return \p bool ::get() as static public method - * - */ - template - struct MacroWeighted; + namespace traits + { + /** Describe if a particle attribute describes the quantity of a macro + * particle + * + * Depending on the implementation of an attribute, it might be sometimes + * useful to return a quantity regarding one of the underlying real + * particles (false: "this attribute is not weighted accordingly for the + * whole ensemble of particles in its macro particle) or just handle the + * whole macro particle at once + * (true: "this attribute is already weighted"). + * + * This trait defines for each attribute if it needs to be scaled with the + * weighting. 
*How* the scaling with weighting is applied can be seen in + * \see WeightingPower + * \see http://www.openPMD.org + * \see http://dx.doi.org/10.5281/zenodo.33624 + * \see https://git.io/vwlWa + * + * \tparam T_Identifier any picongpu identifier + * \return \p bool ::get() as static public method + * + */ + template + struct MacroWeighted; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/particles/traits/SpeciesEligibleForSolver.hpp b/include/picongpu/particles/traits/SpeciesEligibleForSolver.hpp index 0f3603bd03..c27d52c2f7 100644 --- a/include/picongpu/particles/traits/SpeciesEligibleForSolver.hpp +++ b/include/picongpu/particles/traits/SpeciesEligibleForSolver.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -24,27 +24,24 @@ namespace picongpu { -namespace particles -{ -namespace traits -{ - /** Check if species fulfills requirements of a solver - * - * Defines a boost::mpl::bool_ true type is the particle species as all - * requirements fulfilled for a solver. - * - * @tparam T_Species Species to check - * @tparam T_Solver Solver with requirements - */ - template< - typename T_Species, - typename T_Solver - > - struct SpeciesEligibleForSolver + namespace particles { - using type = boost::mpl::bool_< true >; - }; + namespace traits + { + /** Check if species fulfills requirements of a solver + * + * Defines a boost::mpl::bool_ true type is the particle species as all + * requirements fulfilled for a solver. 
+ * + * @tparam T_Species Species to check + * @tparam T_Solver Solver with requirements + */ + template + struct SpeciesEligibleForSolver + { + using type = boost::mpl::bool_; + }; -} // namespace traits -} // namespace particles + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/particles/traits/WeightingPower.hpp b/include/picongpu/particles/traits/WeightingPower.hpp index 507a68ac35..add45ad094 100644 --- a/include/picongpu/particles/traits/WeightingPower.hpp +++ b/include/picongpu/particles/traits/WeightingPower.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -19,33 +19,33 @@ #pragma once -//include "simulation_defines.hpp" +// include "simulation_defines.hpp" #include namespace picongpu { -namespace traits -{ - /** Describe if a particle attribute describes the quantity of a macro - * particle - * - * Depending on the implementation of an attribute, it might be sometimes - * useful to return a quantity regarding one of the underlying real - * particles (\see MacroWeighted). - * - * This trait defines how each attribute needs to be scaled with the - * weighting (linear, quadratic, ...) to convert between "real" and "macro" - * particle attributes. - * \see http://www.openPMD.org - * \see http://dx.doi.org/10.5281/zenodo.33624 - * \see https://git.io/vwlWa - * - * \tparam T_Identifier any picongpu identifier - * \return \p float_64 ::get() as static public method - * - */ - template - struct WeightingPower; + namespace traits + { + /** Describe if a particle attribute describes the quantity of a macro + * particle + * + * Depending on the implementation of an attribute, it might be sometimes + * useful to return a quantity regarding one of the underlying real + * particles (\see MacroWeighted). + * + * This trait defines how each attribute needs to be scaled with the + * weighting (linear, quadratic, ...) 
to convert between "real" and "macro" + * particle attributes. + * \see http://www.openPMD.org + * \see http://dx.doi.org/10.5281/zenodo.33624 + * \see https://git.io/vwlWa + * + * \tparam T_Identifier any picongpu identifier + * \return \p float_64 ::get() as static public method + * + */ + template + struct WeightingPower; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/plugins/BinEnergyParticles.hpp b/include/picongpu/plugins/BinEnergyParticles.hpp index 6c6073fd64..0da0bbf8a6 100644 --- a/include/picongpu/plugins/BinEnergyParticles.hpp +++ b/include/picongpu/plugins/BinEnergyParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, * Rene Widera, Richard Pausch * * This file is part of PIConGPU. @@ -58,688 +58,489 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; -/** calculate a energy histogram of a species - * - * if a particle filter is given, only the filtered particles are counted - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelBinEnergyParticles -{ - /* sum up the energy of all particles - * - * the kinetic energy of all active particles will be calculated + /** calculate a energy histogram of a species * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_BinBox pmacc::DataBox, box type for the histogram in global memory - * @tparam T_Mapping type of the mapper to map a cuda block to a supercell index - * @tparam T_Acc alpaka accelerator type + * if a particle filter is given, only the filtered particles are counted * - * @param acc alpaka accelerator - * @param pb box with access to the particles of the current used species - * @param gBins box with memory for resulting histogram - * @param numBins number of bins in the 
histogram (must be fit into the shared memory) - * @param minEnergy particle energy for the first bin - * @param maxEnergy particle energy for the last bin - * @param mapper functor to map a cuda block to a supercells index + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename T_BinBox, - typename T_Mapping, - typename T_Filter, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - T_BinBox gBins, - int const numBins, - float_X const minEnergy, - float_X const maxEnergy, - T_Mapping const mapper, - T_Filter filter - ) const + template + struct KernelBinEnergyParticles { - using namespace pmacc::mappings::threads; - using SuperCellSize = typename MappingDesc::SuperCellSize; - using FramePtr = typename T_ParBox::FramePtr; - constexpr uint32_t maxParticlesPerFrame = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - - PMACC_SMEM( - acc, - frame, - FramePtr - ); - - PMACC_SMEM( - acc, - particlesInSuperCell, - lcellId_t - ); - - /* shBins index can go from 0 to (numBins+2)-1 - * 0 is for maxEnergy + /* sum up the energy of all particles + * + * the kinetic energy of all active particles will be calculated + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_BinBox pmacc::DataBox, box type for the histogram in global memory + * @tparam T_Mapping type of the mapper to map a cupla block to a supercell index + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param pb box with access to the particles of the current used species + * @param gBins box with memory for resulting histogram + * @param numBins number of bins in the histogram (must be fit into the shared memory) + * @param minEnergy particle energy for the first bin + * @param maxEnergy particle energy for the last bin + * @param mapper functor to map a cupla block to a supercells index */ - sharedMemExtern(shBin,float_X); /* size must be 
numBins+2 because we have max */ + template + DINLINE void operator()( + T_Acc const& acc, + T_ParBox pb, + T_BinBox gBins, + int const numBins, + float_X const minEnergy, + float_X const maxEnergy, + T_Mapping const mapper, + T_Filter filter) const + { + using namespace pmacc::mappings::threads; + using SuperCellSize = typename MappingDesc::SuperCellSize; + using FramePtr = typename T_ParBox::FramePtr; + constexpr uint32_t maxParticlesPerFrame = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + PMACC_SMEM(acc, frame, FramePtr); - int const realNumBins = numBins + 2; + PMACC_SMEM(acc, particlesInSuperCell, lcellId_t); - uint32_t const workerIdx = threadIdx.x; + /* shBins index can go from 0 to (numBins+2)-1 + * 0 is for maxEnergy + */ + sharedMemExtern(shBin, float_X); /* size must be numBins+2 because we have max */ - using MasterOnly = IdxConfig< - 1, - numWorkers - >; - DataSpace< simDim > const superCellIdx( - mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ) - ); + int const realNumBins = numBins + 2; - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getLastFrame( superCellIdx ); - particlesInSuperCell = pb.getSuperCell( superCellIdx ).getSizeLastFrame( ); - } - ); + uint32_t const workerIdx = cupla::threadIdx(acc).x; - ForEachIdx< - IdxConfig< - numWorkers, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { + using MasterOnly = IdxConfig<1, numWorkers>; + + DataSpace const superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + frame = pb.getLastFrame(superCellIdx); + particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame(); + }); + + ForEachIdx>{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { /* set all bins to 0 */ - for( int i = linearIdx; i < realNumBins; i += numWorkers ) - shBin[ i ] = float_X( 0. 
); - } - ); + for(int i = linearIdx; i < realNumBins; i += numWorkers) + shBin[i] = float_X(0.); + }); - __syncthreads(); + cupla::__syncthreads(acc); - if( !frame.isValid( ) ) - return; /* end kernel if we have no frames */ + if(!frame.isValid()) + return; /* end kernel if we have no frames */ - auto accFilter = filter( - acc, - superCellIdx - mapper.getGuardingSuperCells( ), - WorkerCfg< numWorkers >{ workerIdx } - ); + auto accFilter + = filter(acc, superCellIdx - mapper.getGuardingSuperCells(), WorkerCfg{workerIdx}); - while( frame.isValid() ) - { - // move over all particles in a frame - ForEachIdx< - IdxConfig< - maxParticlesPerFrame, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - if( linearIdx < particlesInSuperCell ) - { - auto const particle = frame[ linearIdx ]; - if( - accFilter( - acc, - particle - ) - ) + while(frame.isValid()) + { + // move over all particles in a frame + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx < particlesInSuperCell) { - /* kinetic Energy for Particles: E^2 = p^2*c^2 + m^2*c^4 - * = c^2 * [p^2 + m^2*c^2] - */ - float3_X const mom = particle[ momentum_ ]; - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( - weighting, - particle - ); - - // calculate kinetic energy of the macro particle - float_X localEnergy = KinEnergy< >( )( - mom, - mass - ); - - localEnergy /= weighting; - - /* +1 move value from 1 to numBins+1 */ - int binNumber = math::floor( - ( localEnergy - minEnergy ) / - ( maxEnergy - minEnergy ) * static_cast< float_X >( numBins ) - ) + 1; - - int const maxBin = numBins + 1; - - /* all entries larger than maxEnergy go into bin maxBin */ - binNumber = binNumber < maxBin ? binNumber : maxBin; - - /* all entries smaller than minEnergy go into bin zero */ - binNumber = binNumber > 0 ? binNumber : 0; - - /*!\todo: we can't use 64bit type on this place (NVIDIA BUG?) 
- * COMPILER ERROR: ptxas /tmp/tmpxft_00005da6_00000000-2_main.ptx, line 4246; error : Global state space expected for instruction 'atom' - * I think this is a problem with extern shared mem and atmic (only on TESLA) - * NEXT BUG: don't do uint32_t w=__float2uint_rn(weighting); and use w for atomic, this create wrong results - * - * uses a normed float weighting to avoid an overflow of the floating point result - * for the reduced weighting if the particle weighting is very large - */ - float_X const normedWeighting = weighting / - float_X( particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE ); - atomicAdd( - &( shBin[ binNumber ] ), - normedWeighting, - ::alpaka::hierarchy::Threads{} - ); + auto const particle = frame[linearIdx]; + if(accFilter(acc, particle)) + { + /* kinetic Energy for Particles: E^2 = p^2*c^2 + m^2*c^4 + * = c^2 * [p^2 + m^2*c^2] + */ + float3_X const mom = particle[momentum_]; + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + + // calculate kinetic energy of the macro particle + float_X localEnergy = KinEnergy<>()(mom, mass); + + localEnergy /= weighting; + + /* +1 move value from 1 to numBins+1 */ + int binNumber = math::floor( + (localEnergy - minEnergy) / (maxEnergy - minEnergy) + * static_cast(numBins)) + + 1; + + int const maxBin = numBins + 1; + + /* all entries larger than maxEnergy go into bin maxBin */ + binNumber = binNumber < maxBin ? binNumber : maxBin; + + /* all entries smaller than minEnergy go into bin zero */ + binNumber = binNumber > 0 ? binNumber : 0; + + /*!\todo: we can't use 64bit type on this place (NVIDIA BUG?) 
+ * COMPILER ERROR: ptxas /tmp/tmpxft_00005da6_00000000-2_main.ptx, line 4246; error : + * Global state space expected for instruction 'atom' I think this is a problem with + * extern shared mem and atmic (only on TESLA) NEXT BUG: don't do uint32_t + * w=__float2uint_rn(weighting); and use w for atomic, this create wrong results + * + * uses a normed float weighting to avoid an overflow of the floating point result + * for the reduced weighting if the particle weighting is very large + */ + float_X const normedWeighting + = weighting / float_X(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); + cupla::atomicAdd( + acc, + &(shBin[binNumber]), + normedWeighting, + ::alpaka::hierarchy::Threads{}); + } } - } - } - ); + }); - __syncthreads(); + cupla::__syncthreads(acc); - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getPreviousFrame( frame ); + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + frame = pb.getPreviousFrame(frame); particlesInSuperCell = maxParticlesPerFrame; - } - ); - __syncthreads(); - } - - ForEachIdx< - IdxConfig< - numWorkers, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - for( int i = linearIdx; i < realNumBins; i += numWorkers ) - atomicAdd( - &( gBins[ i ] ), - float_64( shBin[ i ] ), - ::alpaka::hierarchy::Blocks{} - ); + }); + cupla::__syncthreads(acc); } - ); - } -}; -template -class BinEnergyParticles : public plugins::multi::ISlave -{ -private: + ForEachIdx>{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + for(int i = linearIdx; i < realNumBins; i += numWorkers) + cupla::atomicAdd(acc, &(gBins[i]), float_64(shBin[i]), ::alpaka::hierarchy::Blocks{}); + }); + } + }; - struct Help : public plugins::multi::IHelp + template + class BinEnergyParticles : public plugins::multi::ISlave { - - /** creates a instance of ISlave - * - * @tparam T_Slave type of the interface implementation (must inherit from ISlave) - * 
@param help plugin defined help - * @param id index of the plugin, range: [0;help->getNumPlugins()) - */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) + private: + struct Help : public plugins::multi::IHelp { - return std::shared_ptr< ISlave >( - new BinEnergyParticles< ParticlesType >( - help, - id, - cellDescription - ) - ); - } + /** creates a instance of ISlave + * + * @tparam T_Slave type of the interface implementation (must inherit from ISlave) + * @param help plugin defined help + * @param id index of the plugin, range: [0;help->getNumPlugins()) + */ + std::shared_ptr create(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) + { + return std::shared_ptr(new BinEnergyParticles(help, id, cellDescription)); + } - // find all valid filter for the current used species - using EligibleFilters = typename MakeSeqFromNestedSeq< - typename bmpl::transform< + // find all valid filter for the current used species + using EligibleFilters = typename MakeSeqFromNestedSeq - >::type - >::type; - - //! periodicity of computing the particle energy - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "enable plugin [for each n-th step]" - }; - plugins::multi::Option< std::string > filter = { - "filter", - "particle filter: " - }; - plugins::multi::Option< int > numBins = { - "binCount", - "number of bins for the energy range", - 1024 - }; - plugins::multi::Option< float_X > minEnergy_keV = { - "minEnergy", - "minEnergy[in keV]", - 0.0 - }; - plugins::multi::Option< float_X > maxEnergy_keV = { - "maxEnergy", - "maxEnergy[in keV]" - }; - - //! string list with all possible particle filters - std::string concatenatedFilterNames; - std::vector< std::string > allowedFilters; + particles::traits::GenerateSolversIfSpeciesEligible>::type>::type; + + //! 
periodicity of computing the particle energy + plugins::multi::Option notifyPeriod = {"period", "enable plugin [for each n-th step]"}; + plugins::multi::Option filter = {"filter", "particle filter: "}; + plugins::multi::Option numBins = {"binCount", "number of bins for the energy range", 1024}; + plugins::multi::Option minEnergy_keV = {"minEnergy", "minEnergy[in keV]", 0.0}; + plugins::multi::Option maxEnergy_keV = {"maxEnergy", "maxEnergy[in keV]"}; + + //! string list with all possible particle filters + std::string concatenatedFilterNames; + std::vector allowedFilters; + + ///! method used by plugin controller to get --help description + void registerHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + meta::ForEach> getEligibleFilterNames; + getEligibleFilterNames(allowedFilters); - ///! method used by plugin controller to get --help description - void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - meta::ForEach< - EligibleFilters, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleFilterNames; - getEligibleFilterNames( allowedFilters ); - - concatenatedFilterNames = plugins::misc::concatenateToString( - allowedFilters, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - filter.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedFilterNames + "]" - ); - numBins.registerHelp( - desc, - masterPrefix + prefix - ); - minEnergy_keV.registerHelp( - desc, - masterPrefix + prefix - ); - maxEnergy_keV.registerHelp( - desc, - masterPrefix + prefix - ); - } + concatenatedFilterNames = plugins::misc::concatenateToString(allowedFilters, ", "); - void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - } + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + filter.registerHelp(desc, 
masterPrefix + prefix, std::string("[") + concatenatedFilterNames + "]"); + numBins.registerHelp(desc, masterPrefix + prefix); + minEnergy_keV.registerHelp(desc, masterPrefix + prefix); + maxEnergy_keV.registerHelp(desc, masterPrefix + prefix); + } + void expandHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + } - void validateOptions() - { - if( notifyPeriod.size() != filter.size() ) - throw std::runtime_error( name + ": parameter filter and period are not used the same number of times" ); - if( notifyPeriod.size() != maxEnergy_keV.size() ) - throw std::runtime_error( name + ": parameter maxEnergy and period are not used the same number of times" ); - // check if user passed filter name are valid - for( auto const & filterName : filter) + void validateOptions() { - if( - std::find( - allowedFilters.begin(), - allowedFilters.end(), - filterName - ) == allowedFilters.end() - ) + if(notifyPeriod.size() != filter.size()) + throw std::runtime_error( + name + ": parameter filter and period are not used the same number of times"); + if(notifyPeriod.size() != maxEnergy_keV.size()) + throw std::runtime_error( + name + ": parameter maxEnergy and period are not used the same number of times"); + + // check if user passed filter name are valid + for(auto const& filterName : filter) { - throw std::runtime_error( name + ": unknown filter '" + filterName + "'" ); + if(std::find(allowedFilters.begin(), allowedFilters.end(), filterName) == allowedFilters.end()) + { + throw std::runtime_error(name + ": unknown filter '" + filterName + "'"); + } } } - } - - size_t getNumPlugins() const - { - return notifyPeriod.size(); - } - - std::string getDescription() const - { - return description; - } - - std::string getOptionPrefix() const - { - return prefix; - } - std::string getName() const - { - return name; - } + size_t getNumPlugins() const + { + return notifyPeriod.size(); + } - std::string const name = 
"BinEnergyParticles"; - //! short description of the plugin - std::string const description = "calculate a energy histogram of a species"; - //! prefix used for command line arguments - std::string const prefix = ParticlesType::FrameType::getName( ) + std::string( "_energyHistogram" ); - }; + std::string getDescription() const + { + return description; + } - GridBuffer *gBins = nullptr; - MappingDesc *m_cellDescription = nullptr; + std::string getOptionPrefix() const + { + return prefix; + } - std::string filename; + std::string getName() const + { + return name; + } - float_64 * binReduced = nullptr; + std::string const name = "BinEnergyParticles"; + //! short description of the plugin + std::string const description = "calculate a energy histogram of a species"; + //! prefix used for command line arguments + std::string const prefix = ParticlesType::FrameType::getName() + std::string("_energyHistogram"); + }; - int numBins; - int realNumBins; - /* variables for energy limits of the histogram in keV */ - float_X minEnergy_keV; - float_X maxEnergy_keV; + GridBuffer* gBins = nullptr; + MappingDesc* m_cellDescription = nullptr; - std::ofstream outFile; + std::string filename; - /* only rank 0 create a file */ - bool writeToFile = false; + float_64* binReduced = nullptr; - mpi::MPIReduce reduce; + int numBins; + int realNumBins; + /* variables for energy limits of the histogram in keV */ + float_X minEnergy_keV; + float_X maxEnergy_keV; - std::shared_ptr< Help > m_help; - size_t m_id; + std::ofstream outFile; -public: + /* only rank 0 create a file */ + bool writeToFile = false; - //! 
must be implemented by the user - static std::shared_ptr< plugins::multi::IHelp > getHelp() - { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); - } - - BinEnergyParticles( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help >(help) ), - m_id( id ), - m_cellDescription( cellDescription ) - { - filename = m_help->getOptionPrefix() + "_" + m_help->filter.get( m_id ) + ".dat"; + mpi::MPIReduce reduce; - numBins = m_help->numBins.get( m_id ); + std::shared_ptr m_help; + size_t m_id; - if( numBins <= 0 ) + public: + //! must be implemented by the user + static std::shared_ptr getHelp() { - throw std::runtime_error( - std::string("[Plugin] [") + m_help->getOptionPrefix( ) + - "] error since " + m_help->getOptionPrefix( ) + - ".binCount) must be > 0 (input " + - std::to_string( numBins ) + " bins)" - ); + return std::shared_ptr(new Help{}); } - minEnergy_keV = m_help->minEnergy_keV.get( m_id ); - maxEnergy_keV = m_help->maxEnergy_keV.get( m_id ); + BinEnergyParticles(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) + { + filename = m_help->getOptionPrefix() + "_" + m_help->filter.get(m_id) + ".dat"; - realNumBins = numBins + 2; + numBins = m_help->numBins.get(m_id); - /* create an array of float_64 on gpu und host */ - gBins = new GridBuffer (DataSpace (realNumBins)); - binReduced = new float_64[realNumBins]; - for (int i = 0; i < realNumBins; ++i) - { - binReduced[i] = 0.0; - } + if(numBins <= 0) + { + throw std::runtime_error( + std::string("[Plugin] [") + m_help->getOptionPrefix() + "] error since " + + m_help->getOptionPrefix() + ".binCount) must be > 0 (input " + std::to_string(numBins) + + " bins)"); + } - writeToFile = reduce.hasResult(mpi::reduceMethods::Reduce()); - if( writeToFile ) - openNewFile(); + minEnergy_keV = 
m_help->minEnergy_keV.get(m_id); + maxEnergy_keV = m_help->maxEnergy_keV.get(m_id); - // set how often the plugin should be executed while PIConGPU is running - Environment<>::get( ).PluginConnector( ).setNotificationPeriod( - this, - m_help->notifyPeriod.get( id ) - ); + realNumBins = numBins + 2; - } + /* create an array of float_64 on gpu und host */ + gBins = new GridBuffer(DataSpace(realNumBins)); + binReduced = new float_64[realNumBins]; + for(int i = 0; i < realNumBins; ++i) + { + binReduced[i] = 0.0; + } - virtual ~BinEnergyParticles() - { - if (writeToFile) - { - outFile.flush(); - outFile << std::endl; /* now all data are written to file */ - if (outFile.fail()) - std::cerr << "Error on flushing file [" << filename << "]. " << std::endl; - outFile.close(); + writeToFile = reduce.hasResult(mpi::reduceMethods::Reduce()); + if(writeToFile) + openNewFile(); + + // set how often the plugin should be executed while PIConGPU is running + Environment<>::get().PluginConnector().setNotificationPeriod(this, m_help->notifyPeriod.get(id)); } - __delete(gBins); - __deleteArray(binReduced); - } + virtual ~BinEnergyParticles() + { + if(writeToFile) + { + outFile.flush(); + outFile << std::endl; /* now all data are written to file */ + if(outFile.fail()) + std::cerr << "Error on flushing file [" << filename << "]. 
" << std::endl; + outFile.close(); + } - void notify(uint32_t currentStep) - { - calBinEnergyParticles < CORE + BORDER > (currentStep); - } + __delete(gBins); + __deleteArray(binReduced); + } - void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) - { - if( !writeToFile ) - return; - - writeToFile = restoreTxtFile( - outFile, - filename, - restartStep, - restartDirectory - ); - } - - void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) - { - if( !writeToFile ) - return; + void notify(uint32_t currentStep) + { + calBinEnergyParticles(currentStep); + } - checkpointTxtFile( - outFile, - filename, - currentStep, - checkpointDirectory - ); - } + void restart(uint32_t restartStep, std::string const& restartDirectory) + { + if(!writeToFile) + return; -private: + writeToFile = restoreTxtFile(outFile, filename, restartStep, restartDirectory); + } - /* Open a New Output File - * - * Must only be called by the rank with writeToFile == true - */ - void openNewFile() - { - outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); - if (!outFile) + void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) { - std::cerr << "[Plugin] [" << m_help->getOptionPrefix( ) - << "] Can't open file '" << filename - << "', output disabled" << std::endl; - writeToFile = false; + if(!writeToFile) + return; + + checkpointTxtFile(outFile, filename, currentStep, checkpointDirectory); } - else + + private: + /* Open a New Output File + * + * Must only be called by the rank with writeToFile == true + */ + void openNewFile() { - /* create header of the file */ - outFile << "#step <" << minEnergy_keV << " "; - float_X binEnergy = (maxEnergy_keV - minEnergy_keV) / (float_32) numBins; - for (int i = 1; i < realNumBins - 1; ++i) - outFile << minEnergy_keV + ((float_32) i * binEnergy) << " "; + outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); + if(!outFile) + { + std::cerr << 
"[Plugin] [" << m_help->getOptionPrefix() << "] Can't open file '" << filename + << "', output disabled" << std::endl; + writeToFile = false; + } + else + { + /* create header of the file */ + outFile << "#step <" << minEnergy_keV << " "; + float_X binEnergy = (maxEnergy_keV - minEnergy_keV) / (float_32) numBins; + for(int i = 1; i < realNumBins - 1; ++i) + outFile << minEnergy_keV + ((float_32) i * binEnergy) << " "; - outFile << ">" << maxEnergy_keV << " count" << std::endl; + outFile << ">" << maxEnergy_keV << " count" << std::endl; + } } - } - template< uint32_t AREA > - void calBinEnergyParticles(uint32_t currentStep) - { - gBins->getDeviceBuffer().setValue(0); - - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); - - /* convert energy values from keV to PIConGPU units */ - float_X const minEnergy = minEnergy_keV * UNITCONV_keV_to_Joule / UNIT_ENERGY; - float_X const maxEnergy = maxEnergy_keV * UNITCONV_keV_to_Joule / UNIT_ENERGY; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - AreaMapping< - AREA, - MappingDesc - > mapper( *m_cellDescription ); - - auto kernel = PMACC_KERNEL( KernelBinEnergyParticles< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers, - realNumBins * sizeof( float_X ) - ); - - auto bindKernel = std::bind( - kernel, - particles->getDeviceParticlesBox( ), - gBins->getDeviceBuffer( ).getDataBox( ), - numBins, - minEnergy, - maxEnergy, - mapper, - std::placeholders::_1 - ); - - meta::ForEach< - typename Help::EligibleFilters, - plugins::misc::ExecuteIfNameIsEqual< bmpl::_1 > - >{ }( - m_help->filter.get( m_id ), - currentStep, - bindKernel - ); - - dc.releaseData( ParticlesType::FrameType::getName() ); - gBins->deviceToHost(); - - reduce(nvidia::functors::Add(), - binReduced, - gBins->getHostBuffer().getBasePointer(), - realNumBins, 
mpi::reduceMethods::Reduce()); - - - if (writeToFile) + template + void calBinEnergyParticles(uint32_t currentStep) { - using dbl = std::numeric_limits; + gBins->getDeviceBuffer().setValue(0); - outFile.precision(dbl::digits10); + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); - /* write data to file */ - float_64 count_particles = 0.0; - outFile << currentStep << " " - << std::scientific; /* for floating points, ignored for ints */ + /* convert energy values from keV to PIConGPU units */ + float_X const minEnergy = minEnergy_keV * UNITCONV_keV_to_Joule / UNIT_ENERGY; + float_X const maxEnergy = maxEnergy_keV * UNITCONV_keV_to_Joule / UNIT_ENERGY; - for (int i = 0; i < realNumBins; ++i) + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + AreaMapping mapper(*m_cellDescription); + + auto kernel = PMACC_KERNEL(KernelBinEnergyParticles{})( + mapper.getGridDim(), + numWorkers, + realNumBins * sizeof(float_X)); + + auto bindKernel = std::bind( + kernel, + particles->getDeviceParticlesBox(), + gBins->getDeviceBuffer().getDataBox(), + numBins, + minEnergy, + maxEnergy, + mapper, + std::placeholders::_1); + + meta::ForEach>{}( + m_help->filter.get(m_id), + currentStep, + bindKernel); + + dc.releaseData(ParticlesType::FrameType::getName()); + gBins->deviceToHost(); + + reduce( + nvidia::functors::Add(), + binReduced, + gBins->getHostBuffer().getBasePointer(), + realNumBins, + mpi::reduceMethods::Reduce()); + + + if(writeToFile) { - count_particles += float_64( binReduced[i]); - outFile << std::scientific << (binReduced[i]) * float_64(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE) << " "; + using dbl = std::numeric_limits; + + outFile.precision(dbl::digits10); + + /* write data to file */ + float_64 count_particles = 0.0; + outFile << currentStep << " " << std::scientific; /* for floating points, ignored for ints */ + + for(int i = 0; i < realNumBins; 
++i) + { + count_particles += float_64(binReduced[i]); + outFile << std::scientific + << (binReduced[i]) * float_64(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE) << " "; + } + /* endl: Flush any step to the file. + * Thus, we will have data if the program should crash. + */ + outFile << std::scientific + << count_particles * float_64(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE) << std::endl; } - /* endl: Flush any step to the file. - * Thus, we will have data if the program should crash. - */ - outFile << std::scientific << count_particles * float_64(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE) - << std::endl; } - } - -}; + }; -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - BinEnergyParticles< T_UnspecifiedSpecies > - > + namespace particles { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the weighting and momentum attributes - using RequiredIdentifiers = MakeSeq_t< - weighting, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - // and also a mass ratio for energy calculation from momentum - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + + // this plugin needs at least the weighting and momentum attributes + using RequiredIdentifiers = MakeSeq_t; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + // and also a mass ratio for energy calculation from momentum + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + 
}; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/ChargeConservation.hpp b/include/picongpu/plugins/ChargeConservation.hpp index 888fcb0f0a..d4cfafc334 100644 --- a/include/picongpu/plugins/ChargeConservation.hpp +++ b/include/picongpu/plugins/ChargeConservation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,82 +33,70 @@ namespace picongpu { -using namespace pmacc; - -namespace po = boost::program_options; - -/** - * @class ChargeConservation - * @brief maximum difference between electron charge density and div E - * - * WARNING: This plugin assumes a Yee-cell! - * Do not use it together with other field solvers like `directional splitting` or `Lehe` - */ -class ChargeConservation : public ISimulationPlugin -{ -private: - std::string name; - std::string prefix; - std::string notifyPeriod; - const std::string filename; - MappingDesc* cellDescription; - std::ofstream output_file; - - using AllGPU_reduce = boost::shared_ptr >; - AllGPU_reduce allGPU_reduce; - - HINLINE void restart(uint32_t restartStep, const std::string restartDirectory); - HINLINE void checkpoint(uint32_t currentStep, const std::string checkpointDirectory); - - HINLINE void pluginLoad(); -public: - HINLINE ChargeConservation(); - virtual ~ChargeConservation() {} - - HINLINE void notify(uint32_t currentStep); - HINLINE void setMappingDescription(MappingDesc*); - HINLINE void pluginRegisterHelp(po::options_description& desc); - HINLINE std::string pluginGetName() const; -}; - -namespace particles -{ -namespace traits -{ - template< - typename T_Species - > - struct SpeciesEligibleForSolver< - T_Species, - ChargeConservation - > + using namespace pmacc; + + namespace po = boost::program_options; + + /** + * @class ChargeConservation + * @brief maximum difference between electron charge density and div 
E + * + * WARNING: This plugin assumes a Yee-cell! + * Do not use it together with other field solvers like `directional splitting` or `Lehe` + */ + class ChargeConservation : public ISimulationPlugin { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the weighting particle attribute - using RequiredIdentifiers = MakeSeq_t< - weighting - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - // and also a charge ratio for a charge density - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; + private: + std::string name; + std::string prefix; + std::string notifyPeriod; + const std::string filename; + MappingDesc* cellDescription; + std::ofstream output_file; + + using AllGPU_reduce = boost::shared_ptr>; + AllGPU_reduce allGPU_reduce; + + HINLINE void restart(uint32_t restartStep, const std::string restartDirectory); + HINLINE void checkpoint(uint32_t currentStep, const std::string checkpointDirectory); + + HINLINE void pluginLoad(); + + public: + HINLINE ChargeConservation(); + virtual ~ChargeConservation() + { + } + + HINLINE void notify(uint32_t currentStep); + HINLINE void setMappingDescription(MappingDesc*); + HINLINE void pluginRegisterHelp(po::options_description& desc); + HINLINE std::string pluginGetName() const; }; -} // namespace traits -} // namespace particles + namespace particles + { + namespace traits + { + template + struct SpeciesEligibleForSolver + { + using FrameType = typename T_Species::FrameType; + + // this plugin needs at least the weighting particle attribute + using RequiredIdentifiers = MakeSeq_t; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + // and also a charge ratio for a charge density + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + 
using type = typename bmpl::and_; + }; + + } // namespace traits + } // namespace particles } // namespace picongpu #include "ChargeConservation.tpp" diff --git a/include/picongpu/plugins/ChargeConservation.tpp b/include/picongpu/plugins/ChargeConservation.tpp index f4c623c7f4..f76a3bed5e 100644 --- a/include/picongpu/plugins/ChargeConservation.tpp +++ b/include/picongpu/plugins/ChargeConservation.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include #include #include @@ -50,254 +48,228 @@ namespace picongpu { - -ChargeConservation::ChargeConservation() - : name("ChargeConservation: Print the maximum charge deviation between particles and div E to textfile 'chargeConservation.dat'"), - prefix("chargeConservation"), filename("chargeConservation.dat"), - cellDescription(nullptr) -{ - Environment<>::get().PluginConnector().registerPlugin(this); -} - -void ChargeConservation::pluginRegisterHelp(po::options_description& desc) -{ - desc.add_options() - ((this->prefix + ".period").c_str(), - po::value (&this->notifyPeriod), "enable plugin [for each n-th step]"); -} - -std::string ChargeConservation::pluginGetName() const {return this->name;} - -void ChargeConservation::pluginLoad() -{ - if(this->notifyPeriod.empty()) - return; - - Environment<>::get().PluginConnector().setNotificationPeriod(this, this->notifyPeriod); - - pmacc::GridController& con = pmacc::Environment::get().GridController(); - using namespace pmacc::math; - Size_t gpuDim = (Size_t)con.getGpuNodes(); - zone::SphericZone zone_allGPUs(gpuDim); - this->allGPU_reduce = AllGPU_reduce(new pmacc::algorithm::mpi::Reduce(zone_allGPUs)); - - if(this->allGPU_reduce->root()) + ChargeConservation::ChargeConservation() + : name("ChargeConservation: Print the maximum charge deviation between 
particles and div E to textfile " + "'chargeConservation.dat'") + , prefix("chargeConservation") + , filename("chargeConservation.dat") + , cellDescription(nullptr) { - this->output_file.open(this->filename.c_str(), std::ios_base::app); - this->output_file << "#timestep max-charge-deviation unit[As]" << std::endl; + Environment<>::get().PluginConnector().registerPlugin(this); } -} - -void ChargeConservation::restart(uint32_t restartStep, const std::string restartDirectory) -{ - if(this->notifyPeriod.empty()) - return; - - if(!this->allGPU_reduce->root()) - return; - - restoreTxtFile( this->output_file, - this->filename, - restartStep, - restartDirectory ); -} - -void ChargeConservation::checkpoint(uint32_t currentStep, const std::string checkpointDirectory) -{ - if(this->notifyPeriod.empty()) - return; - - if(!this->allGPU_reduce->root()) - return; - - checkpointTxtFile( this->output_file, - this->filename, - currentStep, - checkpointDirectory ); -} - -void ChargeConservation::setMappingDescription(MappingDesc* cellDescription) -{ - this->cellDescription = cellDescription; -} -namespace detail -{ - -/** - * @class Div - * @brief divergence functor for 2D and 3D - * - * NOTE: This functor uses a Yee-cell stencil. 
- */ -template -struct Div; - -template -struct Div -{ - using result_type = ValueType; - - template - HDINLINE ValueType operator()(Field field) const + void ChargeConservation::pluginRegisterHelp(po::options_description& desc) { - const ValueType reciWidth = float_X(1.0) / cellSize.x(); - const ValueType reciHeight = float_X(1.0) / cellSize.y(); - const ValueType reciDepth = float_X(1.0) / cellSize.z(); - return ((*field).x() - (*field(-1,0,0)).x()) * reciWidth + - ((*field).y() - (*field(0,-1,0)).y()) * reciHeight + - ((*field).z() - (*field(0,0,-1)).z()) * reciDepth; + desc.add_options()( + (this->prefix + ".period").c_str(), + po::value(&this->notifyPeriod), + "enable plugin [for each n-th step]"); } -}; -template -struct Div -{ - using result_type = ValueType; - - template - HDINLINE ValueType operator()(Field field) const + std::string ChargeConservation::pluginGetName() const { - const ValueType reciWidth = float_X(1.0) / cellSize.x(); - const ValueType reciHeight = float_X(1.0) / cellSize.y(); - return ((*field).x() - (*field(-1,0)).x()) * reciWidth + - ((*field).y() - (*field(0,-1)).y()) * reciHeight; + return this->name; } -}; -// functor for all species to calculate density -template -struct ComputeChargeDensity -{ - using SpeciesType = pmacc::particles::meta::FindByNameOrType_t< - VectorAllSpecies, - T_SpeciesType - >; - static const uint32_t area = T_Area::value; - - HINLINE void operator()( FieldTmp* fieldTmp, - const uint32_t currentStep) const + void ChargeConservation::pluginLoad() { - DataConnector &dc = Environment<>::get().DataConnector(); + if(this->notifyPeriod.empty()) + return; + + Environment<>::get().PluginConnector().setNotificationPeriod(this, this->notifyPeriod); + + pmacc::GridController& con = pmacc::Environment::get().GridController(); + using namespace pmacc::math; + Size_t gpuDim = (Size_t) con.getGpuNodes(); + zone::SphericZone zone_allGPUs(gpuDim); + this->allGPU_reduce = AllGPU_reduce(new 
pmacc::algorithm::mpi::Reduce(zone_allGPUs)); + + if(this->allGPU_reduce->root()) + { + this->output_file.open(this->filename.c_str(), std::ios_base::app); + this->output_file << "#timestep max-charge-deviation unit[As]" << std::endl; + } + } - /* load species without copying the particle data to the host */ - auto speciesTmp = dc.get< SpeciesType >( SpeciesType::FrameType::getName(), true ); + void ChargeConservation::restart(uint32_t restartStep, const std::string restartDirectory) + { + if(this->notifyPeriod.empty()) + return; - /* run algorithm */ - using ChargeDensitySolver = typename particles::particleToGrid::CreateFieldTmpOperation_t< - SpeciesType, - particles::particleToGrid::derivedAttributes::ChargeDensity - >::Solver; + if(!this->allGPU_reduce->root()) + return; - fieldTmp->computeValue < area, ChargeDensitySolver > (*speciesTmp, currentStep); - dc.releaseData( SpeciesType::FrameType::getName() ); + restoreTxtFile(this->output_file, this->filename, restartStep, restartDirectory); } -}; -struct CalculateAndAssignChargeDeviation -{ - template - HDINLINE void operator()( - const T_Acc& acc, - T_Rho& rho, - const T_FieldE& fieldECursor - ) const + void ChargeConservation::checkpoint(uint32_t currentStep, const std::string checkpointDirectory) { - typedef Div MyDiv; + if(this->notifyPeriod.empty()) + return; - /* rho := | div E * eps_0 - rho | */ - rho.x() = math::abs((MyDiv{}(fieldECursor) * EPS0 - rho).x()); + if(!this->allGPU_reduce->root()) + return; + + checkpointTxtFile(this->output_file, this->filename, currentStep, checkpointDirectory); } -}; -} // namespace detail + void ChargeConservation::setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; + } -void ChargeConservation::notify(uint32_t currentStep) -{ - typedef SuperCellSize BlockDim; - - DataConnector &dc = Environment<>::get().DataConnector(); - - /* load FieldTmp without copy data to host */ - PMACC_CASSERT_MSG( - 
_please_allocate_at_least_one_FieldTmp_in_memory_param, - fieldTmpNumSlots > 0 - ); - auto fieldTmp = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - /* reset density values to zero */ - fieldTmp->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); - - using EligibleSpecies = typename bmpl::copy_if< - VectorAllSpecies, - particles::traits::SpeciesEligibleForSolver< - bmpl::_1, - ChargeConservation - > - >::type; - - // todo: log species that are used / ignored in this plugin with INFO - - /* calculate and add the charge density values from all species in FieldTmp */ - meta::ForEach< - EligibleSpecies, - picongpu::detail::ComputeChargeDensity< - bmpl::_1, - bmpl::int_< CORE + BORDER > - >, - bmpl::_1 - > computeChargeDensity; - computeChargeDensity(fieldTmp.get(), currentStep); - - /* add results of all species that are still in GUARD to next GPUs BORDER */ - EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(fieldTmpEvent); - - /* cast PMacc Buffer to cuSTL Buffer */ - auto fieldTmp_coreBorder = - fieldTmp->getGridBuffer(). - getDeviceBuffer().cartBuffer(). - view(this->cellDescription->getGuardingSuperCells()*BlockDim::toRT(), - this->cellDescription->getGuardingSuperCells()*-BlockDim::toRT()); - - /* cast PMacc Buffer to cuSTL Buffer */ - auto fieldE_coreBorder = - dc.get< FieldE >( FieldE::getName(), true )->getGridBuffer(). - getDeviceBuffer().cartBuffer(). 
- view(this->cellDescription->getGuardingSuperCells()*BlockDim::toRT(), - this->cellDescription->getGuardingSuperCells()*-BlockDim::toRT()); - - /* run calculation: fieldTmp = | div E * eps_0 - rho | */ - using namespace pmacc::math::math_functor; - typedef picongpu::detail::Div myDiv; - algorithm::kernel::Foreach()( - fieldTmp_coreBorder.zone(), - fieldTmp_coreBorder.origin(), - cursor::make_NestedCursor(fieldE_coreBorder.origin()), - ::picongpu::detail::CalculateAndAssignChargeDeviation() - ); - - /* reduce charge derivation (fieldTmp) to get the maximum value */ - typename FieldTmp::ValueType maxChargeDiff = - algorithm::kernel::Reduce() - (fieldTmp_coreBorder.origin(), fieldTmp_coreBorder.zone(), pmacc::nvidia::functors::Max()); - - /* reduce again across mpi cluster */ - container::HostBuffer maxChargeDiff_host(1); - *maxChargeDiff_host.origin() = maxChargeDiff; - container::HostBuffer maxChargeDiff_cluster(1); - (*this->allGPU_reduce)( - maxChargeDiff_cluster, - maxChargeDiff_host, - ::pmacc::algorithms::math::Max< - typename FieldTmp::ValueType, - typename FieldTmp::ValueType - >() - ); - - if(!this->allGPU_reduce->root()) return; - - this->output_file << currentStep << " " << (*maxChargeDiff_cluster.origin() * CELL_VOLUME).x() - << " " << UNIT_CHARGE << std::endl; -} + namespace detail + { + /** + * @class Div + * @brief divergence functor for 2D and 3D + * + * NOTE: This functor uses a Yee-cell stencil. 
+ */ + template + struct Div; + + template + struct Div + { + using result_type = ValueType; + + template + HDINLINE ValueType operator()(Field field) const + { + const ValueType reciWidth = float_X(1.0) / cellSize.x(); + const ValueType reciHeight = float_X(1.0) / cellSize.y(); + const ValueType reciDepth = float_X(1.0) / cellSize.z(); + return ((*field).x() - (*field(-1, 0, 0)).x()) * reciWidth + + ((*field).y() - (*field(0, -1, 0)).y()) * reciHeight + + ((*field).z() - (*field(0, 0, -1)).z()) * reciDepth; + } + }; + + template + struct Div + { + using result_type = ValueType; + + template + HDINLINE ValueType operator()(Field field) const + { + const ValueType reciWidth = float_X(1.0) / cellSize.x(); + const ValueType reciHeight = float_X(1.0) / cellSize.y(); + return ((*field).x() - (*field(-1, 0)).x()) * reciWidth + + ((*field).y() - (*field(0, -1)).y()) * reciHeight; + } + }; + + // functor for all species to calculate density + template + struct ComputeChargeDensity + { + using SpeciesType = pmacc::particles::meta::FindByNameOrType_t; + static const uint32_t area = T_Area::value; + + HINLINE void operator()(FieldTmp* fieldTmp, const uint32_t currentStep) const + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /* load species without copying the particle data to the host */ + auto speciesTmp = dc.get(SpeciesType::FrameType::getName(), true); + + /* run algorithm */ + using ChargeDensitySolver = typename particles::particleToGrid::CreateFieldTmpOperation_t< + SpeciesType, + particles::particleToGrid::derivedAttributes::ChargeDensity>::Solver; + + fieldTmp->computeValue(*speciesTmp, currentStep); + dc.releaseData(SpeciesType::FrameType::getName()); + } + }; + + struct CalculateAndAssignChargeDeviation + { + template + HDINLINE void operator()(const T_Acc& acc, T_Rho& rho, const T_FieldE& fieldECursor) const + { + typedef Div MyDiv; + + /* rho := | div E * eps_0 - rho | */ + rho.x() = math::abs((MyDiv{}(fieldECursor) *EPS0 - rho).x()); + } + }; 
+ + } // namespace detail + + void ChargeConservation::notify(uint32_t currentStep) + { + typedef SuperCellSize BlockDim; + + DataConnector& dc = Environment<>::get().DataConnector(); + + /* load FieldTmp without copy data to host */ + PMACC_CASSERT_MSG(_please_allocate_at_least_one_FieldTmp_in_memory_param, fieldTmpNumSlots > 0); + auto fieldTmp = dc.get(FieldTmp::getUniqueId(0), true); + /* reset density values to zero */ + fieldTmp->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + + using EligibleSpecies = typename bmpl:: + copy_if>::type; + + // todo: log species that are used / ignored in this plugin with INFO + + /* calculate and add the charge density values from all species in FieldTmp */ + meta::ForEach< + EligibleSpecies, + picongpu::detail::ComputeChargeDensity>, + bmpl::_1> + computeChargeDensity; + computeChargeDensity(fieldTmp.get(), currentStep); + + /* add results of all species that are still in GUARD to next GPUs BORDER */ + EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(fieldTmpEvent); + + /* cast PMacc Buffer to cuSTL Buffer */ + auto fieldTmp_coreBorder = fieldTmp->getGridBuffer().getDeviceBuffer().cartBuffer().view( + this->cellDescription->getGuardingSuperCells() * BlockDim::toRT(), + this->cellDescription->getGuardingSuperCells() * -BlockDim::toRT()); + + /* cast PMacc Buffer to cuSTL Buffer */ + auto fieldE_coreBorder = dc.get(FieldE::getName(), true) + ->getGridBuffer() + .getDeviceBuffer() + .cartBuffer() + .view( + this->cellDescription->getGuardingSuperCells() * BlockDim::toRT(), + this->cellDescription->getGuardingSuperCells() * -BlockDim::toRT()); + + /* run calculation: fieldTmp = | div E * eps_0 - rho | */ + typedef picongpu::detail::Div myDiv; + algorithm::kernel::Foreach()( + fieldTmp_coreBorder.zone(), + fieldTmp_coreBorder.origin(), + cursor::make_NestedCursor(fieldE_coreBorder.origin()), + ::picongpu::detail::CalculateAndAssignChargeDeviation()); + 
+ /* reduce charge derivation (fieldTmp) to get the maximum value */ + typename FieldTmp::ValueType maxChargeDiff = algorithm::kernel::Reduce()( + fieldTmp_coreBorder.origin(), + fieldTmp_coreBorder.zone(), + pmacc::nvidia::functors::Max()); + + /* reduce again across mpi cluster */ + container::HostBuffer maxChargeDiff_host(1); + *maxChargeDiff_host.origin() = maxChargeDiff; + container::HostBuffer maxChargeDiff_cluster(1); + (*this->allGPU_reduce)( + maxChargeDiff_cluster, + maxChargeDiff_host, + ::pmacc::math::Max()); + + if(!this->allGPU_reduce->root()) + return; + + this->output_file << currentStep << " " << (*maxChargeDiff_cluster.origin() * CELL_VOLUME).x() << " " + << UNIT_CHARGE << std::endl; + } } // namespace picongpu diff --git a/include/picongpu/plugins/Checkpoint.hpp b/include/picongpu/plugins/Checkpoint.hpp index 2a4f369fc2..0e02d87d0d 100644 --- a/include/picongpu/plugins/Checkpoint.hpp +++ b/include/picongpu/plugins/Checkpoint.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera, Franz Poeschel * * This file is part of PIConGPU. 
* @@ -25,10 +25,10 @@ #include "picongpu/plugins/ISimulationPlugin.hpp" #if(ENABLE_ADIOS == 1) -# include "picongpu/plugins/adios/ADIOSWriter.hpp" +# include "picongpu/plugins/adios/ADIOSWriter.hpp" #endif -#if(ENABLE_HDF5 == 1) -# include "picongpu/plugins/hdf5/HDF5Writer.hpp" +#if(ENABLE_OPENPMD == 1) +# include "picongpu/plugins/openPMD/openPMDWriter.hpp" #endif #include @@ -47,191 +47,151 @@ namespace picongpu class Checkpoint : public ISimulationPlugin { public: - - Checkpoint( ) : - checkpointFilename( "checkpoint" ), - restartChunkSize( 0u ) + Checkpoint() : checkpointFilename("checkpoint"), restartChunkSize(0u) { -#if(ENABLE_ADIOS == 1) - ioBackendsHelp[ "adios" ] = std::shared_ptr< plugins::multi::IHelp >( adios::ADIOSWriter::getHelp() ); +#if(ENABLE_OPENPMD == 1) + ioBackendsHelp["openPMD"] = std::shared_ptr(openPMD::openPMDWriter::getHelp()); #endif -#if(ENABLE_HDF5 == 1) - ioBackendsHelp[ "hdf5" ] = std::shared_ptr< plugins::multi::IHelp >( hdf5::HDF5Writer::getHelp() ); +#if(ENABLE_ADIOS == 1) + ioBackendsHelp["adios"] = std::shared_ptr(adios::ADIOSWriter::getHelp()); #endif // if adios is enabled the default is adios - if( !ioBackendsHelp.empty( ) ) + if(!ioBackendsHelp.empty()) { - checkpointBackendName = ioBackendsHelp.begin( )->first; - restartBackendName = ioBackendsHelp.begin( )->first; + checkpointBackendName = ioBackendsHelp.begin()->first; + restartBackendName = ioBackendsHelp.begin()->first; } uint32_t backendCount = 0u; - for( auto & backend : ioBackendsHelp ) + for(auto& backend : ioBackendsHelp) { - if( backendCount >= 1u ) + if(backendCount >= 1u) activeBackends += ", "; activeBackends += backend.first; ++backendCount; } - Environment<>::get( ).PluginConnector( ).registerPlugin( this ); + Environment<>::get().PluginConnector().registerPlugin(this); } - virtual ~Checkpoint( ) + virtual ~Checkpoint() { - } - void pluginRegisterHelp(boost::program_options::options_description & desc) + void 
pluginRegisterHelp(boost::program_options::options_description& desc) { - namespace po = boost::program_options; - if( ioBackendsHelp.empty( ) ) - desc.add_options( )( - "checkpoint", - "plugin disabled [compiled without dependency HDF5 or Adios]" - ); + if(ioBackendsHelp.empty()) + desc.add_options()("checkpoint", "plugin disabled [compiled without dependency HDF5 or Adios]"); else - desc.add_options( ) - ( - "checkpoint.backend", - po::value ( &checkpointBackendName ), - ( std::string( "Optional backend for checkpointing [" ) + activeBackends + "] default: " + checkpointBackendName ).c_str( ) - )( - "checkpoint.file", - po::value< std::string >( &checkpointFilename ), - "Optional checkpoint filename (prefix)" - )( - "checkpoint.restart.backend", - po::value< std::string >( &restartBackendName ), - ( std::string( "Optional backend for restarting [" ) + activeBackends + "] default: " + restartBackendName ).c_str( ) - )( - "checkpoint.restart.file", - po::value< std::string >( &restartFilename ), - "checkpoint restart filename (prefix)" - )( - /* 1,000,000 particles are around 3900 frames at 256 particles per frame - * and match ~30MiB with typical picongpu particles. - * The only reason why we use 1M particles per chunk is that we can get a - * frame overflow in our memory manager if we process all particles in one kernel. - **/ - "checkpoint.restart.chunkSize", - po::value< uint32_t >(&restartChunkSize)->default_value( 1000000u ), - "Number of particles processed in one kernel call during restart to prevent frame count blowup" - ); - - for( auto & backend : ioBackendsHelp ) - backend.second->expandHelp( - desc, - "checkpoint." 
- ); - + desc.add_options()( + "checkpoint.backend", + po::value(&checkpointBackendName), + (std::string("Optional backend for checkpointing [") + activeBackends + + "] default: " + checkpointBackendName) + .c_str())( + "checkpoint.file", + po::value(&checkpointFilename), + "Optional checkpoint filename (prefix)")( + "checkpoint.restart.backend", + po::value(&restartBackendName), + (std::string("Optional backend for restarting [") + activeBackends + + "] default: " + restartBackendName) + .c_str())( + "checkpoint.restart.file", + po::value(&restartFilename), + "checkpoint restart filename (prefix)")( + /* 1,000,000 particles are around 3900 frames at 256 particles per frame + * and match ~30MiB with typical picongpu particles. + * The only reason why we use 1M particles per chunk is that we can get a + * frame overflow in our memory manager if we process all particles in one kernel. + **/ + "checkpoint.restart.chunkSize", + po::value(&restartChunkSize)->default_value(1000000u), + "Number of particles processed in one kernel call during restart to prevent frame count blowup"); + + for(auto& backend : ioBackendsHelp) + backend.second->expandHelp(desc, "checkpoint."); } - std::string pluginGetName( ) const + std::string pluginGetName() const { return "Checkpoint"; } - void notify( uint32_t ) + void notify(uint32_t) { } - void setMappingDescription(MappingDesc *cellDescription) + void setMappingDescription(MappingDesc* cellDescription) { m_cellDescription = cellDescription; } - void checkpoint( - uint32_t currentStep, - const std::string checkpointDirectory - ) + void checkpoint(uint32_t currentStep, const std::string checkpointDirectory) { - auto cBackend = ioBackends.find( checkpointBackendName ); - if( cBackend != ioBackends.end( ) ) + auto cBackend = ioBackends.find(checkpointBackendName); + if(cBackend != ioBackends.end()) { - cBackend->second->dumpCheckpoint( - currentStep, - checkpointDirectory, - checkpointFilename - ); + 
cBackend->second->dumpCheckpoint(currentStep, checkpointDirectory, checkpointFilename); } } void restart(uint32_t restartStep, const std::string restartDirectory) { - auto rBackend = ioBackends.find( restartBackendName ); - if( rBackend != ioBackends.end( ) ) + auto rBackend = ioBackends.find(restartBackendName); + if(rBackend != ioBackends.end()) { - rBackend->second->doRestart( - restartStep, - restartDirectory, - restartFilename, - restartChunkSize - ); + rBackend->second->doRestart(restartStep, restartDirectory, restartFilename, restartChunkSize); } } private: - - void pluginLoad( ) + void pluginLoad() { - for( auto & backendHelp : ioBackendsHelp ) + for(auto& backendHelp : ioBackendsHelp) { - if( backendHelp.second->getNumPlugins() > 0u ) + if(backendHelp.second->getNumPlugins() > 0u) backendHelp.second->validateOptions(); - size_t const numSlaves = backendHelp.second->getNumPlugins( ); - if( numSlaves > 1u ) - throw std::runtime_error( pluginGetName() + ": is no a multi plugin, each option can only be selected once." 
); + size_t const numSlaves = backendHelp.second->getNumPlugins(); + if(numSlaves > 1u) + throw std::runtime_error( + pluginGetName() + ": is no a multi plugin, each option can only be selected once."); } // create checkpoint creation backend - if( !ioBackendsHelp.empty( ) ) + if(!ioBackendsHelp.empty()) { - auto cBackendHelp = ioBackendsHelp.find( checkpointBackendName ); - if( cBackendHelp == ioBackendsHelp.end( ) ) - throw std::runtime_error( std::string( "IO-backend " ) + - checkpointBackendName + - " for checkpoints not found, possible backends: " + - activeBackends - ); + auto cBackendHelp = ioBackendsHelp.find(checkpointBackendName); + if(cBackendHelp == ioBackendsHelp.end()) + throw std::runtime_error( + std::string("IO-backend ") + checkpointBackendName + + " for checkpoints not found, possible backends: " + activeBackends); else - ioBackends[ checkpointBackendName ] = std::static_pointer_cast< IIOBackend >( - cBackendHelp->second->create( - cBackendHelp->second, - 0, - m_cellDescription - ) - ); + ioBackends[checkpointBackendName] = std::static_pointer_cast( + cBackendHelp->second->create(cBackendHelp->second, 0, m_cellDescription)); } // create restart backend - if( !ioBackendsHelp.empty( ) && checkpointBackendName != restartBackendName ) + if(!ioBackendsHelp.empty() && checkpointBackendName != restartBackendName) { - auto rBackend = ioBackendsHelp.find( restartBackendName ); - if( rBackend == ioBackendsHelp.end( ) ) - throw std::runtime_error( std::string( "IO-backend " ) + - restartBackendName + - " for restarts not found, possible backends: " + - activeBackends - ); + auto rBackend = ioBackendsHelp.find(restartBackendName); + if(rBackend == ioBackendsHelp.end()) + throw std::runtime_error( + std::string("IO-backend ") + restartBackendName + + " for restarts not found, possible backends: " + activeBackends); else - ioBackends[ restartBackendName ] = std::static_pointer_cast< IIOBackend >( - rBackend->second->create( - rBackend->second, - 0, - 
m_cellDescription - ) - ); + ioBackends[restartBackendName] = std::static_pointer_cast( + rBackend->second->create(rBackend->second, 0, m_cellDescription)); } - if( restartFilename.empty( ) ) + if(restartFilename.empty()) { restartFilename = checkpointFilename; } } - virtual void pluginUnload( ) + virtual void pluginUnload() { ioBackends.clear(); } @@ -255,16 +215,10 @@ namespace picongpu */ uint32_t restartChunkSize; - // can be "adios" and "hdf5" - std::map< - std::string, - std::shared_ptr< IIOBackend > - > ioBackends; + // can be "adios", "hdf5" and "openPMD" + std::map> ioBackends; - std::map< - std::string, - std::shared_ptr< plugins::multi::IHelp > - > ioBackendsHelp; + std::map> ioBackendsHelp; MappingDesc* m_cellDescription = nullptr; }; diff --git a/include/picongpu/plugins/CountParticles.hpp b/include/picongpu/plugins/CountParticles.hpp index 13686c48a6..3ac2ece70d 100644 --- a/include/picongpu/plugins/CountParticles.hpp +++ b/include/picongpu/plugins/CountParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -44,177 +44,165 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -template -class CountParticles : public ISimulationPlugin -{ -private: - typedef MappingDesc::SuperCellSize SuperCellSize; - - MappingDesc *cellDescription; - std::string notifyPeriod; - - std::string pluginName; - std::string pluginPrefix; - std::string filename; - - std::ofstream outFile; - /*only rank 0 create a file*/ - bool writeToFile; + template + class CountParticles : public ISimulationPlugin + { + private: + typedef MappingDesc::SuperCellSize SuperCellSize; - mpi::MPIReduce reduce; -public: + MappingDesc* cellDescription; + std::string notifyPeriod; - CountParticles() : - pluginName("CountParticles: count macro particles of a species"), - pluginPrefix(ParticlesType::FrameType::getName() + std::string("_macroParticlesCount")), - filename(pluginPrefix + ".dat"), - cellDescription(nullptr), - writeToFile(false) - { - Environment<>::get().PluginConnector().registerPlugin(this); - } + std::string pluginName; + std::string pluginPrefix; + std::string filename; - virtual ~CountParticles() - { + std::ofstream outFile; + /*only rank 0 create a file*/ + bool writeToFile; - } + mpi::MPIReduce reduce; - void notify(uint32_t currentStep) - { - countParticles < CORE + BORDER > (currentStep); - } + public: + CountParticles() + : pluginName("CountParticles: count macro particles of a species") + , pluginPrefix(ParticlesType::FrameType::getName() + std::string("_macroParticlesCount")) + , filename(pluginPrefix + ".dat") + , cellDescription(nullptr) + , writeToFile(false) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ((pluginPrefix + ".period").c_str(), - po::value (¬ifyPeriod), "enable plugin [for each n-th step]"); - } + virtual ~CountParticles() + { + } - std::string pluginGetName() const - { - return pluginName; - } + void notify(uint32_t currentStep) + { + 
countParticles(currentStep); + } - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]"); + } -private: + std::string pluginGetName() const + { + return pluginName; + } - void pluginLoad() - { - if(!notifyPeriod.empty()) + void setMappingDescription(MappingDesc* cellDescription) { - writeToFile = reduce.hasResult(mpi::reduceMethods::Reduce()); + this->cellDescription = cellDescription; + } - if (writeToFile) + private: + void pluginLoad() + { + if(!notifyPeriod.empty()) { - outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); - if (!outFile) + writeToFile = reduce.hasResult(mpi::reduceMethods::Reduce()); + + if(writeToFile) { - std::cerr << "Can't open file [" << filename << "] for output, disable plugin output. " << std::endl; - writeToFile = false; + outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); + if(!outFile) + { + std::cerr << "Can't open file [" << filename << "] for output, disable plugin output. " + << std::endl; + writeToFile = false; + } + // create header of the file + outFile << "#step count" + << " \n"; } - //create header of the file - outFile << "#step count" << " \n"; - } - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + } } - } - void pluginUnload() - { - if(!notifyPeriod.empty()) + void pluginUnload() { - if (writeToFile) + if(!notifyPeriod.empty()) { - outFile.flush(); - outFile << std::endl; //now all data are written to file - if (outFile.fail()) - std::cerr << "Error on flushing file [" << filename << "]. 
" << std::endl; - outFile.close(); + if(writeToFile) + { + outFile.flush(); + outFile << std::endl; // now all data are written to file + if(outFile.fail()) + std::cerr << "Error on flushing file [" << filename << "]. " << std::endl; + outFile.close(); + } } } - } - void restart(uint32_t restartStep, const std::string restartDirectory) - { - if( !writeToFile ) - return; + void restart(uint32_t restartStep, const std::string restartDirectory) + { + if(!writeToFile) + return; - writeToFile = restoreTxtFile( outFile, - filename, - restartStep, - restartDirectory ); - } + writeToFile = restoreTxtFile(outFile, filename, restartStep, restartDirectory); + } - void checkpoint(uint32_t currentStep, const std::string checkpointDirectory) - { - if( !writeToFile ) - return; + void checkpoint(uint32_t currentStep, const std::string checkpointDirectory) + { + if(!writeToFile) + return; - checkpointTxtFile( outFile, - filename, - currentStep, - checkpointDirectory ); - } + checkpointTxtFile(outFile, filename, currentStep, checkpointDirectory); + } - template< uint32_t AREA> - void countParticles(uint32_t currentStep) - { - uint64_cu size; + template + void countParticles(uint32_t currentStep) + { + uint64_cu size; - const SubGrid& subGrid = Environment::get().SubGrid(); - const DataSpace localSize(subGrid.getLocalDomain().size); + const SubGrid& subGrid = Environment::get().SubGrid(); + const DataSpace localSize(subGrid.getLocalDomain().size); - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); - // enforce that the filter interface is fulfilled - particles::filter::IUnary< particles::filter::All > parFilter{ currentStep }; + // enforce that the filter interface is fulfilled + particles::filter::IUnary parFilter{currentStep}; - /*count local particles*/ - 
size = pmacc::CountParticles::countOnDevice(*particles, - *cellDescription, - DataSpace(), - localSize, - parFilter); - dc.releaseData( ParticlesType::FrameType::getName() ); + /*count local particles*/ + size = pmacc::CountParticles::countOnDevice( + *particles, + *cellDescription, + DataSpace(), + localSize, + parFilter); + dc.releaseData(ParticlesType::FrameType::getName()); - uint64_cu reducedValueMax; - if (picLog::log_level & picLog::CRITICAL::lvl) - { - reduce(nvidia::functors::Max(), - &reducedValueMax, - &size, - 1, - mpi::reduceMethods::Reduce()); - } + uint64_cu reducedValueMax; + if(picLog::log_level & picLog::CRITICAL::lvl) + { + reduce(nvidia::functors::Max(), &reducedValueMax, &size, 1, mpi::reduceMethods::Reduce()); + } - uint64_cu reducedValue; - reduce(nvidia::functors::Add(), - &reducedValue, - &size, - 1, - mpi::reduceMethods::Reduce()); + uint64_cu reducedValue; + reduce(nvidia::functors::Add(), &reducedValue, &size, 1, mpi::reduceMethods::Reduce()); - if (writeToFile) - { - if (picLog::log_level & picLog::CRITICAL::lvl) + if(writeToFile) { - log ("maximum number of particles on a GPU : %d\n") % reducedValueMax; - } + if(picLog::log_level & picLog::CRITICAL::lvl) + { + log("maximum number of particles on a GPU : %d\n") % reducedValueMax; + } - outFile << currentStep << " " << reducedValue << " " << std::scientific << (float_64) reducedValue << std::endl; + outFile << currentStep << " " << reducedValue << " " << std::scientific << (float_64) reducedValue + << std::endl; + } } - } - -}; + }; } /* namespace picongpu */ diff --git a/include/picongpu/plugins/Emittance.hpp b/include/picongpu/plugins/Emittance.hpp index d4b3bc4465..318aa0bd30 100644 --- a/include/picongpu/plugins/Emittance.hpp +++ b/include/picongpu/plugins/Emittance.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, * Rene Widera, Richard Pausch, Benjamin Worpitz, * Sophie Rudat * @@ 
-62,13 +62,11 @@ namespace picongpu { - /** calculates the emittance in x direction along the y axis */ - template< uint32_t T_numWorkers > + template struct KernelCalcEmittance { - /** calculates the sum of x^2, ux^2 and x*ux and counts electrons * * @tparam T_ParBox pmacc::ParticlesBox, particle box type @@ -84,381 +82,245 @@ namespace picongpu * @param gCount_e global real particle counter * @param mapper functor to map a block to a supercell */ - template< - typename T_ParBox, - typename T_DBox, - typename T_Mapping, - typename T_Acc, - typename T_Filter - > - DINLINE void operator( )( - T_Acc const & acc, + template + DINLINE void operator()( + T_Acc const& acc, T_ParBox pb, T_DBox gSumMom2, T_DBox gSumPos2, T_DBox gSumMomPos, T_DBox gCount_e, - DataSpace< simDim > globalOffset, + DataSpace globalOffset, const int subGridY, T_Mapping mapper, - T_Filter filter - ) const + T_Filter filter) const { using namespace mappings::threads; constexpr uint32_t numWorkers = T_numWorkers; - constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume< - typename T_ParBox::FrameType::SuperCellSize - >::type::value; + constexpr uint32_t numParticlesPerFrame + = pmacc::math::CT::volume::type::value; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; using FramePtr = typename T_ParBox::FramePtr; // shared sums of x^2, ux^2, x*ux, particle counter - PMACC_SMEM( - acc, - shSumMom2, - memory::Array< - float_X, - SuperCellSize::y::value - > - ); - PMACC_SMEM( - acc, - shSumPos2, - memory::Array< - float_X, - SuperCellSize::y::value - > - ); - PMACC_SMEM( - acc, - shSumMomPos, - memory::Array< - float_X, - SuperCellSize::y::value - > - ); - PMACC_SMEM( - acc, - shCount_e, - memory::Array< - float_X, - SuperCellSize::y::value - > - ); - - using ParticleDomCfg = IdxConfig< - numParticlesPerFrame, - numWorkers - >; - - using SuperCellYDom = IdxConfig< - SuperCellSize::y::value, - numWorkers - >; - - - ForEachIdx< SuperCellYDom >{ 
workerIdx }( - [ & ]( - uint32_t const linearIdx, - uint32_t const - ) - { - // set shared sums of x^2, ux^2, x*ux, particle counter to zero - shSumMom2[ linearIdx ] = 0.0_X; - shSumPos2[ linearIdx ] = 0.0_X; - shSumMomPos[ linearIdx ] = 0.0_X; - shCount_e[ linearIdx ] = 0.0_X; - } - ); - __syncthreads( ); + PMACC_SMEM(acc, shSumMom2, memory::Array); + PMACC_SMEM(acc, shSumPos2, memory::Array); + PMACC_SMEM(acc, shSumMomPos, memory::Array); + PMACC_SMEM(acc, shCount_e, memory::Array); + + using ParticleDomCfg = IdxConfig; + + using SuperCellYDom = IdxConfig; + - DataSpace< simDim > const superCellIdx( mapper.getSuperCellIndex( - DataSpace< simDim >( blockIdx ) - ) ); + ForEachIdx{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + // set shared sums of x^2, ux^2, x*ux, particle counter to zero + shSumMom2[linearIdx] = 0.0_X; + shSumPos2[linearIdx] = 0.0_X; + shSumMomPos[linearIdx] = 0.0_X; + shCount_e[linearIdx] = 0.0_X; + }); + cupla::__syncthreads(acc); + + DataSpace const superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); // each virtual thread is working on an own frame - FramePtr frame = pb.getLastFrame( superCellIdx ); + FramePtr frame = pb.getLastFrame(superCellIdx); // end kernel if we have no frames within the supercell - if( !frame.isValid( ) ) + if(!frame.isValid()) return; - auto accFilter = filter( - acc, - superCellIdx - mapper.getGuardingSuperCells( ), - WorkerCfg< numWorkers >{ workerIdx } - ); - - memory::CtxArray< - typename FramePtr::type::ParticleType, - ParticleDomCfg - > - currentParticleCtx( + auto accFilter + = filter(acc, superCellIdx - mapper.getGuardingSuperCells(), WorkerCfg{workerIdx}); + + memory::CtxArray currentParticleCtx( workerIdx, - [ & ]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto particle = frame[ linearIdx ]; + [&](uint32_t const linearIdx, uint32_t const) { + auto particle = frame[linearIdx]; /* - only particles from the last frame must be checked * - all other particles are 
always valid */ - if( particle[ multiMask_ ] != 1 ) - particle.setHandleInvalid( ); + if(particle[multiMask_] != 1) + particle.setHandleInvalid(); return particle; - } - ); + }); - while( frame.isValid( ) ) + while(frame.isValid()) { // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + ForEachIdx forEachParticle(workerIdx); - forEachParticle( - [ & ]( - uint32_t const, - uint32_t const idx - ) + forEachParticle([&](uint32_t const, uint32_t const idx) { + /* get one particle */ + auto& particle = currentParticleCtx[idx]; + if(accFilter(acc, particle)) { - /* get one particle */ - auto & particle = currentParticleCtx[ idx ]; - if( accFilter( acc, particle ) ) - { - float_X const weighting = particle[ weighting_ ]; - float_X const normedWeighting = weighting / float_X( particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE ); - float3_X const mom = particle[ momentum_ ] / weighting; - floatD_X const pos = particle[ position_ ]; - lcellId_t const cellIdx = particle[ localCellIdx_ ]; - DataSpace< simDim > const frameCellOffset( - DataSpaceOperations< simDim >::template - map< MappingDesc::SuperCellSize > ( cellIdx ) - ); - auto const localSupercellStart = ( - superCellIdx - - mapper.getGuardingSuperCells( ) - ) * MappingDesc::SuperCellSize::toRT( ); - int const index_y = frameCellOffset.y( ); - auto const globalCellOffset = globalOffset - + localSupercellStart - + frameCellOffset; - float_X const posX = ( float_X( globalCellOffset.x( ) ) + pos.x( ) ) * cellSize.x( ); - - atomicAdd( - &( shCount_e[ index_y ] ), - normedWeighting, - ::alpaka::hierarchy::Threads{ } - ); - //weighted sum of single Electron values (Momentum = particle_momentum/weighting) - atomicAdd( - &( shSumMom2[ index_y ] ), - mom.x( ) * mom.x( ) * normedWeighting, - ::alpaka::hierarchy::Threads{ } - ); - atomicAdd( - &( shSumPos2[ index_y ] ), - posX * posX * normedWeighting, - ::alpaka::hierarchy::Threads{ } - ); - atomicAdd( - &( shSumMomPos[ index_y ] ), 
- mom.x( ) * posX * normedWeighting, - ::alpaka::hierarchy::Threads{ } - ); - } + float_X const weighting = particle[weighting_]; + float_X const normedWeighting + = weighting / float_X(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); + float3_X const mom = particle[momentum_] / weighting; + floatD_X const pos = particle[position_]; + lcellId_t const cellIdx = particle[localCellIdx_]; + DataSpace const frameCellOffset( + DataSpaceOperations::template map(cellIdx)); + auto const localSupercellStart + = (superCellIdx - mapper.getGuardingSuperCells()) * MappingDesc::SuperCellSize::toRT(); + int const index_y = frameCellOffset.y(); + auto const globalCellOffset = globalOffset + localSupercellStart + frameCellOffset; + float_X const posX = (float_X(globalCellOffset.x()) + pos.x()) * cellSize.x(); + + cupla::atomicAdd(acc, &(shCount_e[index_y]), normedWeighting, ::alpaka::hierarchy::Threads{}); + // weighted sum of single Electron values (Momentum = particle_momentum/weighting) + cupla::atomicAdd( + acc, + &(shSumMom2[index_y]), + mom.x() * mom.x() * normedWeighting, + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd( + acc, + &(shSumPos2[index_y]), + posX * posX * normedWeighting, + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd( + acc, + &(shSumMomPos[index_y]), + mom.x() * posX * normedWeighting, + ::alpaka::hierarchy::Threads{}); } - ); + }); // set frame to next particle frame - frame = pb.getPreviousFrame( frame ); - forEachParticle( - [ & ]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - /* Update particle for the next round. - * The frame list is traversed from the last to the first frame. - * Only the last frame can contain gaps therefore all following - * frames are fully filled with particles. - */ - currentParticleCtx[ idx ] = frame[ linearIdx ]; - } - ); + frame = pb.getPreviousFrame(frame); + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + /* Update particle for the next round. 
+ * The frame list is traversed from the last to the first frame. + * Only the last frame can contain gaps therefore all following + * frames are fully filled with particles. + */ + currentParticleCtx[idx] = frame[linearIdx]; + }); } // wait that all virtual threads updated the shared memory - __syncthreads( ); - - const int gOffset = ( - ( - superCellIdx - - mapper.getGuardingSuperCells( ) - ) * - MappingDesc::SuperCellSize::toRT( ) ).y( ); - - ForEachIdx< SuperCellYDom >{ workerIdx }( - [ & ]( - uint32_t const linearIdx, - uint32_t const - ) - { - atomicAdd( - &( gSumMom2[ gOffset + linearIdx ] ), - static_cast< float_64 >( shSumMom2[ linearIdx ] ), - ::alpaka::hierarchy::Blocks{ } - ); - atomicAdd( - &( gSumPos2[ gOffset + linearIdx ] ), - static_cast< float_64 >( shSumPos2[ linearIdx ] ), - ::alpaka::hierarchy::Blocks{ } - ); - atomicAdd( - &( gSumMomPos[ gOffset + linearIdx ] ), - static_cast< float_64 >( shSumMomPos[ linearIdx ] ), - ::alpaka::hierarchy::Blocks{ } - ); - atomicAdd( - &( gCount_e[ gOffset + linearIdx ] ), - static_cast< float_64 >( shCount_e[ linearIdx ] ), - ::alpaka::hierarchy::Blocks{ } - ); - } - ); + cupla::__syncthreads(acc); + + const int gOffset + = ((superCellIdx - mapper.getGuardingSuperCells()) * MappingDesc::SuperCellSize::toRT()).y(); + + ForEachIdx{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + cupla::atomicAdd( + acc, + &(gSumMom2[gOffset + linearIdx]), + static_cast(shSumMom2[linearIdx]), + ::alpaka::hierarchy::Blocks{}); + cupla::atomicAdd( + acc, + &(gSumPos2[gOffset + linearIdx]), + static_cast(shSumPos2[linearIdx]), + ::alpaka::hierarchy::Blocks{}); + cupla::atomicAdd( + acc, + &(gSumMomPos[gOffset + linearIdx]), + static_cast(shSumMomPos[linearIdx]), + ::alpaka::hierarchy::Blocks{}); + cupla::atomicAdd( + acc, + &(gCount_e[gOffset + linearIdx]), + static_cast(shCount_e[linearIdx]), + ::alpaka::hierarchy::Blocks{}); + }); } }; - template< typename ParticlesType > + template class CalcEmittance : public 
plugins::multi::ISlave { public: - struct Help : public plugins::multi::IHelp { - /** creates an instance of ISlave * * @tparam T_Slave type of the interface implementation (must inherit from ISlave) * @param help plugin defined help * @param id index of the plugin, range: [ 0;help->getNumPlugins( ) ) */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) + std::shared_ptr create(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) { - return std::shared_ptr< ISlave >( - new CalcEmittance< ParticlesType >( - help, - id, - cellDescription - ) - ); + return std::shared_ptr(new CalcEmittance(help, id, cellDescription)); } // find all valid filter for the current used species - using EligibleFilters = typename MakeSeqFromNestedSeq< - typename bmpl::transform< - particles::filter::AllParticleFilters, - particles::traits::GenerateSolversIfSpeciesEligible< - bmpl::_1, - ParticlesType - > - >::type - >::type; + using EligibleFilters = typename MakeSeqFromNestedSeq>::type>::type; //! periodicity of computing the particle energy - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "compute slice emittance[for each n-th step] enable plugin by setting a non-zero value" - }; - plugins::multi::Option< std::string > filter = { - "filter", - "particle filter: " - }; + plugins::multi::Option notifyPeriod + = {"period", "compute slice emittance[for each n-th step] enable plugin by setting a non-zero value"}; + plugins::multi::Option filter = {"filter", "particle filter: "}; //! string list with all possible particle filters std::string concatenatedFilterNames; - std::vector< std::string > allowedFilters; + std::vector allowedFilters; ///! 
method used by plugin controller to get --help description void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { - meta::ForEach< - EligibleFilters, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleFilterNames; - getEligibleFilterNames( allowedFilters ); - - concatenatedFilterNames = plugins::misc::concatenateToString( - allowedFilters, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - filter.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedFilterNames + "]" - ); + meta::ForEach> getEligibleFilterNames; + getEligibleFilterNames(allowedFilters); + + concatenatedFilterNames = plugins::misc::concatenateToString(allowedFilters, ", "); + + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + filter.registerHelp(desc, masterPrefix + prefix, std::string("[") + concatenatedFilterNames + "]"); } void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { } - void validateOptions( ) + void validateOptions() { - if( notifyPeriod.size( ) != filter.size( ) ) - throw std::runtime_error( name + ": parameter filter and period are not used the same number of times" ); + if(notifyPeriod.size() != filter.size()) + throw std::runtime_error( + name + ": parameter filter and period are not used the same number of times"); // check if user passed filter name is valid - for( auto const & filterName : filter ) + for(auto const& filterName : filter) { - if( - std::find( - allowedFilters.begin( ), - allowedFilters.end( ), - filterName - ) == allowedFilters.end( ) - ) + if(std::find(allowedFilters.begin(), allowedFilters.end(), filterName) == allowedFilters.end()) { - 
throw std::runtime_error( name + ": unknown filter '" + filterName + "'" ); + throw std::runtime_error(name + ": unknown filter '" + filterName + "'"); } } } - size_t getNumPlugins( ) const + size_t getNumPlugins() const { - return notifyPeriod.size( ); + return notifyPeriod.size(); } - std::string getDescription( ) const + std::string getDescription() const { return description; } - std::string getOptionPrefix( ) const + std::string getOptionPrefix() const { return prefix; } - std::string getName( ) const + std::string getName() const { return name; } @@ -467,25 +329,21 @@ namespace picongpu //! short description of the plugin std::string const description = "calculate the slice emittance of a species"; //! prefix used for command line arguments - std::string const prefix = ParticlesType::FrameType::getName( ) + std::string( "_emittance" ); + std::string const prefix = ParticlesType::FrameType::getName() + std::string("_emittance"); }; //! must be implemented by the user - static std::shared_ptr< plugins::multi::IHelp > getHelp( ) + static std::shared_ptr getHelp() { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); + return std::shared_ptr(new Help{}); } - CalcEmittance( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help >( help ) ), - m_id( id ), - m_cellDescription( cellDescription ) + CalcEmittance(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) { - filename = m_help->getOptionPrefix( ) + "_" + m_help->filter.get( m_id ) + ".dat"; + filename = m_help->getOptionPrefix() + "_" + m_help->filter.get(m_id) + ".dat"; // reduce in same x-z plane constexpr uint32_t r_element = 1u; // y-direction @@ -496,462 +354,388 @@ namespace picongpu * spatial x and z direction to node with * lowest x and z position ("corner") and same x range */ - 
pmacc::GridController< simDim >& gc = pmacc::Environment< simDim >::get( ).GridController( ); - pmacc::math::Size_t< simDim > gpuDim = gc.getGpuNodes( ); - pmacc::math::Int< simDim > gpuPos = gc.getPosition( ); + pmacc::GridController& gc = pmacc::Environment::get().GridController(); + pmacc::math::Size_t gpuDim = gc.getGpuNodes(); + pmacc::math::Int gpuPos = gc.getPosition(); /* my plane means: the r_element I am calculating should be 1GPU in width */ - pmacc::math::Size_t< simDim > sizeTransversalPlane( gpuDim ); - sizeTransversalPlane[ r_element ] = 1; + pmacc::math::Size_t sizeTransversalPlane(gpuDim); + sizeTransversalPlane[r_element] = 1; // avoid deadlock for following, blocking MPI operations - __getTransactionEvent( ).waitForFinished( ); + __getTransactionEvent().waitForFinished(); - for( int planePos = 0; planePos <= ( int )gpuDim[ r_element ]; ++planePos ) + for(int planePos = 0; planePos <= (int) gpuDim[r_element]; ++planePos) { /* my plane means: the offset for the transversal plane to my r_element * should be zero */ - pmacc::math::Int< simDim > longOffset( pmacc::math::Int< simDim >::create( 0 ) ); - longOffset[ r_element ] = planePos; + pmacc::math::Int longOffset(pmacc::math::Int::create(0)); + longOffset[r_element] = planePos; - zone::SphericZone< simDim > zoneTransversalPlane( sizeTransversalPlane, longOffset ); + zone::SphericZone zoneTransversalPlane(sizeTransversalPlane, longOffset); /* Am I the lowest GPU in my plane? 
*/ bool isGroupRoot = false; - bool isInGroup = ( gpuPos[ r_element ] == planePos ); - if( isInGroup ) + bool isInGroup = (gpuPos[r_element] == planePos); + if(isInGroup) { - pmacc::math::Int< simDim > inPlaneGPU( gpuPos ); - inPlaneGPU[ r_element ] = 0; - if( inPlaneGPU == pmacc::math::Int< simDim >::create( 0 ) ) + pmacc::math::Int inPlaneGPU(gpuPos); + inPlaneGPU[r_element] = 0; + if(inPlaneGPU == pmacc::math::Int::create(0)) isGroupRoot = true; } - algorithm::mpi::Reduce< simDim >* createReduce = - new algorithm::mpi::Reduce< simDim >( zoneTransversalPlane, - isGroupRoot ); - if( isInGroup ) + algorithm::mpi::Reduce* createReduce + = new algorithm::mpi::Reduce(zoneTransversalPlane, isGroupRoot); + if(isInGroup) { planeReduce = createReduce; isPlaneReduceRoot = isGroupRoot; } else - __delete( createReduce ); + __delete(createReduce); } /* Create communicator with ranks of each plane reduce root */ { /* Array with root ranks of the planeReduce operations */ - std::vector< int > planeReduceRootRanks( gc.getGlobalSize( ), -1 ); + std::vector planeReduceRootRanks(gc.getGlobalSize(), -1); /* Am I one of the planeReduce root ranks? 
my global rank : -1 */ - int myRootRank = gc.getGlobalRank( ) * isPlaneReduceRoot - - ( !isPlaneReduceRoot ); + int myRootRank = gc.getGlobalRank() * isPlaneReduceRoot - (!isPlaneReduceRoot); MPI_Group world_group, new_group; - MPI_CHECK( - MPI_Allgather( - &myRootRank, - 1, - MPI_INT, - planeReduceRootRanks.data( ), - 1, - MPI_INT, - gc.getCommunicator().getMPIComm() - ) - ); + MPI_CHECK(MPI_Allgather( + &myRootRank, + 1, + MPI_INT, + planeReduceRootRanks.data(), + 1, + MPI_INT, + gc.getCommunicator().getMPIComm())); /* remove all non-roots (-1 values) */ - std::sort( planeReduceRootRanks.begin( ), planeReduceRootRanks.end( ) ); - std::vector< int > ranks( - std::lower_bound( - planeReduceRootRanks.begin( ), - planeReduceRootRanks.end( ), - 0 - ), - planeReduceRootRanks.end( ) - ); - - MPI_CHECK( MPI_Comm_group( gc.getCommunicator().getMPIComm(), &world_group ) ); - MPI_CHECK( MPI_Group_incl( world_group, ranks.size( ), ranks.data( ), &new_group ) ); - MPI_CHECK( MPI_Comm_create( gc.getCommunicator().getMPIComm(), new_group, &commGather ) ); - MPI_CHECK( MPI_Group_free( &new_group ) ); - MPI_CHECK( MPI_Group_free( &world_group ) ); + std::sort(planeReduceRootRanks.begin(), planeReduceRootRanks.end()); + std::vector ranks( + std::lower_bound(planeReduceRootRanks.begin(), planeReduceRootRanks.end(), 0), + planeReduceRootRanks.end()); + + MPI_CHECK(MPI_Comm_group(gc.getCommunicator().getMPIComm(), &world_group)); + MPI_CHECK(MPI_Group_incl(world_group, ranks.size(), ranks.data(), &new_group)); + MPI_CHECK(MPI_Comm_create(gc.getCommunicator().getMPIComm(), new_group, &commGather)); + MPI_CHECK(MPI_Group_free(&new_group)); + MPI_CHECK(MPI_Group_free(&world_group)); } // decide which MPI-rank writes output int gatherRank = -1; - if( commGather != MPI_COMM_NULL ) - MPI_CHECK( MPI_Comm_rank( commGather, &gatherRank ) ); - writeToFile = ( gatherRank == 0 ); - - const SubGrid< simDim >& subGrid = Environment< simDim >::get( ).SubGrid( ); - gSumMom2 = new GridBuffer< - 
float_64, - DIM1 - >( DataSpace< DIM1 >( subGrid.getLocalDomain( ).size.y( ) ) ); - gSumPos2 = new GridBuffer< - float_64, - DIM1 - >( DataSpace< DIM1 >( subGrid.getLocalDomain( ).size.y( ) ) ); - gSumMomPos = new GridBuffer< - float_64, - DIM1 - >( DataSpace< DIM1 >( subGrid.getLocalDomain( ).size.y( ) ) ); - gCount_e = new GridBuffer< - float_64, - DIM1 - >( DataSpace< DIM1 >( subGrid.getLocalDomain( ).size.y( ) ) ); + if(commGather != MPI_COMM_NULL) + MPI_CHECK(MPI_Comm_rank(commGather, &gatherRank)); + writeToFile = (gatherRank == 0); + + const SubGrid& subGrid = Environment::get().SubGrid(); + gSumMom2 = new GridBuffer(DataSpace(subGrid.getLocalDomain().size.y())); + gSumPos2 = new GridBuffer(DataSpace(subGrid.getLocalDomain().size.y())); + gSumMomPos = new GridBuffer(DataSpace(subGrid.getLocalDomain().size.y())); + gCount_e = new GridBuffer(DataSpace(subGrid.getLocalDomain().size.y())); // only MPI rank that writes to file - if( writeToFile ) + if(writeToFile) { // open output file - outFile.open( - filename.c_str( ), - std::ofstream::out | std::ostream::trunc - ); + outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); // error handling - if( !outFile ) + if(!outFile) { - std::cerr << - "Can't open file [" << - filename << - "] for output, diasble plugin output. " << - std::endl; + std::cerr << "Can't open file [" << filename << "] for output, diasble plugin output. 
" + << std::endl; writeToFile = false; } } // set how often the plugin should be executed while PIConGPU is running - Environment< >::get( ).PluginConnector( ).setNotificationPeriod( - this, - m_help->notifyPeriod.get( id ) - ); + Environment<>::get().PluginConnector().setNotificationPeriod(this, m_help->notifyPeriod.get(id)); } - virtual ~CalcEmittance( ) + virtual ~CalcEmittance() { - if( writeToFile ) + if(writeToFile) { // flush cached data to file - outFile.flush( ) << std::endl; + outFile.flush() << std::endl; - if( outFile.fail( ) ) + if(outFile.fail()) std::cerr << "Error on flushing file [" << filename << "]. " << std::endl; - outFile.close( ); + outFile.close(); } // free global memory on GPU - __delete( gSumMom2 ); - __delete( gSumPos2 ); - __delete( gSumMomPos ); - __delete( gCount_e ); + __delete(gSumMom2); + __delete(gSumPos2); + __delete(gSumMomPos); + __delete(gCount_e); } /** this code is executed if the current time step is supposed to compute * gSumMom2, gSumPos2, gSumMomPos, gCount_e */ - void notify( uint32_t currentStep ) + void notify(uint32_t currentStep) { // call the method that calls the plugin kernel - calculateCalcEmittance < CORE + BORDER > ( currentStep ); + calculateCalcEmittance(currentStep); } - void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) + void restart(uint32_t restartStep, std::string const& restartDirectory) { - if( !writeToFile ) + if(!writeToFile) return; - writeToFile = restoreTxtFile( - outFile, - filename, - restartStep, - restartDirectory - ); + writeToFile = restoreTxtFile(outFile, filename, restartStep, restartDirectory); } - void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) + void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) { - if( !writeToFile ) + if(!writeToFile) return; - checkpointTxtFile( - outFile, - filename, - currentStep, - checkpointDirectory - ); + checkpointTxtFile(outFile, filename, currentStep, 
checkpointDirectory); } private: //! method to call analysis and plugin-kernel calls - template< uint32_t AREA > - void calculateCalcEmittance( uint32_t currentStep ) + template + void calculateCalcEmittance(uint32_t currentStep) { - DataConnector &dc = Environment< >::get( ).DataConnector( ); + DataConnector& dc = Environment<>::get().DataConnector(); // use data connector to get particle data - auto particles = dc.get< ParticlesType >( - ParticlesType::FrameType::getName( ), - true - ); - - gSumMom2->getDeviceBuffer( ).setValue( 0.0 ); - gSumPos2->getDeviceBuffer( ).setValue( 0.0 ); - gSumMomPos->getDeviceBuffer( ).setValue( 0.0 ); - gCount_e->getDeviceBuffer( ).setValue( 0.0 ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - AreaMapping< - AREA, - MappingDesc - > mapper( *m_cellDescription ); - - auto kernel = PMACC_KERNEL( KernelCalcEmittance< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - ); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); + + gSumMom2->getDeviceBuffer().setValue(0.0); + gSumPos2->getDeviceBuffer().setValue(0.0); + gSumMomPos->getDeviceBuffer().setValue(0.0); + gCount_e->getDeviceBuffer().setValue(0.0); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + AreaMapping mapper(*m_cellDescription); + + auto kernel = PMACC_KERNEL(KernelCalcEmittance{})(mapper.getGridDim(), numWorkers); // Some variables required so that it is possible for the kernel // to calculate the absolute position of the particles - DataSpace< simDim > localSize( m_cellDescription->getGridLayout( ).getDataSpaceWithoutGuarding( ) ); - const SubGrid< simDim >& subGrid = Environment< simDim >::get( ).SubGrid( ); - const int subGridY = subGrid.getGlobalDomain( ).size.y( ); - auto movingWindow = MovingWindow::getInstance( ).getWindow( currentStep ); - DataSpace< simDim > globalOffset( subGrid.getLocalDomain( ).offset ); + 
DataSpace localSize(m_cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + const SubGrid& subGrid = Environment::get().SubGrid(); + const int subGridY = subGrid.getGlobalDomain().size.y(); + auto movingWindow = MovingWindow::getInstance().getWindow(currentStep); + DataSpace globalOffset(subGrid.getLocalDomain().offset); auto binaryKernel = std::bind( kernel, - particles->getDeviceParticlesBox( ), - gSumMom2->getDeviceBuffer( ).getDataBox( ), - gSumPos2->getDeviceBuffer( ).getDataBox( ), - gSumMomPos->getDeviceBuffer( ).getDataBox( ), - gCount_e->getDeviceBuffer( ).getDataBox( ), + particles->getDeviceParticlesBox(), + gSumMom2->getDeviceBuffer().getDataBox(), + gSumPos2->getDeviceBuffer().getDataBox(), + gSumMomPos->getDeviceBuffer().getDataBox(), + gCount_e->getDeviceBuffer().getDataBox(), globalOffset, subGridY, mapper, - std::placeholders::_1 - ); - - meta::ForEach< - typename Help::EligibleFilters, - plugins::misc::ExecuteIfNameIsEqual< bmpl::_1 > - >{ }( - m_help->filter.get( m_id ), + std::placeholders::_1); + + meta::ForEach>{}( + m_help->filter.get(m_id), currentStep, - binaryKernel - ); + binaryKernel); - dc.releaseData( ParticlesType::FrameType::getName( ) ); + dc.releaseData(ParticlesType::FrameType::getName()); // get gSum, ... 
from GPU - gSumMom2->deviceToHost( ); - gSumPos2->deviceToHost( ); - gSumMomPos->deviceToHost( ); - gCount_e->deviceToHost( ); - - container::HostBuffer< float_64, DIM1 > reducedSumMom2( subGrid.getLocalDomain( ).size.y( ) ); - container::HostBuffer< float_64, DIM1 > reducedSumPos2( subGrid.getLocalDomain( ).size.y( ) ); - container::HostBuffer< float_64, DIM1 > reducedSumMomPos( subGrid.getLocalDomain( ).size.y( ) ); - container::HostBuffer< float_64, DIM1 > reducedCount_e( subGrid.getLocalDomain( ).size.y( ) ); - reducedSumMom2.assign( 0.0 ); - reducedSumPos2.assign( 0.0 ); - reducedSumMomPos.assign( 0.0 ); - reducedCount_e.assign( 0.0 ); + gSumMom2->deviceToHost(); + gSumPos2->deviceToHost(); + gSumMomPos->deviceToHost(); + gCount_e->deviceToHost(); + + container::HostBuffer reducedSumMom2(subGrid.getLocalDomain().size.y()); + container::HostBuffer reducedSumPos2(subGrid.getLocalDomain().size.y()); + container::HostBuffer reducedSumMomPos(subGrid.getLocalDomain().size.y()); + container::HostBuffer reducedCount_e(subGrid.getLocalDomain().size.y()); + reducedSumMom2.assign(0.0); + reducedSumPos2.assign(0.0); + reducedSumMomPos.assign(0.0); + reducedCount_e.assign(0.0); // add gSum values from all GPUs using MPI - planeReduce->template operator( )( /* parameters: dest, source */ - reducedSumMom2, - gSumMom2->getHostBuffer( ).cartBuffer( ), - /* the functors return value will be written to dst */ - pmacc::algorithm::functor::Add( ) - ); - planeReduce->template operator( )( /* parameters: dest, source */ - reducedSumPos2, - gSumPos2->getHostBuffer( ).cartBuffer( ), - /* the functors return value will be written to dst */ - pmacc::algorithm::functor::Add( ) - ); - planeReduce->template operator( )( /* parameters: dest, source */ - reducedSumMomPos, - gSumMomPos->getHostBuffer( ).cartBuffer( ), - /* the functors return value will be written to dst */ - pmacc::algorithm::functor::Add( ) - ); - planeReduce->template operator( )( /* parameters: dest, source */ - 
reducedCount_e, - gCount_e->getHostBuffer( ).cartBuffer( ), - /* the functors return value will be written to dst */ - pmacc::algorithm::functor::Add( ) - ); + planeReduce->template operator()(/* parameters: dest, source */ + reducedSumMom2, + gSumMom2->getHostBuffer().cartBuffer(), + /* the functors return value will be written to dst */ + pmacc::algorithm::functor::Add()); + planeReduce->template operator()(/* parameters: dest, source */ + reducedSumPos2, + gSumPos2->getHostBuffer().cartBuffer(), + /* the functors return value will be written to dst */ + pmacc::algorithm::functor::Add()); + planeReduce->template operator()(/* parameters: dest, source */ + reducedSumMomPos, + gSumMomPos->getHostBuffer().cartBuffer(), + /* the functors return value will be written to dst */ + pmacc::algorithm::functor::Add()); + planeReduce->template operator()(/* parameters: dest, source */ + reducedCount_e, + gCount_e->getHostBuffer().cartBuffer(), + /* the functors return value will be written to dst */ + pmacc::algorithm::functor::Add()); /** all non-reduce-root processes are done now */ - if( ! 
isPlaneReduceRoot ) + if(!isPlaneReduceRoot) return; // gather to file writer - container::HostBuffer< float_64, DIM1 > globalSumMom2( subGrid.getGlobalDomain( ).size.y( ) ); - container::HostBuffer< float_64, DIM1 > globalSumPos2( subGrid.getGlobalDomain( ).size.y( ) ); - container::HostBuffer< float_64, DIM1 > globalSumMomPos( subGrid.getGlobalDomain( ).size.y( ) ); - container::HostBuffer< float_64, DIM1 > globalCount_e( subGrid.getGlobalDomain( ).size.y( ) ); + container::HostBuffer globalSumMom2(subGrid.getGlobalDomain().size.y()); + container::HostBuffer globalSumPos2(subGrid.getGlobalDomain().size.y()); + container::HostBuffer globalSumMomPos(subGrid.getGlobalDomain().size.y()); + container::HostBuffer globalCount_e(subGrid.getGlobalDomain().size.y()); // gather y offsets, so we can store our gathered data in the right order int gatherSize = -1; - MPI_CHECK( MPI_Comm_size( commGather, &gatherSize ) ); - std::vector< int > y_offsets( gatherSize ); - std::vector< int > y_sizes( gatherSize ); - long int const y_off = subGrid.getLocalDomain( ).offset.y( ); - int const y_siz = subGrid.getLocalDomain( ).size.y( ); - - MPI_CHECK( MPI_Gather( - &y_off, - 1, - MPI_INT, - y_offsets.data( ), - 1, - MPI_INT, - 0, - commGather - ) ); - MPI_CHECK( MPI_Gather( - &y_siz, - 1, - MPI_INT, - y_sizes.data( ), - 1, - MPI_INT, - 0, - commGather - ) ); + MPI_CHECK(MPI_Comm_size(commGather, &gatherSize)); + std::vector y_offsets(gatherSize); + std::vector y_sizes(gatherSize); + long int const y_off = subGrid.getLocalDomain().offset.y(); + int const y_siz = subGrid.getLocalDomain().size.y(); + MPI_CHECK(MPI_Gather(&y_off, 1, MPI_INT, y_offsets.data(), 1, MPI_INT, 0, commGather)); + MPI_CHECK(MPI_Gather(&y_siz, 1, MPI_INT, y_sizes.data(), 1, MPI_INT, 0, commGather)); - std::vector< int > recvcounts( gatherSize, 1 ); - MPI_CHECK( MPI_Gatherv( - reducedSumMom2.getDataPointer( ), - subGrid.getLocalDomain( ).size.y( ), + std::vector recvcounts(gatherSize, 1); + + MPI_CHECK(MPI_Gatherv( + 
reducedSumMom2.getDataPointer(), + subGrid.getLocalDomain().size.y(), MPI_DOUBLE, - globalSumMom2.getDataPointer( ), - y_sizes.data( ), - y_offsets.data( ), + globalSumMom2.getDataPointer(), + y_sizes.data(), + y_offsets.data(), MPI_DOUBLE, 0, - commGather - ) ); - MPI_CHECK( MPI_Gatherv( - reducedSumPos2.getDataPointer( ), - subGrid.getLocalDomain( ).size.y( ), + commGather)); + MPI_CHECK(MPI_Gatherv( + reducedSumPos2.getDataPointer(), + subGrid.getLocalDomain().size.y(), MPI_DOUBLE, - globalSumPos2.getDataPointer( ), - y_sizes.data( ), - y_offsets.data( ), + globalSumPos2.getDataPointer(), + y_sizes.data(), + y_offsets.data(), MPI_DOUBLE, 0, - commGather - ) ); - MPI_CHECK( MPI_Gatherv( - reducedSumMomPos.getDataPointer( ), - subGrid.getLocalDomain( ).size.y( ), + commGather)); + MPI_CHECK(MPI_Gatherv( + reducedSumMomPos.getDataPointer(), + subGrid.getLocalDomain().size.y(), MPI_DOUBLE, - globalSumMomPos.getDataPointer( ), - y_sizes.data( ), - y_offsets.data( ), + globalSumMomPos.getDataPointer(), + y_sizes.data(), + y_offsets.data(), MPI_DOUBLE, 0, - commGather - ) ); - MPI_CHECK( MPI_Gatherv( - reducedCount_e.getDataPointer( ), - subGrid.getLocalDomain( ).size.y( ), + commGather)); + MPI_CHECK(MPI_Gatherv( + reducedCount_e.getDataPointer(), + subGrid.getLocalDomain().size.y(), MPI_DOUBLE, - globalCount_e.getDataPointer( ), - y_sizes.data( ), - y_offsets.data( ), + globalCount_e.getDataPointer(), + y_sizes.data(), + y_offsets.data(), MPI_DOUBLE, 0, - commGather - ) ); + commGather)); /* print timestep, emittance to file: */ - if( writeToFile ) + if(writeToFile) { - using dbl = std::numeric_limits< float_64 >; - outFile.precision( dbl::digits10 ); - if ( currentStep > 0.0 ){ - int startWindow_y = movingWindow.globalDimensions.offset.y( ); - int endWindow_y = movingWindow.globalDimensions.size.y( ) + startWindow_y; - if ( fisttimestep == true ) + using dbl = std::numeric_limits; + outFile.precision(dbl::digits10); + if(currentStep > 0.0) + { + int startWindow_y = 
movingWindow.globalDimensions.offset.y(); + int endWindow_y = movingWindow.globalDimensions.size.y() + startWindow_y; + if(fisttimestep == true) { outFile << "#step emit_all" << std::scientific; - for ( int i = startWindow_y; i < ( endWindow_y + 10 ); i += 10 ) + for(int i = startWindow_y; i < (endWindow_y + 10); i += 10) { outFile << " " << i * SI::CELL_HEIGHT_SI; } outFile << std::endl; fisttimestep = false; } - outFile << currentStep << " " - << std::scientific; + outFile << currentStep << " " << std::scientific; long double numElec_all = 0.0; long double ux2_all = 0.0; long double pos2_SI_all = 0.0; long double xux_all = 0.0; - for ( int i = startWindow_y; i < endWindow_y; i++ ) + for(int i = startWindow_y; i < endWindow_y; i++) { - numElec_all += static_cast< long double >( globalCount_e.getDataPointer( )[ i ] ); - ux2_all += static_cast< long double >( globalSumMom2.getDataPointer( )[ i ] ) * UNIT_MASS * UNIT_MASS / ( SI::ELECTRON_MASS_SI * SI::ELECTRON_MASS_SI ); - pos2_SI_all += static_cast< long double >( globalSumPos2.getDataPointer( )[ i ] ) * UNIT_LENGTH * UNIT_LENGTH ; - xux_all += static_cast< long double >( globalSumMomPos.getDataPointer( )[ i ] ) * UNIT_MASS * UNIT_LENGTH / SI::ELECTRON_MASS_SI; + numElec_all += static_cast(globalCount_e.getDataPointer()[i]); + ux2_all += static_cast(globalSumMom2.getDataPointer()[i]) * UNIT_MASS * UNIT_MASS + / (SI::ELECTRON_MASS_SI * SI::ELECTRON_MASS_SI); + pos2_SI_all + += static_cast(globalSumPos2.getDataPointer()[i]) * UNIT_LENGTH * UNIT_LENGTH; + xux_all += static_cast(globalSumMomPos.getDataPointer()[i]) * UNIT_MASS + * UNIT_LENGTH / SI::ELECTRON_MASS_SI; } - /* the scaling with normalized weighting (weighting / particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE) - * is compendated by the division by (normalized) number of particles + /* the scaling with normalized weighting (weighting / + * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE) is compendated by the division by + * (normalized) number of 
particles */ - float_64 emit_all = algorithms::math::sqrt( - static_cast< float_64 >( pos2_SI_all ) * static_cast< float_64 >( ux2_all ) - - static_cast< float_64 >( xux_all ) * static_cast< float_64 >( xux_all ) - ) / static_cast< float_64 >( numElec_all ); + float_64 emit_all = math::sqrt( + static_cast(pos2_SI_all) * static_cast(ux2_all) + - static_cast(xux_all) * static_cast(xux_all)) + / static_cast(numElec_all); - if ( emit_all > 0.0 ){ + if(emit_all > 0.0) + { outFile << emit_all << " "; } - else { + else + { outFile << "0.0 "; } - for ( int i = startWindow_y; i < endWindow_y; i += 10 ) + for(int i = startWindow_y; i < endWindow_y; i += 10) { - float_64 numElec = globalCount_e.getDataPointer( )[ i ]; - float_64 mom2_SI = globalSumMom2.getDataPointer( )[ i ] * UNIT_MASS * UNIT_SPEED * UNIT_MASS * UNIT_SPEED; - float_64 pos2_SI = globalSumPos2.getDataPointer( )[ i ] * UNIT_LENGTH * UNIT_LENGTH ; - float_64 mompos_SI = globalSumMomPos.getDataPointer( )[ i ] * UNIT_MASS * UNIT_SPEED * UNIT_LENGTH; - for ( int j = i + 1; j < i + 10 && j < endWindow_y; j++ ){ - numElec += globalCount_e.getDataPointer( )[ j ]; - mom2_SI += globalSumMom2.getDataPointer( )[ j ] * UNIT_MASS * UNIT_SPEED * UNIT_MASS * UNIT_SPEED; - pos2_SI += globalSumPos2.getDataPointer( )[ j ] * UNIT_LENGTH * UNIT_LENGTH; - mompos_SI += globalSumMomPos.getDataPointer( )[ j ] * UNIT_MASS * UNIT_SPEED * UNIT_LENGTH; + float_64 numElec = globalCount_e.getDataPointer()[i]; + float_64 mom2_SI + = globalSumMom2.getDataPointer()[i] * UNIT_MASS * UNIT_SPEED * UNIT_MASS * UNIT_SPEED; + float_64 pos2_SI = globalSumPos2.getDataPointer()[i] * UNIT_LENGTH * UNIT_LENGTH; + float_64 mompos_SI + = globalSumMomPos.getDataPointer()[i] * UNIT_MASS * UNIT_SPEED * UNIT_LENGTH; + for(int j = i + 1; j < i + 10 && j < endWindow_y; j++) + { + numElec += globalCount_e.getDataPointer()[j]; + mom2_SI + += globalSumMom2.getDataPointer()[j] * UNIT_MASS * UNIT_SPEED * UNIT_MASS * UNIT_SPEED; + pos2_SI += 
globalSumPos2.getDataPointer()[j] * UNIT_LENGTH * UNIT_LENGTH; + mompos_SI += globalSumMomPos.getDataPointer()[j] * UNIT_MASS * UNIT_SPEED * UNIT_LENGTH; } - float_64 ux2 = mom2_SI / ( UNIT_SPEED * UNIT_SPEED * SI::ELECTRON_MASS_SI * SI::ELECTRON_MASS_SI ); - float_64 xux = mompos_SI / ( UNIT_SPEED * SI::ELECTRON_MASS_SI ); - float_64 emit = algorithms::math::sqrt( ( pos2_SI * ux2 - xux * xux ) ) / numElec; - if( numElec < std::numeric_limits< float_64 >::epsilon( ) ){ + float_64 ux2 + = mom2_SI / (UNIT_SPEED * UNIT_SPEED * SI::ELECTRON_MASS_SI * SI::ELECTRON_MASS_SI); + float_64 xux = mompos_SI / (UNIT_SPEED * SI::ELECTRON_MASS_SI); + float_64 emit = math::sqrt((pos2_SI * ux2 - xux * xux)) / numElec; + if(numElec < std::numeric_limits::epsilon()) + { outFile << "0.0 "; } - else if( emit > 0.0 && emit < std::numeric_limits< float_64 >::max( ) ){ + else if(emit > 0.0 && emit < std::numeric_limits::max()) + { outFile << emit << " "; } - else{ + else + { outFile << "-0.0 "; } } @@ -960,25 +744,13 @@ namespace picongpu } } - GridBuffer< - float_64, - DIM1 - >* gSumMom2 = nullptr; + GridBuffer* gSumMom2 = nullptr; - GridBuffer< - float_64, - DIM1 - >* gSumPos2 = nullptr; + GridBuffer* gSumPos2 = nullptr; - GridBuffer< - float_64, - DIM1 - >* gSumMomPos = nullptr; + GridBuffer* gSumMomPos = nullptr; - GridBuffer< - float_64, - DIM1 - >* gCount_e = nullptr; + GridBuffer* gCount_e = nullptr; MappingDesc* m_cellDescription = nullptr; @@ -996,54 +768,37 @@ namespace picongpu bool fisttimestep = true; /** reduce functor to a single host per plane */ - pmacc::algorithm::mpi::Reduce< simDim >* planeReduce = nullptr; + pmacc::algorithm::mpi::Reduce* planeReduce = nullptr; bool isPlaneReduceRoot = false; /** MPI communicator that contains the root ranks of the \p planeReduce */ MPI_Comm commGather = MPI_COMM_NULL; - std::shared_ptr< Help > m_help; + std::shared_ptr m_help; size_t m_id; }; -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename 
T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - CalcEmittance< T_UnspecifiedSpecies > - > + namespace particles { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the weighting and momentum attributes - using RequiredIdentifiers = MakeSeq_t< - weighting, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - // and also a mass ratio for energy calculation from momentum - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - massRatio< > - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + + // this plugin needs at least the weighting and momentum attributes + using RequiredIdentifiers = MakeSeq_t; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + // and also a mass ratio for energy calculation from momentum + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/EnergyFields.hpp b/include/picongpu/plugins/EnergyFields.hpp index 84ea8bde9d..764ff45498 100644 --- a/include/picongpu/plugins/EnergyFields.hpp +++ b/include/picongpu/plugins/EnergyFields.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. 
@@ -45,236 +45,228 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; -namespace energyFields -{ - -template -struct cast64Bit -{ - typedef typename TypeCast::result result; - - HDINLINE result operator()(const T_Type& value) const - { - return precisionCast(value); - } -}; - -template -struct squareComponentWise -{ - using result = T_Type; - - HDINLINE result operator()(const T_Type& value) const + namespace energyFields { - return value*value; - } -}; - -} - -class EnergyFields : public ISimulationPlugin -{ -private: - MappingDesc *cellDescription; - std::string notifyPeriod; - - std::string pluginName; - std::string pluginPrefix; - std::string filename; - std::ofstream outFile; - /*only rank 0 create a file*/ - bool writeToFile; - - mpi::MPIReduce mpiReduce; - - nvidia::reduce::Reduce* localReduce; - - typedef promoteType::type EneVectorType; + template + struct cast64Bit + { + typedef typename TypeCast::result result; -public: + HDINLINE result operator()(const T_Type& value) const + { + return precisionCast(value); + } + }; - EnergyFields() : - cellDescription(nullptr), - pluginName("EnergyFields: calculate the energy of the fields"), - pluginPrefix(std::string("fields_energy")), - filename(pluginPrefix + ".dat"), - writeToFile(false), - localReduce(nullptr) - { - Environment<>::get().PluginConnector().registerPlugin(this); - } + template + struct squareComponentWise + { + using result = T_Type; - virtual ~EnergyFields() - { + HDINLINE result operator()(const T_Type& value) const + { + return value * value; + } + }; - } + } // namespace energyFields - void notify(uint32_t currentStep) + class EnergyFields : public ISimulationPlugin { - getEnergyFields(currentStep); - } + private: + MappingDesc* cellDescription; + std::string notifyPeriod; + + std::string pluginName; + std::string pluginPrefix; + std::string filename; + std::ofstream outFile; + /*only rank 0 
create a file*/ + bool writeToFile; + + mpi::MPIReduce mpiReduce; + + nvidia::reduce::Reduce* localReduce; + + typedef promoteType::type EneVectorType; + + public: + EnergyFields() + : cellDescription(nullptr) + , pluginName("EnergyFields: calculate the energy of the fields") + , pluginPrefix(std::string("fields_energy")) + , filename(pluginPrefix + ".dat") + , writeToFile(false) + , localReduce(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ((pluginPrefix + ".period").c_str(), - po::value (¬ifyPeriod), "enable plugin [for each n-th step]"); - } + virtual ~EnergyFields() + { + } - std::string pluginGetName() const - { - return pluginName; - } + void notify(uint32_t currentStep) + { + getEnergyFields(currentStep); + } - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]"); + } -private: + std::string pluginGetName() const + { + return pluginName; + } - void pluginLoad() - { - if(!notifyPeriod.empty()) + void setMappingDescription(MappingDesc* cellDescription) { - localReduce = new nvidia::reduce::Reduce(1024); - writeToFile = mpiReduce.hasResult(mpi::reduceMethods::Reduce()); + this->cellDescription = cellDescription; + } - if (writeToFile) + private: + void pluginLoad() + { + if(!notifyPeriod.empty()) { - outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); - if (!outFile) + localReduce = new nvidia::reduce::Reduce(1024); + writeToFile = mpiReduce.hasResult(mpi::reduceMethods::Reduce()); + + if(writeToFile) { - std::cerr << "Can't open file [" << filename << "] for output, disable plugin output. 
" << std::endl; - writeToFile = false; + outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); + if(!outFile) + { + std::cerr << "Can't open file [" << filename << "] for output, disable plugin output. " + << std::endl; + writeToFile = false; + } + // create header of the file + outFile << "#step total[Joule] Bx[Joule] By[Joule] Bz[Joule] Ex[Joule] Ey[Joule] Ez[Joule]" + << " \n"; } - //create header of the file - outFile << "#step total[Joule] Bx[Joule] By[Joule] Bz[Joule] Ex[Joule] Ey[Joule] Ez[Joule]" << " \n"; + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); } - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); } - } - void pluginUnload() - { - if(!notifyPeriod.empty()) + void pluginUnload() { - if (writeToFile) + if(!notifyPeriod.empty()) { - outFile.flush(); - outFile << std::endl; //now all data are written to file - if (outFile.fail()) - std::cerr << "Error on flushing file [" << filename << "]. " << std::endl; - outFile.close(); + if(writeToFile) + { + outFile.flush(); + outFile << std::endl; // now all data are written to file + if(outFile.fail()) + std::cerr << "Error on flushing file [" << filename << "]. 
" << std::endl; + outFile.close(); + } + __delete(localReduce); } - __delete(localReduce); } - } - void restart(uint32_t restartStep, const std::string restartDirectory) - { - if( !writeToFile ) - return; + void restart(uint32_t restartStep, const std::string restartDirectory) + { + if(!writeToFile) + return; - writeToFile = restoreTxtFile( outFile, - filename, - restartStep, - restartDirectory ); - } + writeToFile = restoreTxtFile(outFile, filename, restartStep, restartDirectory); + } - void checkpoint(uint32_t currentStep, const std::string checkpointDirectory) - { - if( !writeToFile ) - return; + void checkpoint(uint32_t currentStep, const std::string checkpointDirectory) + { + if(!writeToFile) + return; - checkpointTxtFile( outFile, - filename, - currentStep, - checkpointDirectory ); - } + checkpointTxtFile(outFile, filename, currentStep, checkpointDirectory); + } - void getEnergyFields(uint32_t currentStep) - { - DataConnector &dc = Environment<>::get().DataConnector(); + void getEnergyFields(uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); - /* idx == 0 -> fieldB - * idx == 1 -> fieldE - */ - EneVectorType globalFieldEnergy[2]; - globalFieldEnergy[0]=EneVectorType::create(0.0); - globalFieldEnergy[1]=EneVectorType::create(0.0); + /* idx == 0 -> fieldB + * idx == 1 -> fieldE + */ + EneVectorType globalFieldEnergy[2]; + globalFieldEnergy[0] = EneVectorType::create(0.0); + globalFieldEnergy[1] = EneVectorType::create(0.0); - EneVectorType localReducedFieldEnergy[2]; - localReducedFieldEnergy[0] = reduceField(fieldB); - localReducedFieldEnergy[1] = reduceField(fieldE); + EneVectorType localReducedFieldEnergy[2]; + localReducedFieldEnergy[0] = reduceField(fieldB); + localReducedFieldEnergy[1] = 
reduceField(fieldE); - mpiReduce(nvidia::functors::Add(), - globalFieldEnergy, - localReducedFieldEnergy, - 2, - mpi::reduceMethods::Reduce()); + mpiReduce( + nvidia::functors::Add(), + globalFieldEnergy, + localReducedFieldEnergy, + 2, + mpi::reduceMethods::Reduce()); - float_64 energyFieldBReduced=0.0; - float_64 energyFieldEReduced=0.0; + float_64 energyFieldBReduced = 0.0; + float_64 energyFieldEReduced = 0.0; - for(int d=0; d; + if(writeToFile) + { + using dbl = std::numeric_limits; - outFile.precision(dbl::digits10); - outFile << currentStep << " " << std::scientific << globalEnergy * UNIT_ENERGY << " " - << (globalFieldEnergy[0] * UNIT_ENERGY).toString(" ","") << " " - << (globalFieldEnergy[1] * UNIT_ENERGY).toString(" ","") << std::endl; + outFile.precision(dbl::digits10); + outFile << currentStep << " " << std::scientific << globalEnergy * UNIT_ENERGY << " " + << (globalFieldEnergy[0] * UNIT_ENERGY).toString(" ", "") << " " + << (globalFieldEnergy[1] * UNIT_ENERGY).toString(" ", "") << std::endl; + } } - } -private: - - template - EneVectorType reduceField( std::shared_ptr< T_Field > field ) - { - /*define stacked DataBox's for reduce algorithm*/ - typedef DataBoxUnaryTransform TransformedBox; - typedef DataBoxUnaryTransform Box64bit; - using D1Box = DataBoxDim1Access; - - /* reduce field E*/ - DataSpace fieldSize = field->getGridLayout().getDataSpaceWithoutGuarding(); - DataSpace fieldGuard = field->getGridLayout().getGuard(); + private: + template + EneVectorType reduceField(std::shared_ptr field) + { + /*define stacked DataBox's for reduce algorithm*/ + typedef DataBoxUnaryTransform + TransformedBox; + typedef DataBoxUnaryTransform Box64bit; + using D1Box = DataBoxDim1Access; - TransformedBox fieldTransform(field->getDeviceDataBox().shift(fieldGuard)); - Box64bit field64bit(fieldTransform); - D1Box d1Access(field64bit, fieldSize); + /* reduce field E*/ + DataSpace fieldSize = field->getGridLayout().getDataSpaceWithoutGuarding(); + DataSpace fieldGuard = 
field->getGridLayout().getGuard(); - EneVectorType fieldEnergyReduced = (*localReduce)(nvidia::functors::Add(), - d1Access, - fieldSize.productOfComponents()); + TransformedBox fieldTransform(field->getDeviceDataBox().shift(fieldGuard)); + Box64bit field64bit(fieldTransform); + D1Box d1Access(field64bit, fieldSize); - return fieldEnergyReduced; - } + EneVectorType fieldEnergyReduced + = (*localReduce)(nvidia::functors::Add(), d1Access, fieldSize.productOfComponents()); -}; + return fieldEnergyReduced; + } + }; -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/plugins/EnergyParticles.hpp b/include/picongpu/plugins/EnergyParticles.hpp index 0c2881dbcf..974261195f 100644 --- a/include/picongpu/plugins/EnergyParticles.hpp +++ b/include/picongpu/plugins/EnergyParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, * Rene Widera, Richard Pausch, Benjamin Worpitz * * This file is part of PIConGPU. @@ -54,17 +54,15 @@ namespace picongpu { - /** accumulate the kinetic and total energy * * All energies are summed over all particles of a species. 
* * @tparam T_numWorkers number of workers */ - template< uint32_t T_numWorkers > + template struct KernelEnergyParticles { - /** accumulate particle energies * * @tparam T_ParBox pmacc::ParticlesBox, particle box type @@ -76,327 +74,192 @@ namespace picongpu * (two elements 0 == kinetic; 1 == total energy) * @param mapper functor to map a block to a supercell */ - template< - typename T_ParBox, - typename T_DBox, - typename T_Mapping, - typename T_Acc, - typename T_Filter - > - DINLINE void operator( )( - T_Acc const & acc, - T_ParBox pb, - T_DBox gEnergy, - T_Mapping mapper, - T_Filter filter - ) const + template + DINLINE void operator()(T_Acc const& acc, T_ParBox pb, T_DBox gEnergy, T_Mapping mapper, T_Filter filter) const { using namespace mappings::threads; constexpr uint32_t numWorkers = T_numWorkers; - constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume< - typename T_ParBox::FrameType::SuperCellSize - >::type::value; + constexpr uint32_t numParticlesPerFrame + = pmacc::math::CT::volume::type::value; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; using FramePtr = typename T_ParBox::FramePtr; // shared kinetic energy - PMACC_SMEM( - acc, - shEnergyKin, - float_X - ); + PMACC_SMEM(acc, shEnergyKin, float_X); // shared total energy - PMACC_SMEM( - acc, - shEnergy, - float_X - ); + PMACC_SMEM(acc, shEnergy, float_X); - using ParticleDomCfg = IdxConfig< - numParticlesPerFrame, - numWorkers - >; + using ParticleDomCfg = IdxConfig; // sum kinetic energy for all particles touched by the virtual thread - float_X localEnergyKin( 0.0 ); - float_X localEnergy( 0.0 ); + float_X localEnergyKin(0.0); + float_X localEnergy(0.0); - using MasterOnly = IdxConfig< - 1, - numWorkers - >; + using MasterOnly = IdxConfig<1, numWorkers>; - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - // set shared kinetic energy to zero - shEnergyKin = float_X( 0.0 ); - // set shared total 
energy to zero - shEnergy = float_X( 0.0 ); - } - ); + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + // set shared kinetic energy to zero + shEnergyKin = float_X(0.0); + // set shared total energy to zero + shEnergy = float_X(0.0); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - DataSpace< simDim > const superCellIdx( mapper.getSuperCellIndex( - DataSpace< simDim >( blockIdx ) - )); + DataSpace const superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); // each virtual thread is working on an own frame - FramePtr frame = pb.getLastFrame( superCellIdx ); + FramePtr frame = pb.getLastFrame(superCellIdx); // end kernel if we have no frames within the supercell - if( !frame.isValid( ) ) + if(!frame.isValid()) return; - auto accFilter = filter( - acc, - superCellIdx - mapper.getGuardingSuperCells( ), - WorkerCfg< numWorkers >{ workerIdx } - ); - - memory::CtxArray< - typename FramePtr::type::ParticleType, - ParticleDomCfg - > - currentParticleCtx( + auto accFilter + = filter(acc, superCellIdx - mapper.getGuardingSuperCells(), WorkerCfg{workerIdx}); + + memory::CtxArray currentParticleCtx( workerIdx, - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto particle = frame[ linearIdx ]; + [&](uint32_t const linearIdx, uint32_t const) { + auto particle = frame[linearIdx]; /* - only particles from the last frame must be checked * - all other particles are always valid */ - if( particle[ multiMask_ ] != 1 ) - particle.setHandleInvalid( ); + if(particle[multiMask_] != 1) + particle.setHandleInvalid(); return particle; - } - ); + }); - while( frame.isValid( ) ) + while(frame.isValid()) { // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + ForEachIdx forEachParticle(workerIdx); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + /* get one particle */ + auto& particle = 
currentParticleCtx[idx]; + if(accFilter(acc, particle)) { - /* get one particle */ - auto & particle = currentParticleCtx[ idx ]; - if( - accFilter( - acc, - particle - ) - ) - { - float3_X const mom = particle[ momentum_ ]; - // compute square of absolute momentum of the particle - float_X const mom2 = math::abs2( mom ); - float_X const weighting = particle[ weighting_ ]; - float_X const mass = attribute::getMass( - weighting, - particle - ); - float_X const c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; - - // calculate kinetic energy of the macro particle - localEnergyKin += KinEnergy<>( )( - mom, - mass - ); - - /* total energy for particles: - * E^2 = p^2*c^2 + m^2*c^4 - * = c^2 * [p^2 + m^2*c^2] - */ - localEnergy += algorithms::math::sqrt( - mom2 + - mass * mass * c2 - ) * SPEED_OF_LIGHT; - - } + float3_X const mom = particle[momentum_]; + // compute square of absolute momentum of the particle + float_X const mom2 = pmacc::math::abs2(mom); + float_X const weighting = particle[weighting_]; + float_X const mass = attribute::getMass(weighting, particle); + float_X const c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT; + + // calculate kinetic energy of the macro particle + localEnergyKin += KinEnergy<>()(mom, mass); + + /* total energy for particles: + * E^2 = p^2*c^2 + m^2*c^4 + * = c^2 * [p^2 + m^2*c^2] + */ + localEnergy += math::sqrt(mom2 + mass * mass * c2) * SPEED_OF_LIGHT; } - ); + }); // set frame to next particle frame frame = pb.getPreviousFrame(frame); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - /* Update particle for the next round. - * The frame list is traverse from the last to the first frame. - * Only the last frame can contain gaps therefore all following - * frames are filled with fully particles. - */ - currentParticleCtx[ idx ] = frame[ linearIdx ]; - } - ); + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + /* Update particle for the next round. 
+ * The frame list is traverse from the last to the first frame. + * Only the last frame can contain gaps therefore all following + * frames are filled with fully particles. + */ + currentParticleCtx[idx] = frame[linearIdx]; + }); } // each virtual thread adds the energies to the shared memory - atomicAdd( - &shEnergyKin, - localEnergyKin, - ::alpaka::hierarchy::Threads{} - ); - atomicAdd( - &shEnergy, - localEnergy, - ::alpaka::hierarchy::Threads{} - ); + cupla::atomicAdd(acc, &shEnergyKin, localEnergyKin, ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd(acc, &shEnergy, localEnergy, ::alpaka::hierarchy::Threads{}); // wait that all virtual threads updated the shared memory energies - __syncthreads( ); + cupla::__syncthreads(acc); // add energies on global level using global memory - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - // add kinetic energy - atomicAdd( - &( gEnergy[ 0 ] ), - static_cast< float_64 >( shEnergyKin ), - ::alpaka::hierarchy::Blocks{} - ); - // add total energy - atomicAdd( - &( gEnergy[ 1 ] ), - static_cast< float_64 >( shEnergy ), - ::alpaka::hierarchy::Blocks{} - ); - } - ); + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + // add kinetic energy + cupla::atomicAdd( + acc, + &(gEnergy[0]), + static_cast(shEnergyKin), + ::alpaka::hierarchy::Blocks{}); + // add total energy + cupla::atomicAdd(acc, &(gEnergy[1]), static_cast(shEnergy), ::alpaka::hierarchy::Blocks{}); + }); } }; - template< typename ParticlesType > + template class EnergyParticles : public plugins::multi::ISlave { public: - struct Help : public plugins::multi::IHelp { - /** creates an instance of ISlave * * @tparam T_Slave type of the interface implementation (must inherit from ISlave) * @param help plugin defined help * @param id index of the plugin, range: [0;help->getNumPlugins()) */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) + 
std::shared_ptr create(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) { - return std::shared_ptr< ISlave >( - new EnergyParticles< ParticlesType >( - help, - id, - cellDescription - ) - ); + return std::shared_ptr(new EnergyParticles(help, id, cellDescription)); } // find all valid filter for the current used species - using EligibleFilters = typename MakeSeqFromNestedSeq< - typename bmpl::transform< - particles::filter::AllParticleFilters, - particles::traits::GenerateSolversIfSpeciesEligible< - bmpl::_1, - ParticlesType - > - >::type - >::type; + using EligibleFilters = typename MakeSeqFromNestedSeq>::type>::type; //! periodicity of computing the particle energy - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "compute kinetic and total energy [for each n-th step] enable plugin by setting a non-zero value" - }; - plugins::multi::Option< std::string > filter = { - "filter", - "particle filter: " - }; + plugins::multi::Option notifyPeriod + = {"period", + "compute kinetic and total energy [for each n-th step] enable plugin by setting a non-zero value"}; + plugins::multi::Option filter = {"filter", "particle filter: "}; //! string list with all possible particle filters std::string concatenatedFilterNames; - std::vector< std::string > allowedFilters; + std::vector allowedFilters; ///! 
method used by plugin controller to get --help description void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { + meta::ForEach> getEligibleFilterNames; + getEligibleFilterNames(allowedFilters); - meta::ForEach< - EligibleFilters, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleFilterNames; - getEligibleFilterNames( allowedFilters ); - - concatenatedFilterNames = plugins::misc::concatenateToString( - allowedFilters, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - filter.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedFilterNames + "]" - ); + concatenatedFilterNames = plugins::misc::concatenateToString(allowedFilters, ", "); + + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + filter.registerHelp(desc, masterPrefix + prefix, std::string("[") + concatenatedFilterNames + "]"); } void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { } void validateOptions() { - if( notifyPeriod.size() != filter.size() ) - throw std::runtime_error( name + ": parameter filter and period are not used the same number of times" ); + if(notifyPeriod.size() != filter.size()) + throw std::runtime_error( + name + ": parameter filter and period are not used the same number of times"); // check if user passed filter name are valid - for( auto const & filterName : filter) + for(auto const& filterName : filter) { - if( - std::find( - allowedFilters.begin(), - allowedFilters.end(), - filterName - ) == allowedFilters.end() - ) + if(std::find(allowedFilters.begin(), allowedFilters.end(), filterName) == allowedFilters.end()) { - throw std::runtime_error( name + ": 
unknown filter '" + filterName + "'" ); + throw std::runtime_error(name + ": unknown filter '" + filterName + "'"); } } } @@ -425,204 +288,153 @@ namespace picongpu //! short description of the plugin std::string const description = "calculate the energy of a species"; //! prefix used for command line arguments - std::string const prefix = ParticlesType::FrameType::getName( ) + std::string( "_energy" ); + std::string const prefix = ParticlesType::FrameType::getName() + std::string("_energy"); }; //! must be implemented by the user - static std::shared_ptr< plugins::multi::IHelp > getHelp() + static std::shared_ptr getHelp() { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); + return std::shared_ptr(new Help{}); } - EnergyParticles( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help >(help) ), - m_id( id ), - m_cellDescription( cellDescription ) + EnergyParticles(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) { - filename = m_help->getOptionPrefix() + "_" + m_help->filter.get( m_id ) + ".dat"; + filename = m_help->getOptionPrefix() + "_" + m_help->filter.get(m_id) + ".dat"; // decide which MPI-rank writes output - writeToFile = reduce.hasResult( mpi::reduceMethods::Reduce( ) ); + writeToFile = reduce.hasResult(mpi::reduceMethods::Reduce()); // create two ints on gpu and host - gEnergy = new GridBuffer< - float_64, - DIM1 - >( DataSpace< DIM1 >( 2 ) ); + gEnergy = new GridBuffer(DataSpace(2)); // only MPI rank that writes to file - if( writeToFile ) + if(writeToFile) { // open output file - outFile.open( - filename.c_str( ), - std::ofstream::out | std::ostream::trunc - ); + outFile.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); // error handling - if( !outFile ) + if(!outFile) { - std::cerr << - "Can't open file [" << - 
filename << - "] for output, diasble plugin output. " << - std::endl; + std::cerr << "Can't open file [" << filename << "] for output, diasble plugin output. " + << std::endl; writeToFile = false; } // create header of the file - outFile << "#step Ekin_Joule E_Joule" << " \n"; + outFile << "#step Ekin_Joule E_Joule" + << " \n"; } // set how often the plugin should be executed while PIConGPU is running - Environment<>::get( ).PluginConnector( ).setNotificationPeriod( - this, - m_help->notifyPeriod.get( id ) - ); + Environment<>::get().PluginConnector().setNotificationPeriod(this, m_help->notifyPeriod.get(id)); } - virtual ~EnergyParticles( ) + virtual ~EnergyParticles() { - if( writeToFile ) + if(writeToFile) { - outFile.flush( ); + outFile.flush(); // flush cached data to file outFile << std::endl; - if( outFile.fail( ) ) + if(outFile.fail()) std::cerr << "Error on flushing file [" << filename << "]. " << std::endl; - outFile.close( ); + outFile.close(); } // free global memory on GPU - __delete( gEnergy ); + __delete(gEnergy); } /** this code is executed if the current time step is supposed to compute * the energy */ - void notify( uint32_t currentStep ) + void notify(uint32_t currentStep) { // call the method that calls the plugin kernel - calculateEnergyParticles < CORE + BORDER > ( currentStep ); + calculateEnergyParticles(currentStep); } - void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) + void restart(uint32_t restartStep, std::string const& restartDirectory) { - if( !writeToFile ) + if(!writeToFile) return; - writeToFile = restoreTxtFile( - outFile, - filename, - restartStep, - restartDirectory - ); + writeToFile = restoreTxtFile(outFile, filename, restartStep, restartDirectory); } - void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) + void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) { - if( !writeToFile ) + if(!writeToFile) return; - checkpointTxtFile( - 
outFile, - filename, - currentStep, - checkpointDirectory - ); + checkpointTxtFile(outFile, filename, currentStep, checkpointDirectory); } + private: //! method to call analysis and plugin-kernel calls - template< uint32_t AREA > - void calculateEnergyParticles( uint32_t currentStep ) + template + void calculateEnergyParticles(uint32_t currentStep) { - DataConnector &dc = Environment<>::get( ).DataConnector( ); + DataConnector& dc = Environment<>::get().DataConnector(); // use data connector to get particle data - auto particles = dc.get< ParticlesType >( - ParticlesType::FrameType::getName( ), - true - ); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); // initialize global energies with zero - gEnergy->getDeviceBuffer( ).setValue( 0.0 ); + gEnergy->getDeviceBuffer().setValue(0.0); - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; - AreaMapping< - AREA, - MappingDesc - > mapper( *m_cellDescription ); + AreaMapping mapper(*m_cellDescription); - auto kernel = PMACC_KERNEL( KernelEnergyParticles< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - ); + auto kernel = PMACC_KERNEL(KernelEnergyParticles{})(mapper.getGridDim(), numWorkers); auto binaryKernel = std::bind( kernel, - particles->getDeviceParticlesBox( ), - gEnergy->getDeviceBuffer( ).getDataBox( ), + particles->getDeviceParticlesBox(), + gEnergy->getDeviceBuffer().getDataBox(), mapper, - std::placeholders::_1 - ); - - meta::ForEach< - typename Help::EligibleFilters, - plugins::misc::ExecuteIfNameIsEqual< bmpl::_1 > - >{ }( - m_help->filter.get( m_id ), + std::placeholders::_1); + + meta::ForEach>{}( + m_help->filter.get(m_id), currentStep, - binaryKernel - ); + binaryKernel); - dc.releaseData( ParticlesType::FrameType::getName( ) ); + dc.releaseData(ParticlesType::FrameType::getName()); // get energy from GPU - 
gEnergy->deviceToHost( ); + gEnergy->deviceToHost(); // create storage for the global reduced result float_64 reducedEnergy[2]; // add energies from all GPUs using MPI reduce( - nvidia::functors::Add( ), + nvidia::functors::Add(), reducedEnergy, - gEnergy->getHostBuffer( ).getBasePointer( ), + gEnergy->getHostBuffer().getBasePointer(), 2, - mpi::reduceMethods::Reduce( ) - ); + mpi::reduceMethods::Reduce()); /* print timestep, kinetic energy and total energy to file: */ - if( writeToFile ) + if(writeToFile) { - using dbl = std::numeric_limits< float_64 >; + using dbl = std::numeric_limits; - outFile.precision( dbl::digits10 ); - outFile << currentStep << " " - << std::scientific - << reducedEnergy[ 0 ] * UNIT_ENERGY << " " - << reducedEnergy[ 1 ] * UNIT_ENERGY << std::endl; + outFile.precision(dbl::digits10); + outFile << currentStep << " " << std::scientific << reducedEnergy[0] * UNIT_ENERGY << " " + << reducedEnergy[1] * UNIT_ENERGY << std::endl; } } //! energy values (global on GPU) - GridBuffer< - float_64, - DIM1 - > * gEnergy = nullptr; + GridBuffer* gEnergy = nullptr; MappingDesc* m_cellDescription; @@ -641,47 +453,30 @@ namespace picongpu //! 
MPI reduce to add all energies over several GPUs mpi::MPIReduce reduce; - std::shared_ptr< Help > m_help; + std::shared_ptr m_help; size_t m_id; }; -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - EnergyParticles< T_UnspecifiedSpecies > - > + namespace particles { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the weighting and momentum attributes - using RequiredIdentifiers = MakeSeq_t< - weighting, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - // and also a mass ratio for energy calculation from momentum - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + + // this plugin needs at least the weighting and momentum attributes + using RequiredIdentifiers = MakeSeq_t; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + // and also a mass ratio for energy calculation from momentum + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/ILightweightPlugin.hpp b/include/picongpu/plugins/ILightweightPlugin.hpp index 1922621d71..4185209c74 100644 --- a/include/picongpu/plugins/ILightweightPlugin.hpp +++ b/include/picongpu/plugins/ILightweightPlugin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt +/* Copyright 2014-2021 Felix Schmitt * * This file is part of PIConGPU. 
* @@ -42,7 +42,6 @@ namespace picongpu virtual ~ILightweightPlugin() { - } }; -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/plugins/ISimulationPlugin.hpp b/include/picongpu/plugins/ISimulationPlugin.hpp index 118b90de14..fc9d1a47e5 100644 --- a/include/picongpu/plugins/ISimulationPlugin.hpp +++ b/include/picongpu/plugins/ISimulationPlugin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include "picongpu/simulation_defines.hpp" @@ -35,11 +34,10 @@ namespace picongpu class ISimulationPlugin : public IPlugin { public: - virtual void setMappingDescription(MappingDesc *cellDescription) = 0; + virtual void setMappingDescription(MappingDesc* cellDescription) = 0; virtual ~ISimulationPlugin() { } }; -} - +} // namespace picongpu diff --git a/include/picongpu/plugins/IntensityPlugin.hpp b/include/picongpu/plugins/IntensityPlugin.hpp index 71aa9ad84c..e28b14c7fa 100644 --- a/include/picongpu/plugins/IntensityPlugin.hpp +++ b/include/picongpu/plugins/IntensityPlugin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. 
@@ -43,364 +43,365 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -/* count particles in an area - * is not optimized, it checks any particle position if it is really a particle - */ -struct KernelIntensity -{ - template< - typename FieldBox, - typename BoxMax, - typename BoxIntegral, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - FieldBox field, - DataSpace cellsCount, - BoxMax boxMax, - BoxIntegral integralBox - ) const + /* count particles in an area + * is not optimized, it checks any particle position if it is really a particle + */ + struct KernelIntensity { - - typedef MappingDesc::SuperCellSize SuperCellSize; - PMACC_SMEM( acc, s_integrated, memory::Array< float_X,SuperCellSize::y::value > ); - PMACC_SMEM( acc, s_max, memory::Array< float_X, SuperCellSize::y::value > ); + template + DINLINE void operator()( + T_Acc const& acc, + FieldBox field, + DataSpace cellsCount, + BoxMax boxMax, + BoxIntegral integralBox) const + { + typedef MappingDesc::SuperCellSize SuperCellSize; + PMACC_SMEM(acc, s_integrated, memory::Array); + PMACC_SMEM(acc, s_max, memory::Array); - /*descripe size of a worker block for cached memory*/ - typedef SuperCellDescription< - pmacc::math::CT::Int - > SuperCell2D; + /*descripe size of a worker block for cached memory*/ + typedef SuperCellDescription> + SuperCell2D; - auto s_field = CachedBox::create< - 0, - float_32 - >( - acc, - SuperCell2D() - ); + auto s_field = CachedBox::create<0, float_32>(acc, SuperCell2D()); - int y = blockIdx.y * SuperCellSize::y::value + threadIdx.y; - int yGlobal = y + GuardSize::y::value * SuperCellSize::y::value; - const DataSpace threadId(threadIdx); + int y = cupla::blockIdx(acc).y * SuperCellSize::y::value + cupla::threadIdx(acc).y; + int yGlobal = y + GuardSize::y::value * SuperCellSize::y::value; + const DataSpace threadId(cupla::threadIdx(acc)); - if (threadId.x() == 0) - { - // clear destination arrays - s_integrated[threadId.y()] = float_X(0.0); - 
s_max[threadId.y()] = float_X(0.0); - } - __syncthreads(); + if(threadId.x() == 0) + { + // clear destination arrays + s_integrated[threadId.y()] = float_X(0.0); + s_max[threadId.y()] = float_X(0.0); + } + cupla::__syncthreads(acc); - // move cell-wise over z direction (without guarding cells) - for (int z = GuardSize::z::value * SuperCellSize::z::value; z < cellsCount.z() - GuardSize::z::value * SuperCellSize::z::value; ++z) - { - // move supercell-wise over x direction without guarding - for (int x = GuardSize::x::value * SuperCellSize::x::value + threadId.x(); x < cellsCount.x() - GuardSize::x::value * SuperCellSize::x::value; x += SuperCellSize::x::value) + // move cell-wise over z direction (without guarding cells) + for(int z = GuardSize::z::value * SuperCellSize::z::value; + z < cellsCount.z() - GuardSize::z::value * SuperCellSize::z::value; + ++z) { - const float3_X field_at_point(field(DataSpace (x, yGlobal, z))); - s_field(threadId) = math::abs2(field_at_point); - __syncthreads(); - if (threadId.x() == 0) + // move supercell-wise over x direction without guarding + for(int x = GuardSize::x::value * SuperCellSize::x::value + threadId.x(); + x < cellsCount.x() - GuardSize::x::value * SuperCellSize::x::value; + x += SuperCellSize::x::value) { - // master thread moves cell-wise over 2D supercell - for (int x_local = 0; x_local < SuperCellSize::x::value; ++x_local) + const float3_X field_at_point(field(DataSpace(x, yGlobal, z))); + s_field(threadId) = pmacc::math::abs2(field_at_point); + cupla::__syncthreads(acc); + if(threadId.x() == 0) { - DataSpace localId(x_local, threadId.y()); - s_integrated[threadId.y()] += s_field(localId); - s_max[threadId.y()] = fmaxf(s_max[threadId.y()], s_field(localId)); - + // master thread moves cell-wise over 2D supercell + for(int x_local = 0; x_local < SuperCellSize::x::value; ++x_local) + { + DataSpace localId(x_local, threadId.y()); + s_integrated[threadId.y()] += s_field(localId); + s_max[threadId.y()] = 
fmaxf(s_max[threadId.y()], s_field(localId)); + } } } } - } - __syncthreads(); + cupla::__syncthreads(acc); - if (threadId.x() == 0) - { - /*copy result to global array*/ - integralBox[y] = s_integrated[threadId.y()]; - boxMax[y] = s_max[threadId.y()]; + if(threadId.x() == 0) + { + /*copy result to global array*/ + integralBox[y] = s_integrated[threadId.y()]; + boxMax[y] = s_max[threadId.y()]; + } } + }; - - } -}; - -class IntensityPlugin : public ILightweightPlugin -{ -private: - typedef MappingDesc::SuperCellSize SuperCellSize; - - - GridBuffer *localMaxIntensity; - GridBuffer *localIntegratedIntensity; - MappingDesc *cellDescription; - std::string notifyPeriod; - - std::string pluginName; - std::string pluginPrefix; - - std::ofstream outFileMax; - std::ofstream outFileIntegrated; - /*only rank 0 create a file*/ - bool writeToFile; -public: - - /*! Calculate the max und integrated E-Field energy over laser propagation direction (in our case Y) - * max is only the SI value of the amplitude (V/m) - * integrated is the integral of amplidude of X and Z on Y position (is V/m in cell volume) - */ - IntensityPlugin() : - pluginName("IntensityPlugin: calculate the maximum and integrated E-Field energy\nover laser propagation direction"), - pluginPrefix(FieldE::getName() + std::string("_intensity")), - localMaxIntensity(nullptr), - localIntegratedIntensity(nullptr), - cellDescription(nullptr), - writeToFile(false) + class IntensityPlugin : public ILightweightPlugin { - Environment<>::get().PluginConnector().registerPlugin(this); - } + private: + typedef MappingDesc::SuperCellSize SuperCellSize; - virtual ~IntensityPlugin() - { - } + GridBuffer* localMaxIntensity; + GridBuffer* localIntegratedIntensity; + MappingDesc* cellDescription; + std::string notifyPeriod; + + std::string pluginName; + std::string pluginPrefix; + + std::ofstream outFileMax; + std::ofstream outFileIntegrated; + /*only rank 0 create a file*/ + bool writeToFile; + + public: + /*! 
Calculate the max und integrated E-Field energy over laser propagation direction (in our case Y) + * max is only the SI value of the amplitude (V/m) + * integrated is the integral of amplidude of X and Z on Y position (is V/m in cell volume) + */ + IntensityPlugin() + : pluginName("IntensityPlugin: calculate the maximum and integrated E-Field energy\nover laser " + "propagation direction") + , pluginPrefix(FieldE::getName() + std::string("_intensity")) + , localMaxIntensity(nullptr) + , localIntegratedIntensity(nullptr) + , cellDescription(nullptr) + , writeToFile(false) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - void notify(uint32_t currentStep) - { - calcIntensity(currentStep); - combineData(currentStep); - } + virtual ~IntensityPlugin() + { + } - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ((pluginPrefix + ".period").c_str(), - po::value (¬ifyPeriod), "enable plugin [for each n-th step]"); - } + void notify(uint32_t currentStep) + { + calcIntensity(currentStep); + combineData(currentStep); + } - std::string pluginGetName() const - { - return pluginName; - } + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]"); + } - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } + std::string pluginGetName() const + { + return pluginName; + } -private: + void setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; + } - void pluginLoad() - { - if(!notifyPeriod.empty()) + private: + void pluginLoad() { - writeToFile = Environment::get().GridController().getGlobalRank() == 0; - int yCells = cellDescription->getGridLayout().getDataSpaceWithoutGuarding().y(); + if(!notifyPeriod.empty()) + { + writeToFile = Environment::get().GridController().getGlobalRank() == 0; + int yCells = 
cellDescription->getGridLayout().getDataSpaceWithoutGuarding().y(); - localMaxIntensity = new GridBuffer (DataSpace (yCells)); //create one int on gpu und host - localIntegratedIntensity = new GridBuffer (DataSpace (yCells)); //create one int on gpu und host + localMaxIntensity + = new GridBuffer(DataSpace(yCells)); // create one int on gpu und host + localIntegratedIntensity + = new GridBuffer(DataSpace(yCells)); // create one int on gpu und host - if (writeToFile) - { - createFile(pluginPrefix + "_max.dat", outFileMax); - createFile(pluginPrefix + "_integrated.dat", outFileIntegrated); - } + if(writeToFile) + { + createFile(pluginPrefix + "_max.dat", outFileMax); + createFile(pluginPrefix + "_integrated.dat", outFileIntegrated); + } - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + } } - } - void pluginUnload() - { - if(!notifyPeriod.empty()) + void pluginUnload() { - if (writeToFile) + if(!notifyPeriod.empty()) { - flushAndCloseFile(outFileIntegrated); - flushAndCloseFile(outFileMax); + if(writeToFile) + { + flushAndCloseFile(outFileIntegrated); + flushAndCloseFile(outFileMax); + } + __delete(localMaxIntensity); + __delete(localIntegratedIntensity); } - __delete(localMaxIntensity); - __delete(localIntegratedIntensity); } - } - -private: - - /* reduce data from all gpus to one array - * @param currentStep simulation step - */ - void combineData(uint32_t currentStep) - { - - const DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); - Window window(MovingWindow::getInstance().getWindow( currentStep)); - const SubGrid& subGrid = Environment::get().SubGrid(); - - const int yGlobalSize = subGrid.getGlobalDomain().size.y(); - const int yLocalSize = localSize.y(); - - const int gpus = Environment::get().GridController().getGpuNodes().productOfComponents(); - - - /**\todo: fixme I cant work with not regular domains (use 
mpi_gatherv)*/ - DataSpace globalRootCell(subGrid.getLocalDomain().offset); - int yOffset = globalRootCell.y(); - int* yOffsetsAll = new int[gpus]; - float_32* maxAll = new float_32[yGlobalSize]; - float_32* maxAllTmp = new float_32[yLocalSize * gpus]; - memset(maxAll, 0, sizeof (float_32) *yGlobalSize); - float_32* integretedAll = new float_32[yGlobalSize]; - float_32* integretedAllTmp = new float_32[yLocalSize * gpus]; - memset(integretedAll, 0, sizeof (float_32) *yGlobalSize); - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Gather(&yOffset, 1, MPI_INT, yOffsetsAll, 1, - MPI_INT, 0, MPI_COMM_WORLD)); - - MPI_CHECK(MPI_Gather(localMaxIntensity->getHostBuffer().getBasePointer(), yLocalSize, MPI_FLOAT, - maxAllTmp, yLocalSize, MPI_FLOAT, - 0, MPI_COMM_WORLD)); - MPI_CHECK(MPI_Gather(localIntegratedIntensity->getHostBuffer().getBasePointer(), yLocalSize, MPI_FLOAT, - integretedAllTmp, yLocalSize, MPI_FLOAT, - 0, MPI_COMM_WORLD)); - - if (writeToFile) + private: + /* reduce data from all gpus to one array + * @param currentStep simulation step + */ + void combineData(uint32_t currentStep) { - for (int i = 0; i < gpus; ++i) + const DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + Window window(MovingWindow::getInstance().getWindow(currentStep)); + + const SubGrid& subGrid = Environment::get().SubGrid(); + + const int yGlobalSize = subGrid.getGlobalDomain().size.y(); + const int yLocalSize = localSize.y(); + + const int gpus = Environment::get().GridController().getGpuNodes().productOfComponents(); + + + /**\todo: fixme I cant work with not regular domains (use mpi_gatherv)*/ + DataSpace globalRootCell(subGrid.getLocalDomain().offset); + int yOffset = globalRootCell.y(); + int* yOffsetsAll = new int[gpus]; + float_32* maxAll = new float_32[yGlobalSize]; + float_32* maxAllTmp = new float_32[yLocalSize * gpus]; + memset(maxAll, 0, 
sizeof(float_32) * yGlobalSize); + float_32* integretedAll = new float_32[yGlobalSize]; + float_32* integretedAllTmp = new float_32[yLocalSize * gpus]; + memset(integretedAll, 0, sizeof(float_32) * yGlobalSize); + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Gather(&yOffset, 1, MPI_INT, yOffsetsAll, 1, MPI_INT, 0, MPI_COMM_WORLD)); + + MPI_CHECK(MPI_Gather( + localMaxIntensity->getHostBuffer().getBasePointer(), + yLocalSize, + MPI_FLOAT, + maxAllTmp, + yLocalSize, + MPI_FLOAT, + 0, + MPI_COMM_WORLD)); + MPI_CHECK(MPI_Gather( + localIntegratedIntensity->getHostBuffer().getBasePointer(), + yLocalSize, + MPI_FLOAT, + integretedAllTmp, + yLocalSize, + MPI_FLOAT, + 0, + MPI_COMM_WORLD)); + + if(writeToFile) { - int gOffset = yOffsetsAll[i]; - int tmpOff = yLocalSize*i; - for (int y = 0; y < yLocalSize; ++y) + for(int i = 0; i < gpus; ++i) { - maxAll[gOffset + y] = std::max(maxAllTmp[tmpOff + y], maxAll[gOffset + y]); - integretedAll[gOffset + y] += integretedAllTmp[tmpOff + y]; + int gOffset = yOffsetsAll[i]; + int tmpOff = yLocalSize * i; + for(int y = 0; y < yLocalSize; ++y) + { + maxAll[gOffset + y] = std::max(maxAllTmp[tmpOff + y], maxAll[gOffset + y]); + integretedAll[gOffset + y] += integretedAllTmp[tmpOff + y]; + } } + + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + size_t physicelYCellOffset = numSlides * yLocalSize + window.globalDimensions.offset.y(); + writeFile( + currentStep, + maxAll + window.globalDimensions.offset.y(), + window.globalDimensions.size.y(), + physicelYCellOffset, + outFileMax, + UNIT_EFIELD); + + float_64 unit = UNIT_EFIELD * CELL_VOLUME * SI::EPS0_SI; + for(uint32_t i = 0; i < simDim; ++i) + unit *= UNIT_LENGTH; + + writeFile( + currentStep, + integretedAll + window.globalDimensions.offset.y(), + window.globalDimensions.size.y(), + physicelYCellOffset, + outFileIntegrated, + unit); } - const uint32_t 
numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - size_t physicelYCellOffset = numSlides * yLocalSize + window.globalDimensions.offset.y(); - writeFile(currentStep, - maxAll + window.globalDimensions.offset.y(), - window.globalDimensions.size.y(), - physicelYCellOffset, - outFileMax, - UNIT_EFIELD - ); - - float_64 unit=UNIT_EFIELD*CELL_VOLUME*SI::EPS0_SI; - for(uint32_t i=0;i::get().DataConnector(); - - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); + /* run calculation of intensity + * sync all result data to host side + * + * @param currenstep simulation step + */ + void calcIntensity(uint32_t) + { + DataConnector& dc = Environment<>::get().DataConnector(); - /*start only worker for any supercell in laser propagation direction*/ - DataSpace grid(1,cellDescription->getGridSuperCells().y() - cellDescription->getGuardingSuperCells().y()); - /*use only 2D slice XY for supercell handling*/ - typedef typename MappingDesc::SuperCellSize SuperCellSize; - auto block = pmacc::math::CT::Vector::toRT(); + auto fieldE = dc.get(FieldE::getName(), true); - PMACC_KERNEL(KernelIntensity{}) - (grid, block) - ( - fieldE->getDeviceDataBox(), - fieldE->getGridLayout().getDataSpace(), - localMaxIntensity->getDeviceBuffer().getDataBox(), - localIntegratedIntensity->getDeviceBuffer().getDataBox() - ); + /*start only worker for any supercell in laser propagation direction*/ + DataSpace grid( + 1, + cellDescription->getGridSuperCells().y() - cellDescription->getGuardingSuperCells().y()); + /*use only 2D slice XY for supercell handling*/ + typedef typename MappingDesc::SuperCellSize SuperCellSize; + auto block = pmacc::math::CT::Vector::toRT(); - dc.releaseData( FieldE::getName() ); + PMACC_KERNEL(KernelIntensity{}) + (grid, block)( + fieldE->getDeviceDataBox(), + fieldE->getGridLayout().getDataSpace(), + localMaxIntensity->getDeviceBuffer().getDataBox(), + localIntegratedIntensity->getDeviceBuffer().getDataBox()); - localMaxIntensity->deviceToHost(); - 
localIntegratedIntensity->deviceToHost(); + dc.releaseData(FieldE::getName()); - } + localMaxIntensity->deviceToHost(); + localIntegratedIntensity->deviceToHost(); + } - /*create a file with given filename - * @param filename name of the output file - * @param stream ref on a stream object - */ - void createFile(std::string filename, std::ofstream& stream) - { - stream.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); - if (!stream) + /*create a file with given filename + * @param filename name of the output file + * @param stream ref on a stream object + */ + void createFile(std::string filename, std::ofstream& stream) { - std::cerr << "Can't open file [" << filename << "] for output, diasble plugin output. " << std::endl; - writeToFile = false; + stream.open(filename.c_str(), std::ofstream::out | std::ostream::trunc); + if(!stream) + { + std::cerr << "Can't open file [" << filename << "] for output, diasble plugin output. " << std::endl; + writeToFile = false; + } + stream << "#step position_in_laser_propagation_direction" << std::endl; + stream << "#step amplitude_data[*]" << std::endl; } - stream << "#step position_in_laser_propagation_direction" << std::endl; - stream << "#step amplitude_data[*]" << std::endl; - } - /* close and flash a file stream object - * @param stream stream which must closed - */ - void flushAndCloseFile(std::ofstream& stream) - { - stream.flush(); - stream << std::endl; //now all data are written to file - if (stream.fail()) - std::cerr << "Error on flushing file in IntensityPlugin. " << std::endl; - stream.close(); - } - -}; - -} + /* close and flash a file stream object + * @param stream stream which must closed + */ + void flushAndCloseFile(std::ofstream& stream) + { + stream.flush(); + stream << std::endl; // now all data are written to file + if(stream.fail()) + std::cerr << "Error on flushing file in IntensityPlugin. 
" << std::endl; + stream.close(); + } + }; +} // namespace picongpu diff --git a/include/picongpu/plugins/IsaacPlugin.hpp b/include/picongpu/plugins/IsaacPlugin.hpp index ee4dce3617..88bf4f9ded 100644 --- a/include/picongpu/plugins/IsaacPlugin.hpp +++ b/include/picongpu/plugins/IsaacPlugin.hpp @@ -1,26 +1,26 @@ /* -* Copyright 2013-2020 Alexander Matthes, -* -* This file is part of PIConGPU. -* -* PIConGPU is free software: you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation, either version 3 of the License, or -* (at your option) any later version. -* -* PIConGPU is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with PIConGPU. -* If not, see . -*/ + * Copyright 2013-2021 Alexander Matthes, + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ #pragma once -//Needs to be the very first +// Needs to be the very first #include #include "picongpu/plugins/ILightweightPlugin.hpp" @@ -41,587 +41,636 @@ namespace picongpu { -namespace isaacP -{ - - -using namespace pmacc; -using namespace ::isaac; + namespace isaacP + { + using namespace pmacc; + using namespace ::isaac; -ISAAC_NO_HOST_DEVICE_WARNING -template < typename FieldType > -class TFieldSource -{ - public: - static const size_t feature_dim = 3; - static const bool has_guard = bmpl::not_ >::value; - static const bool persistent = bmpl::not_ >::value; - typename FieldType::DataBoxType shifted; - MappingDesc *cellDescription; - bool movingWindow; - TFieldSource() : cellDescription(nullptr), movingWindow(false) {} - - void init(MappingDesc *cellDescription, bool movingWindow) + ISAAC_NO_HOST_DEVICE_WARNING + template + class TFieldSource { - this->cellDescription = cellDescription; - this->movingWindow = movingWindow; - } + public: + static const size_t feature_dim = 3; + static const bool has_guard = bmpl::not_>::value; + static const bool persistent = bmpl::not_>::value; + typename FieldType::DataBoxType shifted; + MappingDesc* cellDescription; + bool movingWindow; + TFieldSource() : cellDescription(nullptr), movingWindow(false) + { + } - static std::string getName() - { - return FieldType::getName() + std::string(" field"); - } + void init(MappingDesc* cellDescription, bool movingWindow) + { + this->cellDescription = cellDescription; + this->movingWindow = movingWindow; + } - void update(bool enabled, void* pointer) - { - if(enabled) + static std::string getName() { - const SubGrid& subGrid = Environment< simDim >::get().SubGrid(); - DataConnector &dc = Environment< simDim >::get().DataConnector(); - auto pField = dc.get< FieldType >( FieldType::getName(), true ); - DataSpace< simDim > guarding = SuperCellSize::toRT() * cellDescription->getGuardingSuperCells(); - if (movingWindow) + return FieldType::getName() + std::string(" field"); + } + + void 
update(bool enabled, void* pointer) + { + if(enabled) { - GridController &gc = Environment::get().GridController(); - if (gc.getPosition()[1] == 0) //first gpu + const SubGrid& subGrid = Environment::get().SubGrid(); + DataConnector& dc = Environment::get().DataConnector(); + auto pField = dc.get(FieldType::getName(), true); + DataSpace guarding = SuperCellSize::toRT() * cellDescription->getGuardingSuperCells(); + if(movingWindow) { - uint32_t* currentStep = (uint32_t*)pointer; - Window window( MovingWindow::getInstance().getWindow( *currentStep ) ); - guarding += subGrid.getLocalDomain().size - window.localDimensions.size; + GridController& gc = Environment::get().GridController(); + if(gc.getPosition()[1] == 0) // first gpu + { + uint32_t* currentStep = (uint32_t*) pointer; + Window window(MovingWindow::getInstance().getWindow(*currentStep)); + guarding += subGrid.getLocalDomain().size - window.localDimensions.size; + } } + typename FieldType::DataBoxType dataBox = pField->getDeviceDataBox(); + shifted = dataBox.shift(guarding); + dc.releaseData(FieldType::getName()); + /* avoid deadlock between not finished pmacc tasks and potential blocking operations + * within ISAAC + */ + __getTransactionEvent().waitForFinished(); } - typename FieldType::DataBoxType dataBox = pField->getDeviceDataBox(); - shifted = dataBox.shift( guarding ); - dc.releaseData( FieldType::getName() ); - /* avoid deadlock between not finished pmacc tasks and potential blocking operations - * within ISAAC - */ - __getTransactionEvent().waitForFinished(); } - } + ISAAC_NO_HOST_DEVICE_WARNING + ISAAC_HOST_DEVICE_INLINE isaac_float_dim operator[](const isaac_int3& nIndex) const + { + auto value = shifted[nIndex.z][nIndex.y][nIndex.x]; + return isaac_float_dim(value.x(), value.y(), value.z()); + } + }; ISAAC_NO_HOST_DEVICE_WARNING - ISAAC_HOST_DEVICE_INLINE isaac_float_dim< feature_dim > operator[] (const isaac_int3& nIndex) const + template + class TFieldSource> { - auto value = 
shifted[nIndex.z][nIndex.y][nIndex.x]; - isaac_float_dim< feature_dim > result = + public: + static const size_t feature_dim = 1; + static const bool has_guard = false; + static const bool persistent = false; + typename FieldTmp::DataBoxType shifted; + MappingDesc* cellDescription; + bool movingWindow; + + TFieldSource() : cellDescription(nullptr), movingWindow(false) { - isaac_float( value.x() ), - isaac_float( value.y() ), - isaac_float( value.z() ) - }; - return result; - } -}; - -ISAAC_NO_HOST_DEVICE_WARNING -template< typename FrameSolver, typename ParticleType > -class TFieldSource< FieldTmpOperation< FrameSolver, ParticleType > > -{ - public: - static const size_t feature_dim = 1; - static const bool has_guard = false; - static const bool persistent = false; - typename FieldTmp::DataBoxType shifted; - MappingDesc *cellDescription; - bool movingWindow; - - TFieldSource() : cellDescription(nullptr), movingWindow(false) {} + } - void init(MappingDesc *cellDescription, bool movingWindow) - { - this->cellDescription = cellDescription; - this->movingWindow = movingWindow; - } + void init(MappingDesc* cellDescription, bool movingWindow) + { + this->cellDescription = cellDescription; + this->movingWindow = movingWindow; + } - static std::string getName() - { - return ParticleType::FrameType::getName() + std::string(" ") + FrameSolver().getName(); - } + static std::string getName() + { + return ParticleType::FrameType::getName() + std::string(" ") + FrameSolver().getName(); + } - void update(bool enabled, void* pointer) - { - if (enabled) + void update(bool enabled, void* pointer) { - uint32_t* currentStep = (uint32_t*)pointer; - const SubGrid& subGrid = Environment< simDim >::get().SubGrid(); - DataConnector &dc = Environment< simDim >::get().DataConnector(); + if(enabled) + { + uint32_t* currentStep = (uint32_t*) pointer; + const SubGrid& subGrid = Environment::get().SubGrid(); + DataConnector& dc = Environment::get().DataConnector(); - PMACC_CASSERT_MSG( - 
_please_allocate_at_least_one_FieldTmp_in_memory_param, - fieldTmpNumSlots > 0 - ); - auto fieldTmp = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - auto particles = dc.get< ParticleType >( ParticleType::FrameType::getName(), true ); + PMACC_CASSERT_MSG(_please_allocate_at_least_one_FieldTmp_in_memory_param, fieldTmpNumSlots > 0); + auto fieldTmp = dc.get(FieldTmp::getUniqueId(0), true); + auto particles = dc.get(ParticleType::FrameType::getName(), true); - fieldTmp->getGridBuffer().getDeviceBuffer().setValue( FieldTmp::ValueType(0.0) ); - fieldTmp->template computeValue < CORE + BORDER, FrameSolver > (*particles, *currentStep); - EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); + fieldTmp->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType(0.0)); + fieldTmp->template computeValue(*particles, *currentStep); + EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(fieldTmpEvent); - __getTransactionEvent().waitForFinished(); + __setTransactionEvent(fieldTmpEvent); + __getTransactionEvent().waitForFinished(); - dc.releaseData( ParticleType::FrameType::getName() ); + dc.releaseData(ParticleType::FrameType::getName()); - DataSpace< simDim > guarding = SuperCellSize::toRT() * cellDescription->getGuardingSuperCells(); - if (movingWindow) - { - GridController &gc = Environment::get().GridController(); - if (gc.getPosition()[1] == 0) //first gpu + DataSpace guarding = SuperCellSize::toRT() * cellDescription->getGuardingSuperCells(); + if(movingWindow) { - Window window(MovingWindow::getInstance().getWindow( *currentStep )); - guarding += subGrid.getLocalDomain().size - window.localDimensions.size; + GridController& gc = Environment::get().GridController(); + if(gc.getPosition()[1] == 0) // first gpu + { + Window window(MovingWindow::getInstance().getWindow(*currentStep)); + guarding += subGrid.getLocalDomain().size - window.localDimensions.size; + } } + typename 
FieldTmp::DataBoxType dataBox = fieldTmp->getDeviceDataBox(); + shifted = dataBox.shift(guarding); + dc.releaseData(FieldTmp::getUniqueId(0)); } - typename FieldTmp::DataBoxType dataBox = fieldTmp->getDeviceDataBox(); - shifted = dataBox.shift( guarding ); - dc.releaseData( FieldTmp::getUniqueId( 0 ) ); } - } - - ISAAC_NO_HOST_DEVICE_WARNING - ISAAC_HOST_DEVICE_INLINE isaac_float_dim< feature_dim > operator[] (const isaac_int3& nIndex) const - { - auto value = shifted[nIndex.z][nIndex.y][nIndex.x]; - isaac_float_dim< feature_dim > result = { isaac_float( value.x() ) }; - return result; - } -}; + ISAAC_NO_HOST_DEVICE_WARNING + ISAAC_HOST_DEVICE_INLINE isaac_float_dim operator[](const isaac_int3& nIndex) const + { + auto value = shifted[nIndex.z][nIndex.y][nIndex.x]; + return isaac_float_dim(value.x()); + } + }; -template -class ParticleIterator -{ - public: - using FramePtr = typename ParticlesBoxType::FramePtr; - // size of the particle list - size_t size; - ISAAC_NO_HOST_DEVICE_WARNING - ISAAC_HOST_DEVICE_INLINE ParticleIterator(size_t size, ParticlesBoxType pb, FramePtr firstFrame, int frameSize) : - size(size), - pb(pb), - frame(firstFrame), - frameSize(frameSize), - i(0) - {} - - ISAAC_HOST_DEVICE_INLINE void next() + template + class ParticleIterator { - // iterate particles look for next frame - i++; - if(i >= frameSize) + public: + using FramePtr = typename ParticlesBoxType::FramePtr; + // size of the particle list + size_t size; + + ISAAC_NO_HOST_DEVICE_WARNING + ISAAC_HOST_DEVICE_INLINE ParticleIterator( + size_t size, + ParticlesBoxType pb, + FramePtr firstFrame, + int frameSize) + : size(size) + , pb(pb) + , frame(firstFrame) + , frameSize(frameSize) + , i(0) { - frame = pb.getNextFrame(frame); - i = 0; } - } - - // returns current particle position - ISAAC_HOST_DEVICE_INLINE isaac_float3 getPosition() const - { - auto const particle = frame[ i ]; - - // storage number in the actual frame - const auto frameCellNr = particle[ localCellIdx_]; - // offset 
in the actual superCell = cell offset in the supercell - const DataSpace frameCellOffset(DataSpaceOperations::template map (frameCellNr)); - - // added offsets - float3_X const absoluteOffset(particle[ position_ ] + float3_X(frameCellOffset)); + ISAAC_HOST_DEVICE_INLINE void next() + { + // iterate particles look for next frame + ++i; + if(i >= frameSize) + { + frame = pb.getNextFrame(frame); + i = 0; + } + } - // calculate scaled position - float3_X const pos( - absoluteOffset.x() * (1._X / float_X(MappingDesc::SuperCellSize::x::value)), - absoluteOffset.y() * (1._X / float_X(MappingDesc::SuperCellSize::y::value)), - absoluteOffset.z() * (1._X / float_X(MappingDesc::SuperCellSize::z::value)) + // returns current particle position + ISAAC_HOST_DEVICE_INLINE isaac_float3 getPosition() const + { + auto const particle = frame[i]; - ); + // storage number in the actual frame + const auto frameCellNr = particle[localCellIdx_]; - return {pos[0], pos[1], pos[2]}; - } + // offset in the actual superCell = cell offset in the supercell + const DataSpace frameCellOffset( + DataSpaceOperations::template map(frameCellNr)); - // returns particle momentum as color attribute - ISAAC_HOST_DEVICE_INLINE isaac_float_dim getAttribute() const - { - auto const particle = frame[ i ]; - float3_X const mom = particle[ momentum_ ]; - return {mom[0], mom[1], mom[2]}; - } + // added offsets + float3_X const absoluteOffset(particle[position_] + float3_X(frameCellOffset)); + // calculate scaled position + isaac_float3 const pos( + absoluteOffset.x() * (1._X / float_X(MappingDesc::SuperCellSize::x::value)), + absoluteOffset.y() * (1._X / float_X(MappingDesc::SuperCellSize::y::value)), + absoluteOffset.z() * (1._X / float_X(MappingDesc::SuperCellSize::z::value))); - // returns constant radius - ISAAC_HOST_DEVICE_INLINE isaac_float getRadius() const - { - return 0.2f; - } + return pos; + } + // returns particle momentum as color attribute + ISAAC_HOST_DEVICE_INLINE isaac_float_dim getAttribute() 
const + { + auto const particle = frame[i]; + float3_X const mom = particle[momentum_]; + return isaac_float_dim(mom[0], mom[1], mom[2]); + } - private: - ParticlesBoxType pb; - FramePtr frame; - int i; - int frameSize; -}; + // returns constant radius + ISAAC_HOST_DEVICE_INLINE isaac_float getRadius() const + { + return 0.2f; + } -ISAAC_NO_HOST_DEVICE_WARNING -template< typename ParticlesType > -class ParticleSource -{ + private: + ParticlesBoxType pb; + FramePtr frame; + int i; + int frameSize; + }; - using ParticlesBoxType = typename ParticlesType::ParticlesBoxType; - using FramePtr = typename ParticlesBoxType::FramePtr; - using FrameType = typename ParticlesBoxType::FrameType; - public: - static const size_t feature_dim = 3; - bool movingWindow; - DataSpace< simDim > guarding; ISAAC_NO_HOST_DEVICE_WARNING - ParticleSource () - {} - - ISAAC_HOST_INLINE static std::string getName() + template + class ParticleSource { - return ParticlesType::FrameType::getName() + std::string(" particle"); - } + using ParticlesBoxType = typename ParticlesType::ParticlesBoxType; + using FramePtr = typename ParticlesBoxType::FramePtr; + using FrameType = typename ParticlesBoxType::FrameType; + + public: + static const size_t feature_dim = 3; + bool movingWindow; + DataSpace guarding; + ISAAC_NO_HOST_DEVICE_WARNING + ParticleSource() + { + } - pmacc::memory::Array pb; + ISAAC_HOST_INLINE static std::string getName() + { + return ParticlesType::FrameType::getName() + std::string(" particle"); + } - void init(bool movingWindow) - { - this->movingWindow = movingWindow; - } + pmacc::memory::Array pb; - void update(bool enabled, void* pointer) - { - // update movingWindow cells - if (enabled) + void init(bool movingWindow) { - uint32_t* currentStep = (uint32_t*)pointer; - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); - pb[0] = particles->getDeviceParticlesBox(); - - const SubGrid& subGrid 
= Environment< simDim >::get().SubGrid(); - guarding = GuardSize::toRT(); - if (movingWindow) + this->movingWindow = movingWindow; + } + + void update(bool enabled, void* pointer) + { + // update movingWindow cells + if(enabled) { - GridController &gc = Environment::get().GridController(); - if (gc.getPosition()[1] == 0) //first gpu + uint32_t* currentStep = (uint32_t*) pointer; + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); + pb[0] = particles->getDeviceParticlesBox(); + + const SubGrid& subGrid = Environment::get().SubGrid(); + guarding = GuardSize::toRT(); + if(movingWindow) { - Window window(MovingWindow::getInstance().getWindow( *currentStep )); - for(uint i = 0; i < simDim; i++) - guarding[i] += int(math::ceil((subGrid.getLocalDomain().size[i] - window.localDimensions.size[i]) / (float)MappingDesc::SuperCellSize::toRT()[i])); + GridController& gc = Environment::get().GridController(); + if(gc.getPosition()[1] == 0) // first gpu + { + Window window(MovingWindow::getInstance().getWindow(*currentStep)); + for(uint32_t i = 0; i < simDim; i++) + guarding[i] += int(math::ceil( + (subGrid.getLocalDomain().size[i] - window.localDimensions.size[i]) + / (float) MappingDesc::SuperCellSize::toRT()[i])); + } } + dc.releaseData(ParticlesType::FrameType::getName()); } - dc.releaseData( ParticlesType::FrameType::getName() ); } - } - // returns particleIterator with correct feature_dim and cell specific particlebox - ISAAC_NO_HOST_DEVICE_WARNING - ISAAC_HOST_DEVICE_INLINE ParticleIterator getIterator(const isaac_uint3& local_grid_coord) const + // returns particleIterator with correct feature_dim and cell specific particlebox + ISAAC_NO_HOST_DEVICE_WARNING + ISAAC_HOST_DEVICE_INLINE ParticleIterator getIterator( + const isaac_uint3& local_grid_coord) const + { + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + DataSpace const superCellIdx( + local_grid_coord.x + 
guarding[0], + local_grid_coord.y + guarding[1], + local_grid_coord.z + guarding[2]); + const auto& superCell = pb[0].getSuperCell(superCellIdx); + size_t size = superCell.getNumParticles(); + FramePtr currentFrame = pb[0].getFirstFrame(superCellIdx); + return ParticleIterator(size, pb[0], currentFrame, frameSize); + } + }; + + template + struct Transformoperator { - constexpr uint32_t frameSize = pmacc::math::CT::volume< typename FrameType::SuperCellSize >::type::value; - DataSpace< simDim > const superCellIdx( local_grid_coord.x + guarding[0], local_grid_coord.y + guarding[1], local_grid_coord.z + guarding[2] ); - const auto & superCell = pb[0].getSuperCell( superCellIdx ); - size_t size = superCell.getNumParticles(); - FramePtr currentFrame = pb[0].getFirstFrame( superCellIdx ); - return ParticleIterator( size, pb[0], currentFrame, frameSize ); - } -}; - -template< typename T > -struct Transformoperator -{ - typedef TFieldSource< T > type; -}; -template< typename T > -struct ParticleTransformoperator -{ - typedef ParticleSource< T > type; -}; + typedef TFieldSource type; + }; + template + struct ParticleTransformoperator + { + typedef ParticleSource type; + }; -struct SourceInitIterator -{ - template - < - typename TSource, - typename TCellDescription, - typename TMovingWindow - > - void operator()( const int I, TSource& s, TCellDescription& c, TMovingWindow& w) const - { - s.init(c,w); - } -}; + struct SourceInitIterator + { + template + void operator()(const int I, TSource& s, TCellDescription& c, TMovingWindow& w) const + { + s.init(c, w); + } + }; -struct ParticleSourceInitIterator -{ - template - < - typename TParticleSource, - typename TMovingWindow - > - void operator()( const int I, TParticleSource& s, TMovingWindow& w) const - { - s.init(w); - } -}; + struct ParticleSourceInitIterator + { + template + void operator()(const int I, TParticleSource& s, TMovingWindow& w) const + { + s.init(w); + } + }; -class IsaacPlugin : public ILightweightPlugin -{ 
-public: - typedef boost::mpl::int_< simDim > SimDim; - static const size_t textureDim = 1024; - using SourceList = bmpl::transform::type,Transformoperator>::type; - // create compile time particle list - using ParticleList = bmpl::transform::type,ParticleTransformoperator>::type; - using VisualizationType = IsaacVisualization - < - cupla::AccHost, - cupla::Acc, - cupla::AccStream, - cupla::KernelDim, - SimDim, - ParticleList, - SourceList, - DataSpace< simDim >, - textureDim, - float3_X, -#if( ISAAC_STEREO == 0 ) - isaac::DefaultController, - isaac::DefaultCompositor + class IsaacPlugin : public ILightweightPlugin + { + public: + static const ISAAC_IDX_TYPE textureDim = 1024; + using SourceList = bmpl:: + transform::type, Transformoperator>::type; + // create compile time particle list + using ParticleList = bmpl::transform< + boost::fusion::result_of::as_list::type, + ParticleTransformoperator>::type; + using VisualizationType = IsaacVisualization< + cupla::AccHost, + cupla::Acc, + cupla::AccStream, + cupla::KernelDim, + ParticleList, + SourceList, + textureDim, +#if(ISAAC_STEREO == 0) + isaac::DefaultController, + isaac::DefaultCompositor #else - isaac::StereoController, -# if( ISAAC_STEREO == 1 ) + isaac::StereoController, +# if(ISAAC_STEREO == 1) isaac::StereoCompositorSideBySide -# else - isaac::StereoCompositorAnaglyph -# endif +# else + isaac::StereoCompositorAnaglyph +# endif #endif - >; - VisualizationType * visualization; - - IsaacPlugin() : - visualization(nullptr), - cellDescription(nullptr), - movingWindow(false), - render_interval(1), - step(0), - drawing_time(0), - cell_count(0), - particle_count(0), - last_notify(0) - { - Environment<>::get().PluginConnector().registerPlugin(this); - } + >; + VisualizationType* visualization; + + IsaacPlugin() + : visualization(nullptr) + , cellDescription(nullptr) + , movingWindow(false) + , render_interval(1) + , step(0) + , drawing_time(0) + , cell_count(0) + , particle_count(0) + , last_notify(0) + { + 
Environment<>::get().PluginConnector().registerPlugin(this); + } - std::string pluginGetName() const - { - return "IsaacPlugin"; - } + std::string pluginGetName() const + { + return "IsaacPlugin"; + } - void notify(uint32_t currentStep) - { - uint64_t simulation_time = visualization->getTicksUs() - last_notify; - step++; - if (step >= render_interval) - { - step = 0; - bool pause = false; - do + void notify(uint32_t currentStep) { - //update of the position for moving window simulations - if ( movingWindow ) + uint64_t simulation_time = visualization->getTicksUs() - last_notify; + step++; + if(step >= render_interval) { - Window window(MovingWindow::getInstance().getWindow( currentStep )); - visualization->updatePosition( window.localDimensions.offset ); - visualization->updateLocalSize( window.localDimensions.size ); - visualization->updateLocalParticleSize( window.localDimensions.size / MappingDesc::SuperCellSize::toRT()); - visualization->updateBounding(); - } - if (rank == 0 && visualization->kernel_time) - { - json_object_set_new( visualization->getJsonMetaRoot(), "time step", json_integer( currentStep ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "drawing_time" , json_integer( drawing_time ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "simulation_time", json_integer( simulation_time ) ); - simulation_time = 0; - json_object_set_new( visualization->getJsonMetaRoot(), "cell count", json_integer( cell_count ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "particle count", json_integer( particle_count ) ); - } - uint64_t start = visualization->getTicksUs(); - json_t* meta = visualization->doVisualization(META_MASTER, ¤tStep, !pause); - drawing_time = visualization->getTicksUs() - start; - json_t* json_pause = nullptr; - if ( meta && (json_pause = json_object_get(meta, "pause")) && json_boolean_value( json_pause ) ) - pause = !pause; - if ( meta && json_integer_value( json_object_get(meta, "exit") ) ) - exit(1); - 
json_t* js; - if ( meta && ( js = json_object_get(meta, "interval") ) ) - { - render_interval = math::max( int(1), int( json_integer_value ( js ) ) ); - //Feedback for other clients than the changing one - if (rank == 0) - json_object_set_new( visualization->getJsonMetaRoot(), "interval", json_integer( render_interval ) ); - } - json_decref( meta ); - if (direct_pause) - { - pause = true; - direct_pause = false; + step = 0; + bool pause = false; + do + { + // update of the position for moving window simulations + if(movingWindow) + { + Window window(MovingWindow::getInstance().getWindow(currentStep)); + isaac_size3 position; + isaac_size3 local_size; + isaac_size3 particle_size; + + for(ISAAC_IDX_TYPE i = 0; i < 3; ++i) + { + position[i] = window.localDimensions.offset[i]; + local_size[i] = window.localDimensions.size[i]; + particle_size[i] + = window.localDimensions.size[i] / MappingDesc::SuperCellSize::toRT()[i]; + } + visualization->updatePosition(position); + visualization->updateLocalSize(local_size); + visualization->updateLocalParticleSize(particle_size); + visualization->updateBounding(); + } + if(rank == 0 && visualization->kernel_time) + { + json_object_set_new( + visualization->getJsonMetaRoot(), + "time step", + json_integer(currentStep)); + json_object_set_new( + visualization->getJsonMetaRoot(), + "drawing_time", + json_integer(drawing_time)); + json_object_set_new( + visualization->getJsonMetaRoot(), + "simulation_time", + json_integer(simulation_time)); + simulation_time = 0; + json_object_set_new( + visualization->getJsonMetaRoot(), + "cell count", + json_integer(cell_count)); + json_object_set_new( + visualization->getJsonMetaRoot(), + "particle count", + json_integer(particle_count)); + } + uint64_t start = visualization->getTicksUs(); + json_t* meta = visualization->doVisualization(META_MASTER, ¤tStep, !pause); + drawing_time = visualization->getTicksUs() - start; + json_t* json_pause = nullptr; + if(meta && (json_pause = json_object_get(meta, 
"pause")) && json_boolean_value(json_pause)) + pause = !pause; + if(meta && json_integer_value(json_object_get(meta, "exit"))) + exit(1); + json_t* js; + if(meta && (js = json_object_get(meta, "interval"))) + { + render_interval = math::max(int(1), int(json_integer_value(js))); + // Feedback for other clients than the changing one + if(rank == 0) + json_object_set_new( + visualization->getJsonMetaRoot(), + "interval", + json_integer(render_interval)); + } + json_decref(meta); + if(direct_pause) + { + pause = true; + direct_pause = false; + } + } while(pause); } + last_notify = visualization->getTicksUs(); } - while (pause); - } - last_notify = visualization->getTicksUs(); - } - void pluginRegisterHelp(po::options_description& desc) - { - /* register command line parameters for your plugin */ - desc.add_options() - ("isaac.period", po::value< std::string > (¬ifyPeriod), - "Enable IsaacPlugin [for each n-th step].") - ("isaac.name", po::value< std::string > (&name)->default_value("default"), - "The name of the simulation. Default is \"default\".") - ("isaac.url", po::value< std::string > (&url)->default_value("localhost"), - "The url of the isaac server to connect to. Default is \"localhost\".") - ("isaac.port", po::value< uint16_t > (&port)->default_value(2460), - "The port of the isaac server to connect to. Default is 2460.") - ("isaac.width", po::value< uint32_t > (&width)->default_value(1024), - "The width per isaac framebuffer. Default is 1024.") - ("isaac.height", po::value< uint32_t > (&height)->default_value(768), - "The height per isaac framebuffer. Default is 768.") - ("isaac.directPause", po::value< bool > (&direct_pause)->default_value(false), - "Direct pausing after starting simulation. Default is false.") - ("isaac.quality", po::value< uint32_t > (&jpeg_quality)->default_value(90), - "JPEG quality. 
Default is 90.") - ("isaac.reconnect", po::value< bool > (&reconnect)->default_value(true), - "Trying to reconnect every time an image is rendered if the connection is lost or could never established at all.") - ; - } - - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } - -private: - MappingDesc *cellDescription; - std::string notifyPeriod; - std::string url; - std::string name; - uint16_t port; - uint32_t count; - uint32_t width; - uint32_t height; - uint32_t jpeg_quality; - int rank; - int numProc; - bool movingWindow; - ParticleList particleSources; - SourceList sources; - /** render interval within the notify period - * - * render each n-th time step within an interval defined by notifyPeriod - */ - uint32_t render_interval; - uint32_t step; - int drawing_time; - bool direct_pause; - int cell_count; - int particle_count; - uint64_t last_notify; - bool reconnect; - - void pluginLoad() - { - if(!notifyPeriod.empty()) - { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &numProc); - if ( MovingWindow::getInstance().isEnabled() ) - movingWindow = true; - float_X minCellSize = math::min( cellSize[0], math::min( cellSize[1], cellSize[2] ) ); - float3_X cellSizeFactor = cellSize / minCellSize; - - const SubGrid& subGrid = Environment< simDim >::get().SubGrid(); - - isaac_size2 framebuffer_size = + void pluginRegisterHelp(po::options_description& desc) { - cupla::IdxType(width), - cupla::IdxType(height) - }; - - isaac_for_each_params( sources, SourceInitIterator(), cellDescription, movingWindow ); - isaac_for_each_params( particleSources, ParticleSourceInitIterator(), movingWindow); - - visualization = new VisualizationType ( - cupla::manager::Device< cupla::AccHost >::get().current( ), - cupla::manager::Device< cupla::AccDev >::get().current( ), - cupla::manager::Stream< cupla::AccDev, cupla::AccStream >::get().stream( ), - name, - 0, - url, - port, - framebuffer_size, - 
MovingWindow::getInstance().getWindow( 0 ).globalDimensions.size, - subGrid.getLocalDomain().size, - subGrid.getLocalDomain().size / SuperCellSize::toRT(), - subGrid.getLocalDomain().offset, - particleSources, - sources, - cellSizeFactor - ); - visualization->setJpegQuality(jpeg_quality); - //Defining the later periodicly sent meta data - if (rank == 0) + /* register command line parameters for your plugin */ + desc.add_options()( + "isaac.period", + po::value(¬ifyPeriod), + "Enable IsaacPlugin [for each n-th step].")( + "isaac.name", + po::value(&name)->default_value("default"), + "The name of the simulation. Default is \"default\".")( + "isaac.url", + po::value(&url)->default_value("localhost"), + "The url of the isaac server to connect to. Default is \"localhost\".")( + "isaac.port", + po::value(&port)->default_value(2460), + "The port of the isaac server to connect to. Default is 2460.")( + "isaac.width", + po::value(&width)->default_value(1024), + "The width per isaac framebuffer. Default is 1024.")( + "isaac.height", + po::value(&height)->default_value(768), + "The height per isaac framebuffer. Default is 768.")( + "isaac.directPause", + po::value(&direct_pause)->default_value(false), + "Direct pausing after starting simulation. Default is false.")( + "isaac.quality", + po::value(&jpeg_quality)->default_value(90), + "JPEG quality. 
Default is 90.")( + "isaac.reconnect", + po::value(&reconnect)->default_value(true), + "Trying to reconnect every time an image is rendered if the connection is lost or could never " + "established at all."); + } + + void setMappingDescription(MappingDesc* cellDescription) { - json_object_set_new( visualization->getJsonMetaRoot(), "time step", json_string( "Time step" ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "drawing time", json_string( "Drawing time in us" ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "simulation time", json_string( "Simulation time in us" ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "cell count", json_string( "Total numbers of cells" ) ); - json_object_set_new( visualization->getJsonMetaRoot(), "particle count", json_string( "Total numbers of particles" ) ); + this->cellDescription = cellDescription; } - CommunicatorSetting communicatorBehaviour = reconnect ? RetryEverySend : ReturnAtError; - if (visualization->init( communicatorBehaviour ) != 0) + + private: + MappingDesc* cellDescription; + std::string notifyPeriod; + std::string url; + std::string name; + uint16_t port; + uint32_t count; + uint32_t width; + uint32_t height; + uint32_t jpeg_quality; + int rank; + int numProc; + bool movingWindow; + ParticleList particleSources; + SourceList sources; + /** render interval within the notify period + * + * render each n-th time step within an interval defined by notifyPeriod + */ + uint32_t render_interval; + uint32_t step; + int drawing_time; + bool direct_pause; + int cell_count; + int particle_count; + uint64_t last_notify; + bool reconnect; + + void pluginLoad() { - if (rank == 0) - log ("ISAAC Init failed, disable plugin"); - notifyPeriod = ""; + if(!notifyPeriod.empty()) + { + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &numProc); + if(MovingWindow::getInstance().isEnabled()) + movingWindow = true; + isaac_float minCellSize = math::min(cellSize[0], 
math::min(cellSize[1], cellSize[2])); + isaac_float3 cellSizeFactor( + cellSize[0] / minCellSize, + cellSize[1] / minCellSize, + cellSize[2] / minCellSize); + + const SubGrid& subGrid = Environment::get().SubGrid(); + + isaac_size2 framebuffer_size = {cupla::IdxType(width), cupla::IdxType(height)}; + + isaac_for_each_params(sources, SourceInitIterator(), cellDescription, movingWindow); + isaac_for_each_params(particleSources, ParticleSourceInitIterator(), movingWindow); + + isaac_size3 global_size; + isaac_size3 local_size; + isaac_size3 particle_size; + isaac_size3 position; + for(ISAAC_IDX_TYPE i = 0; i < 3; ++i) + { + global_size[i] = MovingWindow::getInstance().getWindow(0).globalDimensions.size[i]; + local_size[i] = subGrid.getLocalDomain().size[i]; + particle_size[i] = subGrid.getLocalDomain().size[i] / SuperCellSize::toRT()[i]; + position[i] = subGrid.getLocalDomain().offset[i]; + } + visualization = new VisualizationType( + cupla::manager::Device::get().current(), + cupla::manager::Device::get().current(), + cupla::manager::Stream::get().stream(), + name, + 0, + url, + port, + framebuffer_size, + global_size, + local_size, + particle_size, + position, + particleSources, + sources, + cellSizeFactor); + visualization->setJpegQuality(jpeg_quality); + // Defining the later periodicly sent meta data + if(rank == 0) + { + json_object_set_new(visualization->getJsonMetaRoot(), "time step", json_string("Time step")); + json_object_set_new( + visualization->getJsonMetaRoot(), + "drawing time", + json_string("Drawing time in us")); + json_object_set_new( + visualization->getJsonMetaRoot(), + "simulation time", + json_string("Simulation time in us")); + json_object_set_new( + visualization->getJsonMetaRoot(), + "cell count", + json_string("Total numbers of cells")); + json_object_set_new( + visualization->getJsonMetaRoot(), + "particle count", + json_string("Total numbers of particles")); + } + CommunicatorSetting communicatorBehaviour = reconnect ? 
RetryEverySend : ReturnAtError; + if(visualization->init(communicatorBehaviour) != 0) + { + if(rank == 0) + log("ISAAC Init failed, disable plugin"); + notifyPeriod = ""; + } + else + { + const int localNrOfCells + = cellDescription->getGridLayout().getDataSpaceWithoutGuarding().productOfComponents(); + cell_count = localNrOfCells * numProc; + particle_count = localNrOfCells * particles::TYPICAL_PARTICLES_PER_CELL + * (bmpl::size::type::value) * numProc; + last_notify = visualization->getTicksUs(); + if(rank == 0) + log("ISAAC Init succeded"); + } + } + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); } - else + + void pluginUnload() { - const int localNrOfCells = cellDescription->getGridLayout().getDataSpaceWithoutGuarding().productOfComponents(); - cell_count = localNrOfCells * numProc; - particle_count = localNrOfCells * particles::TYPICAL_PARTICLES_PER_CELL * (bmpl::size::type::value) * numProc; - last_notify = visualization->getTicksUs(); - if (rank == 0) - log ("ISAAC Init succeded"); + if(!notifyPeriod.empty()) + { + delete visualization; + visualization = nullptr; + if(rank == 0) + log("ISAAC finished"); + } } - } - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - } + }; - void pluginUnload() - { - if(!notifyPeriod.empty()) - { - delete visualization; - visualization = nullptr; - if (rank == 0) - log ("ISAAC finished"); - } - } -}; - -} //namespace isaac; -} //namespace picongpu; + } // namespace isaacP +} // namespace picongpu diff --git a/include/picongpu/plugins/PhaseSpace/AxisDescription.hpp b/include/picongpu/plugins/PhaseSpace/AxisDescription.hpp index 8c27b0ade5..1fee962109 100644 --- a/include/picongpu/plugins/PhaseSpace/AxisDescription.hpp +++ b/include/picongpu/plugins/PhaseSpace/AxisDescription.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -19,6 +19,8 @@ #pragma once +#include + namespace picongpu { /** 2D Phase Space Selection @@ -35,10 +37,48 @@ namespace picongpu /** short hand enums */ enum element_momentum - { px = 0u, py = 1u, pz = 2u }; + { + px = 0u, + py = 1u, + pz = 2u + }; enum element_coordinate - { x = 0u, y = 1u, z = 2u }; + { + x = 0u, + y = 1u, + z = 2u + }; + + std::string momentumAsString() const + { + switch(momentum) + { + case px: + return "px"; + case py: + return "py"; + case pz: + return "pz"; + default: + throw std::runtime_error("Unreachable!"); + } + } + + std::string spaceAsString() const + { + switch(space) + { + case x: + return "x"; + case y: + return "y"; + case z: + return "z"; + default: + throw std::runtime_error("Unreachable!"); + } + } }; } /* namespace picongpu */ diff --git a/include/picongpu/plugins/PhaseSpace/DumpHBufferOpenPMD.hpp b/include/picongpu/plugins/PhaseSpace/DumpHBufferOpenPMD.hpp new file mode 100644 index 0000000000..a425562a39 --- /dev/null +++ b/include/picongpu/plugins/PhaseSpace/DumpHBufferOpenPMD.hpp @@ -0,0 +1,224 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +#include "picongpu/plugins/PhaseSpace/AxisDescription.hpp" +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace picongpu +{ + class DumpHBuffer + { + private: + using SuperCellSize = typename MappingDesc::SuperCellSize; + + public: + /** Dump the PhaseSpace host Buffer + * + * \tparam Type the HBuffers element type + * \tparam int the HBuffers dimension + * \param hBuffer const reference to the hBuffer, including guard cells in spatial dimension + * \param axis_element plot to create: e.g. py, x from momentum/spatial-coordinate + * \param unit sim unit of the buffer + * \param strSpecies unique short hand name of the species + * \param filenameSuffix infix + extension part of openPMD filename + * \param currentStep current time step + * \param mpiComm communicator of the participating ranks + */ + template + void operator()( + const pmacc::container::HostBuffer& hBuffer, + const AxisDescription axis_element, + const std::pair axis_p_range, + const float_64 pRange_unit, + const float_64 unit, + const std::string strSpecies, + const std::string filenameExtension, + const std::string jsonConfig, + const uint32_t currentStep, + MPI_Comm mpiComm) const + { + using Type = T_Type; + + /** file name ***************************************************** + * phaseSpace/PhaseSpace_xpy_timestep.h5 */ + std::string fCoords("xyz"); + std::ostringstream openPMDFilename; + openPMDFilename << "phaseSpace/PhaseSpace_" << strSpecies << "_" << fCoords.at(axis_element.space) << "p" + << fCoords.at(axis_element.momentum) << "_%T." 
<< filenameExtension; + + /** get size of the fileWriter communicator ***********************/ + int size; + MPI_CHECK(MPI_Comm_size(mpiComm, &size)); + + /** create parallel domain collector ******************************/ + ::openPMD::Series series(openPMDFilename.str(), ::openPMD::Access::CREATE, mpiComm, jsonConfig); + ::openPMD::Iteration iteration = series.iterations[currentStep]; + + const std::string software("PIConGPU"); + + std::stringstream softwareVersion; + softwareVersion << PICONGPU_VERSION_MAJOR << "." << PICONGPU_VERSION_MINOR << "." + << PICONGPU_VERSION_PATCH; + if(!std::string(PICONGPU_VERSION_LABEL).empty()) + softwareVersion << "-" << PICONGPU_VERSION_LABEL; + series.setSoftware(software, softwareVersion.str()); + + pmacc::GridController& gc = pmacc::Environment::get().GridController(); + + /** calculate GUARD offset in the source hBuffer *****************/ + const uint32_t rGuardCells + = SuperCellSize().toRT()[axis_element.space] * GuardSize::toRT()[axis_element.space]; + + /** calculate local and global size of the phase space ***********/ + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + const SubGrid& subGrid = Environment::get().SubGrid(); + const std::uint64_t rLocalOffset = subGrid.getLocalDomain().offset[axis_element.space]; + const std::uint64_t rLocalSize = int(hBuffer.size().y() - 2 * rGuardCells); + const std::uint64_t rGlobalSize = subGrid.getGlobalDomain().size[axis_element.space]; + PMACC_VERIFY(int(rLocalSize) == subGrid.getLocalDomain().size[axis_element.space]); + + /* globalDomain of the phase space */ + ::openPMD::Extent globalPhaseSpace_extent{rGlobalSize, hBuffer.size().x()}; + + /* global moving window meta information */ + ::openPMD::Offset globalPhaseSpace_offset{0, 0}; + std::uint64_t globalMovingWindowOffset = 0; + std::uint64_t globalMovingWindowSize = rGlobalSize; + if(axis_element.space == AxisDescription::y) /* spatial axis == y */ + { + globalPhaseSpace_offset[0] = 
numSlides * rLocalSize; + Window window = MovingWindow::getInstance().getWindow(currentStep); + globalMovingWindowOffset = window.globalDimensions.offset[axis_element.space]; + globalMovingWindowSize = window.globalDimensions.size[axis_element.space]; + } + + /* localDomain: offset of it in the globalDomain and size */ + ::openPMD::Offset localPhaseSpace_offset{rLocalOffset, 0}; + ::openPMD::Extent localPhaseSpace_extent{rLocalSize, hBuffer.size().x()}; + + /** Dataset Name **************************************************/ + std::ostringstream dataSetName; + /* xpx or ypz or ... */ + dataSetName << strSpecies << "_" << fCoords.at(axis_element.space) << "p" + << fCoords.at(axis_element.momentum); + + /** debug log *****************************************************/ + int rank; + MPI_CHECK(MPI_Comm_rank(mpiComm, &rank)); + { + std::stringstream offsetAsString, localExtentAsString, globalExtentAsString; + offsetAsString << "[" << localPhaseSpace_offset[0] << ", " << localPhaseSpace_offset[1] << "]"; + localExtentAsString << "[" << localPhaseSpace_extent[0] << ", " << localPhaseSpace_extent[1] << "]"; + globalExtentAsString << "[" << globalPhaseSpace_extent[0] << ", " << globalPhaseSpace_extent[1] << "]"; + log( + "Dump buffer %1% to %2% at offset %3% with size %4% for total size %5% for rank %6% / %7%") + % (*(hBuffer.origin()(0, rGuardCells))) % dataSetName.str() % offsetAsString.str() + % localExtentAsString.str() % globalExtentAsString.str() % rank % size; + } + + /** write local domain ********************************************/ + + ::openPMD::Mesh mesh = iteration.meshes[dataSetName.str()]; + ::openPMD::MeshRecordComponent dataset = mesh[::openPMD::RecordComponent::SCALAR]; + + dataset.resetDataset({::openPMD::determineDatatype(), globalPhaseSpace_extent}); + std::shared_ptr data(&(*hBuffer.origin()(0, rGuardCells)), [](auto const&) {}); + dataset.storeChunk(data, localPhaseSpace_offset, localPhaseSpace_extent); + + /** meta attributes for the data set: 
unit, range, moving window **/ + + pmacc::Selection globalDomain = subGrid.getGlobalDomain(); + pmacc::Selection totalDomain = subGrid.getTotalDomain(); + // convert things to std::vector<> for the openPMD API to enjoy + std::vector globalDomainSize{&globalDomain.size[0], &globalDomain.size[0] + simDim}; + std::vector globalDomainOffset{&globalDomain.offset[0], &globalDomain.offset[0] + simDim}; + std::vector totalDomainSize{&totalDomain.size[0], &totalDomain.size[0] + simDim}; + std::vector totalDomainOffset{&totalDomain.offset[0], &totalDomain.offset[0] + simDim}; + std::vector globalDomainAxisLabels; + if(simDim == DIM2) + { + globalDomainAxisLabels = {"y", "x"}; // 2D: F[y][x] + } + if(simDim == DIM3) + { + globalDomainAxisLabels = {"z", "y", "x"}; // 3D: F[z][y][x] + } + + float_X const dr = cellSize[axis_element.space]; + + mesh.setAttribute("globalDomainSize", globalDomainSize); + mesh.setAttribute("globalDomainOffset", globalDomainOffset); + mesh.setAttribute("totalDomainSize", totalDomainSize); + mesh.setAttribute("totalDomainOffset", totalDomainOffset); + mesh.setAttribute("globalDomainAxisLabels", globalDomainAxisLabels); + mesh.setAttribute("totalDomainAxisLabels", globalDomainAxisLabels); + mesh.setAttribute("_global_start", globalPhaseSpace_offset); + mesh.setAttribute("_global_size", globalPhaseSpace_extent); + mesh.setAxisLabels({axis_element.spaceAsString(), axis_element.momentumAsString()}); + mesh.setAttribute("sim_unit", unit); + dataset.setUnitSI(unit); + { + using UD = ::openPMD::UnitDimension; + mesh.setUnitDimension({{UD::I, 1.0}, {UD::T, 1.0}, {UD::L, -1.0}}); // charge density + } + mesh.setAttribute("p_unit", pRange_unit); + mesh.setAttribute("p_min", axis_p_range.first); + mesh.setAttribute("p_max", axis_p_range.second); + mesh.setGridGlobalOffset({globalMovingWindowOffset * dr, axis_p_range.first}); + mesh.setAttribute("movingWindowOffset", globalMovingWindowOffset); + mesh.setAttribute("movingWindowSize", globalMovingWindowSize); + 
mesh.setAttribute("dr", dr); + mesh.setAttribute("dV", CELL_VOLUME); + mesh.setGridSpacing(std::vector{dr, CELL_VOLUME / dr}); + mesh.setAttribute("dr_unit", UNIT_LENGTH); + iteration.setDt(DELTA_T); + iteration.setTimeUnitSI(UNIT_TIME); + /* + * The value represents an aggregation over one cell, so any value is correct for the mesh position. + * Just use the center. + */ + dataset.setPosition(std::vector{0.5, 0.5}); + + // avoid deadlock between not finished pmacc tasks and mpi calls in openPMD + __getTransactionEvent().waitForFinished(); + + /** close file ****************************************************/ + iteration.close(); + } + }; + +} /* namespace picongpu */ diff --git a/include/picongpu/plugins/PhaseSpace/DumpHBufferSplashP.hpp b/include/picongpu/plugins/PhaseSpace/DumpHBufferSplashP.hpp deleted file mode 100644 index fde3952c74..0000000000 --- a/include/picongpu/plugins/PhaseSpace/DumpHBufferSplashP.hpp +++ /dev/null @@ -1,215 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" - -#include "picongpu/traits/SplashToPIC.hpp" -#include "picongpu/traits/PICToSplash.hpp" - -#include "picongpu/plugins/PhaseSpace/AxisDescription.hpp" -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace picongpu -{ - class DumpHBuffer - { - private: - typedef typename MappingDesc::SuperCellSize SuperCellSize; - - public: - /** Dump the PhaseSpace host Buffer - * - * \tparam Type the HBuffers element type - * \tparam int the HBuffers dimension - * \param hBuffer const reference to the hBuffer, including guard cells in spatial dimension - * \param axis_element plot to create: e.g. py, x from momentum/spatial-coordinate - * \param unit sim unit of the buffer - * \param strSpecies unique short hand name of the species - * \param currentStep current time step - * \param mpiComm communicator of the participating ranks - */ - template - void operator()( const pmacc::container::HostBuffer& hBuffer, - const AxisDescription axis_element, - const std::pair axis_p_range, - const float_64 pRange_unit, - const float_64 unit, - const std::string strSpecies, - const uint32_t currentStep, - MPI_Comm mpiComm ) const - { - using namespace splash; - typedef T_Type Type; - const int bufDim = T_bufDim; - - /** file name ***************************************************** - * phaseSpace/PhaseSpace_xpy_timestep.h5 */ - std::string fCoords("xyz"); - std::ostringstream filename; - filename << "phaseSpace/PhaseSpace_" - << strSpecies << "_" - << fCoords.at(axis_element.space) - << "p" << fCoords.at(axis_element.momentum); - - /** get size of the fileWriter communicator ***********************/ - int size; - MPI_CHECK(MPI_Comm_size( mpiComm, &size )); - - /** create parallel domain collector ******************************/ - ParallelDomainCollector pdc( - mpiComm, MPI_INFO_NULL, Dimensions(size, 1, 1), 10 ); - - 
pmacc::GridController& gc = - pmacc::Environment::get().GridController(); - DataCollector::FileCreationAttr fAttr; - Dimensions mpiPosition( gc.getPosition()[axis_element.space], 0, 0 ); - fAttr.mpiPosition.set( mpiPosition ); - - DataCollector::initFileCreationAttr(fAttr); - - pdc.open( filename.str().c_str(), fAttr ); - - /** calculate GUARD offset in the source hBuffer *****************/ - const uint32_t rGuardCells = - SuperCellSize().toRT()[axis_element.space] * GuardSize::toRT()[axis_element.space]; - - /** calculate local and global size of the phase space ***********/ - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - const SubGrid& subGrid = Environment::get().SubGrid(); - const int rLocalOffset = subGrid.getLocalDomain().offset[axis_element.space]; - const int rLocalSize = int(hBuffer.size().y() - 2*rGuardCells); - const int rGlobalSize = subGrid.getGlobalDomain().size[axis_element.space]; - PMACC_VERIFY( rLocalSize == subGrid.getLocalDomain().size[axis_element.space] ); - - /* globalDomain of the phase space */ - splash::Dimensions globalPhaseSpace_size( hBuffer.size().x(), - rGlobalSize, - 1 ); - - /* global moving window meta information */ - splash::Dimensions globalPhaseSpace_offset( 0, 0, 0 ); - int globalMovingWindowOffset = 0; - int globalMovingWindowSize = rGlobalSize; - if( axis_element.space == AxisDescription::y ) /* spatial axis == y */ - { - globalPhaseSpace_offset.set( 0, numSlides * rLocalSize, 0 ); - Window window = MovingWindow::getInstance( ).getWindow( currentStep ); - globalMovingWindowOffset = window.globalDimensions.offset[axis_element.space]; - globalMovingWindowSize = window.globalDimensions.size[axis_element.space]; - } - - /* localDomain: offset of it in the globalDomain and size */ - splash::Dimensions localPhaseSpace_offset( 0, rLocalOffset, 0 ); - splash::Dimensions localPhaseSpace_size( hBuffer.size().x(), - rLocalSize, - 1 ); - - /** Dataset Name 
**************************************************/ - std::ostringstream dataSetName; - /* xpx or ypz or ... */ - dataSetName << fCoords.at(axis_element.space) - << "p" << fCoords.at(axis_element.momentum); - - /** debug log *****************************************************/ - int rank; - MPI_CHECK(MPI_Comm_rank( mpiComm, &rank )); - log ("Dump buffer %1% to %2% at offset %3% with size %4% for total size %5% for rank %6% / %7%") - % ( *(hBuffer.origin()(0,rGuardCells)) ) % dataSetName.str() % localPhaseSpace_offset.toString() - % localPhaseSpace_size.toString() % globalPhaseSpace_size.toString() - % rank % size; - - /** write local domain ********************************************/ - typename PICToSplash::type ctPhaseSpace; - - // avoid deadlock between not finished pmacc tasks and mpi calls in HDF5 - __getTransactionEvent().waitForFinished(); - - pdc.writeDomain( currentStep, - /* global domain and my local offset within it */ - globalPhaseSpace_size, - localPhaseSpace_offset, - /* */ - ctPhaseSpace, - bufDim, - /* local data set dimensions */ - splash::Selection(localPhaseSpace_size), - /* data set name */ - dataSetName.str().c_str(), - /* global domain */ - splash::Domain( - globalPhaseSpace_offset, - globalPhaseSpace_size - ), - /* dataClass, buffer */ - DomainCollector::GridType, - &(*hBuffer.origin()(0,rGuardCells)) ); - - /** meta attributes for the data set: unit, range, moving window **/ - typedef PICToSplash::type SplashFloatXType; - typedef PICToSplash::type SplashFloat64Type; - ColTypeInt ctInt; - SplashFloat64Type ctFloat64; - SplashFloatXType ctFloatX; - - pdc.writeAttribute( currentStep, ctFloat64, dataSetName.str().c_str(), - "sim_unit", &unit ); - pdc.writeAttribute( currentStep, ctFloat64, dataSetName.str().c_str(), - "p_unit", &pRange_unit ); - pdc.writeAttribute( currentStep, ctFloatX, dataSetName.str().c_str(), - "p_min", &(axis_p_range.first) ); - pdc.writeAttribute( currentStep, ctFloatX, dataSetName.str().c_str(), - "p_max", 
&(axis_p_range.second) ); - pdc.writeAttribute( currentStep, ctInt, dataSetName.str().c_str(), - "movingWindowOffset", &globalMovingWindowOffset ); - pdc.writeAttribute( currentStep, ctInt, dataSetName.str().c_str(), - "movingWindowSize", &globalMovingWindowSize ); - - pdc.writeAttribute( currentStep, ctFloatX, dataSetName.str().c_str(), - "dr", &(cellSize[axis_element.space]) ); - pdc.writeAttribute( currentStep, ctFloatX, dataSetName.str().c_str(), - "dV", &CELL_VOLUME ); - pdc.writeAttribute( currentStep, ctFloat64, dataSetName.str().c_str(), - "dr_unit", &UNIT_LENGTH ); - pdc.writeAttribute( currentStep, ctFloatX, dataSetName.str().c_str(), - "dt", &DELTA_T ); - pdc.writeAttribute( currentStep, ctFloat64, dataSetName.str().c_str(), - "dt_unit", &UNIT_TIME ); - - /** close file ****************************************************/ - pdc.finalize(); - pdc.close(); - } - }; - -} /* namespace picongpu */ diff --git a/include/picongpu/plugins/PhaseSpace/PhaseSpace.hpp b/include/picongpu/plugins/PhaseSpace/PhaseSpace.hpp index 8f4cb6354c..438e71a5a3 100644 --- a/include/picongpu/plugins/PhaseSpace/PhaseSpace.hpp +++ b/include/picongpu/plugins/PhaseSpace/PhaseSpace.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -61,152 +61,101 @@ namespace picongpu struct Help : public plugins::multi::IHelp { - /** creates an instance of ISlave * * @tparam T_Slave type of the interface implementation (must inherit from ISlave) * @param help plugin defined help * @param id index of the plugin, range: [0;help->getNumPlugins()) */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) + std::shared_ptr create(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) { - return std::shared_ptr< ISlave >( - new PhaseSpace< - T_AssignmentFunction, - Species - >( - help, - id, - cellDescription - ) - ); + return std::shared_ptr( + new PhaseSpace(help, id, cellDescription)); } // find all valid filter for the current used species - using EligibleFilters = typename MakeSeqFromNestedSeq< - typename bmpl::transform< - particles::filter::AllParticleFilters, - particles::traits::GenerateSolversIfSpeciesEligible< - bmpl::_1, - Species - > - >::type - >::type; + using EligibleFilters = typename MakeSeqFromNestedSeq>::type>::type; //! 
periodicity of computing the particle energy - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "notify period" - }; - plugins::multi::Option< std::string > filter = { - "filter", - "particle filter: " - }; + plugins::multi::Option notifyPeriod = {"period", "notify period"}; + plugins::multi::Option filter = {"filter", "particle filter: "}; - plugins::multi::Option< std::string > element_space = { - "space", - "spatial component (x, y, z)" - }; - plugins::multi::Option< std::string > element_momentum = { - "momentum", - "momentum component (px, py, pz)" - }; - plugins::multi::Option< float_X > momentum_range_min = { - "min", - "min range momentum [m_species c]" - }; - plugins::multi::Option< float_X > momentum_range_max = { - "max", - "max range momentum [m_species c]" - }; + plugins::multi::Option element_space = {"space", "spatial component (x, y, z)"}; + plugins::multi::Option element_momentum = {"momentum", "momentum component (px, py, pz)"}; + plugins::multi::Option momentum_range_min = {"min", "min range momentum [m_species c]"}; + plugins::multi::Option momentum_range_max = {"max", "max range momentum [m_species c]"}; + + /* + * Set to h5 for now at least, to make for easier comparison of + * output with old outpu + */ + plugins::multi::Option file_name_extension + = {"ext", + "openPMD filename extension (this controls the" + "backend picked by the openPMD API)", + "h5"}; + + plugins::multi::Option json_config + = {"json", "advanced (backend) configuration for openPMD in JSON format", "{}"}; //! string list with all possible particle filters std::string concatenatedFilterNames; - std::vector< std::string > allowedFilters; + std::vector allowedFilters; ///! 
method used by plugin controller to get --help description void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { + meta::ForEach> getEligibleFilterNames; + getEligibleFilterNames(allowedFilters); + + concatenatedFilterNames = plugins::misc::concatenateToString(allowedFilters, ", "); + + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + filter.registerHelp(desc, masterPrefix + prefix, std::string("[") + concatenatedFilterNames + "]"); - meta::ForEach< - EligibleFilters, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleFilterNames; - getEligibleFilterNames( allowedFilters ); - - concatenatedFilterNames = plugins::misc::concatenateToString( - allowedFilters, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - filter.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedFilterNames + "]" - ); - - element_space.registerHelp( - desc, - masterPrefix + prefix - ); - element_momentum.registerHelp( - desc, - masterPrefix + prefix - ); - momentum_range_min.registerHelp( - desc, - masterPrefix + prefix - ); - momentum_range_max.registerHelp( - desc, - masterPrefix + prefix - ); + element_space.registerHelp(desc, masterPrefix + prefix); + element_momentum.registerHelp(desc, masterPrefix + prefix); + momentum_range_min.registerHelp(desc, masterPrefix + prefix); + momentum_range_max.registerHelp(desc, masterPrefix + prefix); + file_name_extension.registerHelp(desc, masterPrefix + prefix); + json_config.registerHelp(desc, masterPrefix + prefix); } void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { } void validateOptions() { - if( notifyPeriod.size() != 
filter.size() ) - throw std::runtime_error( name + ": parameter filter and period are not used the same number of times" ); - if( notifyPeriod.size() != element_space.size() ) - throw std::runtime_error( name + ": parameter space and period are not used the same number of times" ); - if( notifyPeriod.size() != element_momentum.size() ) - throw std::runtime_error( name + ": parameter momentum and period are not used the same number of times" ); - if( notifyPeriod.size() != momentum_range_min.size() ) - throw std::runtime_error( name + ": parameter min and period are not used the same number of times" ); - if( notifyPeriod.size() != momentum_range_max.size() ) - throw std::runtime_error( name + ": parameter max and period are not used the same number of times" ); + if(notifyPeriod.size() != filter.size()) + throw std::runtime_error( + name + ": parameter filter and period are not used the same number of times"); + if(notifyPeriod.size() != element_space.size()) + throw std::runtime_error( + name + ": parameter space and period are not used the same number of times"); + if(notifyPeriod.size() != element_momentum.size()) + throw std::runtime_error( + name + ": parameter momentum and period are not used the same number of times"); + if(notifyPeriod.size() != momentum_range_min.size()) + throw std::runtime_error( + name + ": parameter min and period are not used the same number of times"); + if(notifyPeriod.size() != momentum_range_max.size()) + throw std::runtime_error( + name + ": parameter max and period are not used the same number of times"); // check if user passed filter name are valid - for( auto const & filterName : filter) + for(auto const& filterName : filter) { - if( - std::find( - allowedFilters.begin(), - allowedFilters.end(), - filterName - ) == allowedFilters.end() - ) + if(std::find(allowedFilters.begin(), allowedFilters.end(), filterName) == allowedFilters.end()) { - throw std::runtime_error( name + ": unknown filter '" + filterName + "'" ); + throw 
std::runtime_error(name + ": unknown filter '" + filterName + "'"); } } } @@ -235,13 +184,12 @@ namespace picongpu //! short description of the plugin std::string const description = "create phase space of a species"; //! prefix used for command line arguments - std::string const prefix = Species::FrameType::getName( ) + std::string( "_phaseSpace" ); + std::string const prefix = Species::FrameType::getName() + std::string("_phaseSpace"); }; private: - - MappingDesc *m_cellDescription = nullptr; + MappingDesc* m_cellDescription = nullptr; /** plot to create: e.g. py, x from element_coordinate/momentum */ AxisDescription axis_element; @@ -249,7 +197,7 @@ namespace picongpu std::pair axis_p_range; uint32_t r_bins; - std::shared_ptr< Help > m_help; + std::shared_ptr m_help; size_t m_id; typedef float_32 float_PS; @@ -257,13 +205,16 @@ namespace picongpu * we use not more than 32KB shared memory * Note: checking the longest edge for all phase space configurations * is a conservative work around until #469 is implemented */ - typedef typename bmpl::accumulate< - typename SuperCellSize::mplVector, - bmpl::int_<0>, - bmpl::max - >::type SuperCellsLongestEdge; - static constexpr uint32_t maxShared = 32*1024; /* 32 KB */ - static constexpr uint32_t num_pbins = maxShared/(sizeof(float_PS)*SuperCellsLongestEdge::value); + typedef typename bmpl:: + accumulate, bmpl::max>::type + SuperCellsLongestEdge; + /* Note: the previously used 32 KB shared memory size is not correct + * for CPUs, as discovered in #3329. As a quick patch, slightly reduce + * it so that the buffer plus a few small shared memory variables + * together fit 30 KB as set by default on CPUs. So set to 30 000 bytes. 
+ */ + static constexpr uint32_t maxShared = 30000; + static constexpr uint32_t num_pbins = maxShared / (sizeof(float_PS) * SuperCellsLongestEdge::value); container::DeviceBuffer* dBuffer = nullptr; @@ -274,7 +225,7 @@ namespace picongpu */ MPI_Comm commFileWriter = MPI_COMM_NULL; - template< uint32_t r_dir > + template struct StartBlockFunctor { @@ -289,139 +240,73 @@ namespace picongpu const TParticlesBox& pb, cursor::BufferCursor cur, const uint32_t p_dir, - const std::pair& p_range - ) : - particlesBox(pb), curOriginPhaseSpace(cur), p_element(p_dir), - axis_p_range(p_range) - {} - - template< - typename T_Filter, - typename T_Zone, - typename ... T_Args - > - void operator()( - T_Filter const & filter, - T_Zone const & zone, - T_Args && ... args - ) const + const std::pair& p_range) + : particlesBox(pb) + , curOriginPhaseSpace(cur) + , p_element(p_dir) + , axis_p_range(p_range) + { + } + + template + void operator()(T_Filter const& filter, T_Zone const& zone, T_Args&&... args) const { - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - algorithm::kernel::ForeachLockstep< - numWorkers, - SuperCellSize - > forEachSuperCell; - - FunctorBlock< - Species, - SuperCellSize, - float_PS, - num_pbins, - r_dir, - T_Filter, - numWorkers - > functorBlock( - particlesBox, - curOriginPhaseSpace, - p_element, - axis_p_range, - filter - ); - - forEachSuperCell( - zone, - functorBlock, - args ... - ); + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + algorithm::kernel::ForeachLockstep forEachSuperCell; + + FunctorBlock + functorBlock(particlesBox, curOriginPhaseSpace, p_element, axis_p_range, filter); + + forEachSuperCell(zone, functorBlock, args...); } }; public: - //! 
must be implemented by the user - static std::shared_ptr< plugins::multi::IHelp > getHelp() + static std::shared_ptr getHelp() { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); + return std::shared_ptr(new Help{}); } - PhaseSpace( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ); + PhaseSpace(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription); virtual ~PhaseSpace(); - void notify( uint32_t currentStep ); + void notify(uint32_t currentStep); - void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) + void restart(uint32_t restartStep, std::string const& restartDirectory) { - } - void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) + void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) { - } template - void calcPhaseSpace( const uint32_t currentStep ); + void calcPhaseSpace(const uint32_t currentStep); }; -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_AssignmentFunction, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - PhaseSpace< - T_AssignmentFunction, - T_UnspecifiedSpecies - > - > + namespace particles { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - position<>, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasMass = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - using SpeciesHasCharge = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasMass, - SpeciesHasCharge - >; - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename 
T_Species::FrameType; + + using RequiredIdentifiers = MakeSeq_t, momentum>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using SpeciesHasMass = typename pmacc::traits::HasFlag>::type; + using SpeciesHasCharge = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu #include "PhaseSpace.tpp" diff --git a/include/picongpu/plugins/PhaseSpace/PhaseSpace.tpp b/include/picongpu/plugins/PhaseSpace/PhaseSpace.tpp index faa5d69306..a92f5779d9 100644 --- a/include/picongpu/plugins/PhaseSpace/PhaseSpace.tpp +++ b/include/picongpu/plugins/PhaseSpace/PhaseSpace.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten * * This file is part of PIConGPU. * @@ -20,7 +20,7 @@ #pragma once #include "PhaseSpace.hpp" -#include "DumpHBufferSplashP.hpp" +#include "DumpHBufferOpenPMD.hpp" #include #include @@ -43,58 +43,52 @@ namespace picongpu { template PhaseSpace::PhaseSpace( - std::shared_ptr< plugins::multi::IHelp > & help, + std::shared_ptr& help, size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help >(help) ), - m_id( id ), - m_cellDescription( cellDescription ) + MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) { // unit is m_species c (for a single "real" particle) - float_X pRangeSingle_unit( - frame::getMass< typename Species::FrameType >() * - SPEED_OF_LIGHT - ); + float_X pRangeSingle_unit(frame::getMass() * SPEED_OF_LIGHT); - axis_p_range.first = m_help->momentum_range_min.get( id ) * pRangeSingle_unit; - axis_p_range.second = m_help->momentum_range_max.get( id ) * pRangeSingle_unit ; + axis_p_range.first = m_help->momentum_range_min.get(id) * pRangeSingle_unit; + axis_p_range.second = 
m_help->momentum_range_max.get(id) * pRangeSingle_unit; /* String to Enum conversion */ uint32_t el_space; - if( m_help->element_space.get( id ) == "x" ) - el_space = AxisDescription::x; - else if( m_help->element_space.get( id ) == "y" ) - el_space = AxisDescription::y; - else if( m_help->element_space.get( id ) == "z" ) - el_space = AxisDescription::z; + if(m_help->element_space.get(id) == "x") + el_space = AxisDescription::x; + else if(m_help->element_space.get(id) == "y") + el_space = AxisDescription::y; + else if(m_help->element_space.get(id) == "z") + el_space = AxisDescription::z; else - throw PluginException("[Plugin] [" + m_help->getOptionPrefix() + "] space must be x, y or z" ); + throw PluginException("[Plugin] [" + m_help->getOptionPrefix() + "] space must be x, y or z"); uint32_t el_momentum = AxisDescription::px; - if( m_help->element_momentum.get( id ) == "px" ) - el_momentum = AxisDescription::px; - else if( m_help->element_momentum.get( id ) == "py" ) - el_momentum = AxisDescription::py; - else if( m_help->element_momentum.get( id ) == "pz" ) - el_momentum = AxisDescription::pz; + if(m_help->element_momentum.get(id) == "px") + el_momentum = AxisDescription::px; + else if(m_help->element_momentum.get(id) == "py") + el_momentum = AxisDescription::py; + else if(m_help->element_momentum.get(id) == "pz") + el_momentum = AxisDescription::pz; else - throw PluginException("[Plugin] [" + m_help->getOptionPrefix() + "] momentum must be px, py or pz" ); + throw PluginException("[Plugin] [" + m_help->getOptionPrefix() + "] momentum must be px, py or pz"); axis_element.momentum = el_momentum; axis_element.space = el_space; bool activatePlugin = true; - if( simDim == DIM2 && el_space == AxisDescription::z ) + if(simDim == DIM2 && el_space == AxisDescription::z) { std::cerr << "[Plugin] [" + m_help->getOptionPrefix() + "] Skip requested output for " - << m_help->element_space.get( id ) - << m_help->element_momentum.get( id ) - << std::endl; + << 
m_help->element_space.get(id) << m_help->element_momentum.get(id) << std::endl; activatePlugin = false; } - if( activatePlugin ) + if(activatePlugin) { /** create dir */ Environment::get().Filesystem().createDirectoryWithPermissions("phaseSpace"); @@ -102,10 +96,9 @@ namespace picongpu const uint32_t r_element = axis_element.space; /* CORE + BORDER + GUARD elements for spatial bins */ - this->r_bins = SuperCellSize().toRT()[r_element] - * this->m_cellDescription->getGridSuperCells()[r_element]; + this->r_bins = SuperCellSize().toRT()[r_element] * this->m_cellDescription->getGridSuperCells()[r_element]; - this->dBuffer = new container::DeviceBuffer( this->num_pbins, r_bins ); + this->dBuffer = new container::DeviceBuffer(this->num_pbins, r_bins); /* reduce-add phase space from other GPUs in range [p0;p1]x[r;r+dr] * to "lowest" node in range @@ -121,7 +114,7 @@ namespace picongpu pmacc::math::Size_t sizeTransversalPlane(gpuDim); sizeTransversalPlane[this->axis_element.space] = 1; - for( int planePos = 0; planePos <= (int)gpuDim[this->axis_element.space]; ++planePos ) + for(int planePos = 0; planePos <= (int) gpuDim[this->axis_element.space]; ++planePos) { /* my plane means: the offset for the transversal plane to my r_element * should be zero @@ -129,67 +122,64 @@ namespace picongpu pmacc::math::Int longOffset(pmacc::math::Int::create(0)); longOffset[this->axis_element.space] = planePos; - zone::SphericZone zoneTransversalPlane( sizeTransversalPlane, longOffset ); + zone::SphericZone zoneTransversalPlane(sizeTransversalPlane, longOffset); /* Am I the lowest GPU in my plane? 
*/ bool isGroupRoot = false; - bool isInGroup = ( gpuPos[this->axis_element.space] == planePos ); - if( isInGroup ) + bool isInGroup = (gpuPos[this->axis_element.space] == planePos); + if(isInGroup) { pmacc::math::Int inPlaneGPU(gpuPos); inPlaneGPU[this->axis_element.space] = 0; - if( inPlaneGPU == pmacc::math::Int::create(0) ) + if(inPlaneGPU == pmacc::math::Int::create(0)) isGroupRoot = true; } - algorithm::mpi::Reduce* createReduce = - new algorithm::mpi::Reduce( zoneTransversalPlane, - isGroupRoot ); - if( isInGroup ) + algorithm::mpi::Reduce* createReduce + = new algorithm::mpi::Reduce(zoneTransversalPlane, isGroupRoot); + if(isInGroup) { this->planeReduce = createReduce; this->isPlaneReduceRoot = isGroupRoot; } else - __delete( createReduce ); + __delete(createReduce); } /* Create communicator with ranks of each plane reduce root */ { /* Array with root ranks of the planeReduce operations */ - std::vector planeReduceRootRanks( gc.getGlobalSize(), -1 ); + std::vector planeReduceRootRanks(gc.getGlobalSize(), -1); /* Am I one of the planeReduce root ranks? my global rank : -1 */ - int myRootRank = gc.getGlobalRank() * this->isPlaneReduceRoot - - ( ! 
this->isPlaneReduceRoot ); + int myRootRank = gc.getGlobalRank() * this->isPlaneReduceRoot - (!this->isPlaneReduceRoot); // avoid deadlock between not finished pmacc tasks and mpi blocking collectives __getTransactionEvent().waitForFinished(); MPI_Group world_group, new_group; - MPI_CHECK(MPI_Allgather( &myRootRank, 1, MPI_INT, - &(planeReduceRootRanks.front()), - 1, - MPI_INT, - MPI_COMM_WORLD )); + MPI_CHECK(MPI_Allgather( + &myRootRank, + 1, + MPI_INT, + &(planeReduceRootRanks.front()), + 1, + MPI_INT, + MPI_COMM_WORLD)); /* remove all non-roots (-1 values) */ - std::sort( planeReduceRootRanks.begin(), planeReduceRootRanks.end() ); - std::vector ranks( std::lower_bound( planeReduceRootRanks.begin(), - planeReduceRootRanks.end(), - 0 ), - planeReduceRootRanks.end() ); - - MPI_CHECK(MPI_Comm_group( MPI_COMM_WORLD, &world_group )); - MPI_CHECK(MPI_Group_incl( world_group, ranks.size(), ranks.data(), &new_group )); - MPI_CHECK(MPI_Comm_create( MPI_COMM_WORLD, new_group, &commFileWriter )); - MPI_CHECK(MPI_Group_free( &new_group )); - MPI_CHECK(MPI_Group_free( &world_group )); + std::sort(planeReduceRootRanks.begin(), planeReduceRootRanks.end()); + std::vector ranks( + std::lower_bound(planeReduceRootRanks.begin(), planeReduceRootRanks.end(), 0), + planeReduceRootRanks.end()); + + MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &world_group)); + MPI_CHECK(MPI_Group_incl(world_group, ranks.size(), ranks.data(), &new_group)); + MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, new_group, &commFileWriter)); + MPI_CHECK(MPI_Group_free(&new_group)); + MPI_CHECK(MPI_Group_free(&world_group)); } // set how often the plugin should be executed while PIConGPU is running - Environment<>::get( ).PluginConnector( ).setNotificationPeriod( - this, - m_help->notifyPeriod.get(id) - ); + Environment<>::get().PluginConnector().setNotificationPeriod(this, m_help->notifyPeriod.get(id)); } } @@ -197,41 +187,41 @@ namespace picongpu template PhaseSpace::~PhaseSpace() { - __delete( this->dBuffer ); - 
__delete( planeReduce ); + __delete(this->dBuffer); + __delete(planeReduce); - if( commFileWriter != MPI_COMM_NULL ) + if(commFileWriter != MPI_COMM_NULL) { // avoid deadlock between not finished pmacc tasks and mpi blocking collectives __getTransactionEvent().waitForFinished(); - MPI_CHECK_NO_EXCEPT(MPI_Comm_free( &commFileWriter )); + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&commFileWriter)); } } - template + template template - void PhaseSpace::calcPhaseSpace( const uint32_t currentStep ) + void PhaseSpace::calcPhaseSpace(const uint32_t currentStep) { const pmacc::math::Int guardCells = SuperCellSize().toRT() * GuardSize::toRT(); - const pmacc::math::Size_t coreBorderSuperCells( this->m_cellDescription->getGridSuperCells() - 2 * GuardSize::toRT() ); - const pmacc::math::Size_t coreBorderCells = coreBorderSuperCells * - precisionCast( SuperCellSize().toRT() ); + const pmacc::math::Size_t coreBorderSuperCells( + this->m_cellDescription->getGridSuperCells() - 2 * GuardSize::toRT()); + const pmacc::math::Size_t coreBorderCells + = coreBorderSuperCells * precisionCast(SuperCellSize().toRT()); /* register particle species observer */ - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< Species >( Species::FrameType::getName(), true ); + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(Species::FrameType::getName(), true); /* select CORE + BORDER for all cells * CORE + BORDER is contiguous, in cuSTL we call this a "topological spheric zone" */ - zone::SphericZone zoneCoreBorder( coreBorderCells, guardCells ); + zone::SphericZone zoneCoreBorder(coreBorderCells, guardCells); - StartBlockFunctor< r_dir > startBlockFunctor( + StartBlockFunctor startBlockFunctor( particles->getDeviceParticlesBox(), dBuffer->origin(), this->axis_element.momentum, - this->axis_p_range - ); + this->axis_p_range); auto bindFunctor = std::bind( startBlockFunctor, @@ -240,39 +230,34 @@ namespace picongpu // area to work on 
zoneCoreBorder, // data below - passed to functor operator() - cursor::make_MultiIndexCursor() - ); - - meta::ForEach< - typename Help::EligibleFilters, - plugins::misc::ExecuteIfNameIsEqual< bmpl::_1 > - >{ }( - m_help->filter.get( m_id ), + cursor::make_MultiIndexCursor()); + + meta::ForEach>{}( + m_help->filter.get(m_id), currentStep, - bindFunctor - ); + bindFunctor); - dc.releaseData( Species::FrameType::getName() ); + dc.releaseData(Species::FrameType::getName()); } template - void PhaseSpace::notify( uint32_t currentStep ) + void PhaseSpace::notify(uint32_t currentStep) { /* reset device buffer */ - this->dBuffer->assign( float_PS(0.0) ); + this->dBuffer->assign(float_PS(0.0)); /* calculate local phase space */ - if( this->axis_element.space == AxisDescription::x ) - calcPhaseSpace( currentStep ); - else if( this->axis_element.space == AxisDescription::y ) - calcPhaseSpace( currentStep ); -#if(SIMDIM==DIM3) + if(this->axis_element.space == AxisDescription::x) + calcPhaseSpace(currentStep); + else if(this->axis_element.space == AxisDescription::y) + calcPhaseSpace(currentStep); +#if(SIMDIM == DIM3) else - calcPhaseSpace( currentStep ); + calcPhaseSpace(currentStep); #endif /* transfer to host */ - container::HostBuffer hBuffer( this->dBuffer->size() ); + container::HostBuffer hBuffer(this->dBuffer->size()); hBuffer = *this->dBuffer; /* reduce-add phase space from other GPUs in range [p0;p1]x[r;r+dr] @@ -281,17 +266,17 @@ namespace picongpu * spatial y and z direction to node with * lowest y and z position and same x range */ - container::HostBuffer hReducedBuffer( hBuffer.size() ); - hReducedBuffer.assign( float_PS(0.0) ); + container::HostBuffer hReducedBuffer(hBuffer.size()); + hReducedBuffer.assign(float_PS(0.0)); - planeReduce->template operator()( /* parameters: dest, source */ - hReducedBuffer, - hBuffer, - /* the functors return value will be written to dst */ - pmacc::algorithm::functor::Add() ); + planeReduce->template operator()(/* parameters: dest, 
source */ + hReducedBuffer, + hBuffer, + /* the functors return value will be written to dst */ + pmacc::algorithm::functor::Add()); /** all non-reduce-root processes are done now */ - if( !this->isPlaneReduceRoot ) + if(!this->isPlaneReduceRoot) return; /** \todo communicate GUARD and add it to the two neighbors BORDER */ @@ -306,17 +291,22 @@ namespace picongpu * on the p-axis should be scaled to represent single/real particles. * \see PhaseSpaceMulti::pluginLoad( ) */ - float_64 const pRange_unit = - UNIT_MASS * - UNIT_SPEED; + float_64 const pRange_unit = UNIT_MASS * UNIT_SPEED; DumpHBuffer dumpHBuffer; - if( this->commFileWriter != MPI_COMM_NULL ) - dumpHBuffer( hReducedBuffer, this->axis_element, - this->axis_p_range, pRange_unit, - unit, Species::FrameType::getName() + "_" + m_help->filter.get( m_id ), - currentStep, this->commFileWriter ); + if(this->commFileWriter != MPI_COMM_NULL) + dumpHBuffer( + hReducedBuffer, + this->axis_element, + this->axis_p_range, + pRange_unit, + unit, + Species::FrameType::getName() + "_" + m_help->filter.get(m_id), + m_help->file_name_extension.get(m_id), + m_help->json_config.get(m_id), + currentStep, + this->commFileWriter); } } /* namespace picongpu */ diff --git a/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp b/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp index 15f0508014..be7e5c9368 100644 --- a/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp +++ b/include/picongpu/plugins/PhaseSpace/PhaseSpaceFunctors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Richard Pausch, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Richard Pausch, Rene Widera * * This file is part of PIConGPU. 
* @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include @@ -45,11 +45,10 @@ namespace picongpu { typedef void result_type; - template< typename T_Acc > - DINLINE void - operator()( const T_Acc& acc, Type& dest, const Type src ) const + template + DINLINE void operator()(const T_Acc& acc, Type& dest, const Type src) const { - atomicAdd( &dest, src, ::alpaka::hierarchy::Blocks{} ); + cupla::atomicAdd(acc, &dest, src, ::alpaka::hierarchy::Blocks{}); } }; @@ -77,14 +76,14 @@ namespace picongpu * \param el_p coordinate of the momentum \see PhaseSpace::axis_element \see AxisDescription * \param axis_p_range range of the momentum coordinate \see PhaseSpace::axis_p_range */ - template - DINLINE void - operator()( const T_Acc & acc, + template + DINLINE void operator()( + const T_Acc& acc, FramePtr frame, uint16_t particleID, cursor::CT::BufferCursor curDBufferOriginInBlock, const uint32_t el_p, - const std::pair& axis_p_range ) + const std::pair& axis_p_range) { auto particle = frame[particleID]; /** \todo this can become a functor to be even more flexible */ @@ -92,31 +91,29 @@ namespace picongpu /* cell id in this block */ const int linearCellIdx = particle[localCellIdx_]; - const pmacc::math::UInt32 cellIdx( - pmacc::math::MapToPos()( SuperCellSize(), linearCellIdx ) ); + const pmacc::math::UInt32 cellIdx(pmacc::math::MapToPos()(SuperCellSize(), linearCellIdx)); - const uint32_t r_bin = cellIdx[r_dir]; + const uint32_t r_bin = cellIdx[r_dir]; const float_X weighting = particle[weighting_]; - const float_X charge = attribute::getCharge( weighting,particle ); - const float_PS particleChargeDensity = - precisionCast( charge / CELL_VOLUME ); + const float_X charge = attribute::getCharge(weighting, particle); + const float_PS particleChargeDensity = precisionCast(charge / CELL_VOLUME); - const float_X rel_bin = (mom_i / weighting - axis_p_range.first) - / (axis_p_range.second - axis_p_range.first); - int p_bin = int( rel_bin * float_X(num_pbins) 
); + const float_X rel_bin + = (mom_i / weighting - axis_p_range.first) / (axis_p_range.second - axis_p_range.first); + int p_bin = int(rel_bin * float_X(num_pbins)); /* out-of-range bins back to min/max */ - if( p_bin < 0 ) + if(p_bin < 0) p_bin = 0; - if( p_bin >= num_pbins ) + if(p_bin >= num_pbins) p_bin = num_pbins - 1; /** \todo take particle shape into account */ - atomicAdd( - &(*curDBufferOriginInBlock( p_bin, r_bin )), + cupla::atomicAdd( + acc, + &(*curDBufferOriginInBlock(p_bin, r_bin)), particleChargeDensity, - ::alpaka::hierarchy::Threads{} - ); + ::alpaka::hierarchy::Threads{}); } }; @@ -141,8 +138,7 @@ namespace picongpu uint32_t num_pbins, uint32_t r_dir, typename T_Filter, - uint32_t T_numWorkers - > + uint32_t T_numWorkers> struct FunctorBlock { typedef void result_type; @@ -169,11 +165,14 @@ namespace picongpu cursor::BufferCursor cur, const uint32_t p_dir, const std::pair& p_range, - const T_Filter & parFilter - ) : - particlesBox(pb), curOriginPhaseSpace(cur), p_element(p_dir), - axis_p_range(p_range), particleFilter(parFilter) - {} + const T_Filter& parFilter) + : particlesBox(pb) + , curOriginPhaseSpace(cur) + , p_element(p_dir) + , axis_p_range(p_range) + , particleFilter(parFilter) + { + } /** Called for the first cell of each block #-of-cells-in-block times * @@ -181,12 +180,11 @@ namespace picongpu * the current block starts * \see cuSTL/algorithm/kernel/Foreach.hpp */ - template< typename T_Acc > - DINLINE void - operator()( const T_Acc& acc, const pmacc::math::Int& indexBlockOffset ) + template + DINLINE void operator()(const T_Acc& acc, const pmacc::math::Int& indexBlockOffset) { constexpr uint32_t numWorkers = T_numWorkers; - const uint32_t workerIdx = threadIdx.x; + const uint32_t workerIdx = cupla::threadIdx(acc).x; /** \todo write math::Vector constructor that supports dim3 */ const pmacc::math::Int indexGlobal = indexBlockOffset; @@ -194,24 +192,20 @@ namespace picongpu /* create shared mem */ const int blockCellsInDir = 
SuperCellSize::template at::type::value; typedef typename pmacc::math::CT::Int dBufferSizeInBlock; - container::CT::SharedBuffer dBufferInBlock( acc ); + container::CT::SharedBuffer dBufferInBlock(acc); /* init shared mem */ - pmacc::algorithm::cudaBlock::Foreach< - pmacc::math::CT::Int< numWorkers > - > forEachThreadInBlock(workerIdx); - forEachThreadInBlock( acc, - dBufferInBlock.zone(), - dBufferInBlock.origin(), - pmacc::algorithm::functor::AssignValue(0.0) ); - __syncthreads(); + pmacc::algorithm::cuplaBlock::Foreach> forEachThreadInBlock(workerIdx); + forEachThreadInBlock( + acc, + dBufferInBlock.zone(), + dBufferInBlock.origin(), + pmacc::algorithm::functor::AssignValue(0.0)); + cupla::__syncthreads(acc); FunctorParticle functorParticle; - particleAccess::Cell2Particle< - SuperCellSize, - numWorkers - > forEachParticleInCell; + particleAccess::Cell2Particle forEachParticleInCell; forEachParticleInCell( acc, /* mandatory params */ @@ -223,20 +217,20 @@ namespace picongpu /* optional params */ dBufferInBlock.origin(), p_element, - axis_p_range - ); + axis_p_range); - __syncthreads(); + cupla::__syncthreads(acc); /* add to global dBuffer */ - forEachThreadInBlock( acc, - /* area to work on */ - dBufferInBlock.zone(), - /* data below - cursors will be shifted and - * dereferenced */ - curOriginPhaseSpace(0, indexBlockOffset[r_dir]), - dBufferInBlock.origin(), - /* functor */ - FunctorAtomicAdd() ); + forEachThreadInBlock( + acc, + /* area to work on */ + dBufferInBlock.zone(), + /* data below - cursors will be shifted and + * dereferenced */ + curOriginPhaseSpace(0, indexBlockOffset[r_dir]), + dBufferInBlock.origin(), + /* functor */ + FunctorAtomicAdd()); } }; diff --git a/include/picongpu/plugins/PluginController.hpp b/include/picongpu/plugins/PluginController.hpp index 9162b5b77e..865b069c9e 100644 --- a/include/picongpu/plugins/PluginController.hpp +++ b/include/picongpu/plugins/PluginController.hpp @@ -1,6 +1,7 @@ -/* Copyright 2013-2020 Axel Huebl, 
Benjamin Schneider, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Benjamin Schneider, Felix Schmitt, * Heiko Burau, Rene Widera, Richard Pausch, - * Benjamin Worpitz, Erik Zenker, Finn-Ole Carstens + * Benjamin Worpitz, Erik Zenker, Finn-Ole Carstens, + * Franz Poeschel * * This file is part of PIConGPU. * @@ -41,34 +42,39 @@ */ #include "picongpu/plugins/PngPlugin.hpp" -#if (ENABLE_ADIOS == 1) -# include "picongpu/plugins/adios/ADIOSWriter.hpp" +#if(ENABLE_ADIOS == 1) +# include "picongpu/plugins/adios/ADIOSWriter.hpp" #endif -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/plugins/PositionsParticles.hpp" -# include "picongpu/plugins/ChargeConservation.hpp" -# include "picongpu/plugins/particleMerging/ParticleMerger.hpp" -# if(ENABLE_HDF5 == 1) -# include "picongpu/plugins/makroParticleCounter/PerSuperCell.hpp" -# endif - -# include "picongpu/plugins/SliceFieldPrinterMulti.hpp" -# if(SIMDIM==DIM3) -# include "picongpu/plugins/IntensityPlugin.hpp" -# endif +#if(ENABLE_OPENPMD == 1) +# include "picongpu/plugins/openPMD/openPMDWriter.hpp" +# include "picongpu/plugins/PhaseSpace/PhaseSpace.hpp" +# include "picongpu/plugins/xrayScattering/XrayScattering.hpp" #endif -#if (ENABLE_ISAAC == 1) && (SIMDIM==DIM3) +#if(PMACC_CUDA_ENABLED == 1) +# include "picongpu/plugins/PositionsParticles.hpp" +# include "picongpu/plugins/ChargeConservation.hpp" +# include "picongpu/plugins/particleMerging/ParticleMerger.hpp" +# include "picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.hpp" +# if(ENABLE_HDF5 == 1) +# include "picongpu/plugins/makroParticleCounter/PerSuperCell.hpp" +# endif + +# include "picongpu/plugins/SliceFieldPrinterMulti.hpp" +# if(SIMDIM == DIM3) +# include "picongpu/plugins/IntensityPlugin.hpp" +# endif +#endif + +#if(ENABLE_ISAAC == 1) && (SIMDIM == DIM3) # include "picongpu/plugins/IsaacPlugin.hpp" #endif -#if (ENABLE_HDF5 == 1) -# include "picongpu/plugins/PhaseSpace/PhaseSpace.hpp" -# include 
"picongpu/plugins/particleCalorimeter/ParticleCalorimeter.hpp" -# include "picongpu/plugins/radiation/VectorTypes.hpp" -# include "picongpu/plugins/radiation/Radiation.hpp" -# include "picongpu/plugins/hdf5/HDF5Writer.hpp" +#if(ENABLE_HDF5 == 1) +# include "picongpu/plugins/particleCalorimeter/ParticleCalorimeter.hpp" +# include "picongpu/plugins/radiation/VectorTypes.hpp" +# include "picongpu/plugins/radiation/Radiation.hpp" #endif #include "picongpu/plugins/Checkpoint.hpp" @@ -85,234 +91,206 @@ namespace picongpu { + using namespace pmacc; -using namespace pmacc; - -/** - * Plugin management controller for user-level plugins. - */ -class PluginController : public ILightweightPlugin -{ -private: - - std::list plugins; - - template - struct PushBack + /** + * Plugin management controller for user-level plugins. + */ + class PluginController : public ILightweightPlugin { + private: + std::list plugins; - template - void operator()(T& list) - { - list.push_back(new T_Type()); - } - }; - - struct TupleSpeciesPlugin - { - enum Names + template + struct PushBack { - species = 0, - plugin = 1 + template + void operator()(T& list) + { + list.push_back(new T_Type()); + } }; - /** apply the 1st vector component to the 2nd - * - * @tparam T_TupleVector vector of type - * pmacc::math::CT::vector< Species, Plugin > - * with two components - */ - template< typename T_TupleVector > - struct Apply : - bmpl::apply1< - typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_< plugin > - >::type, - typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_< species > - >::type - > + struct TupleSpeciesPlugin { + enum Names + { + species = 0, + plugin = 1 + }; + + /** apply the 1st vector component to the 2nd + * + * @tparam T_TupleVector vector of type + * pmacc::math::CT::vector< Species, Plugin > + * with two components + */ + template + struct Apply + : bmpl::apply1< + typename pmacc::math::CT::At>::type, + typename pmacc::math::CT::At>::type> + { + }; + + /** Check the 
combination Species+Plugin in the Tuple + * + * @tparam T_TupleVector with Species, Plugin + */ + template + struct IsEligible + { + using Species = typename pmacc::math::CT::At>::type; + using Solver = typename pmacc::math::CT::At>::type; + + using type = typename particles::traits::SpeciesEligibleForSolver::type; + }; }; - /** Check the combination Species+Plugin in the Tuple - * - * @tparam T_TupleVector with Species, Plugin - */ - template< typename T_TupleVector > - struct IsEligible - { - using Species = typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_< species > - >::type; - using Solver = typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_< plugin > - >::type; - - using type = typename particles::traits::SpeciesEligibleForSolver< - Species, - Solver - >::type; - }; - }; + /* define stand alone plugins */ + using StandAlonePlugins = bmpl::vector< + Checkpoint, + EnergyFields +#if(ENABLE_ADIOS == 1) + , + plugins::multi::Master +#endif - /* define stand alone plugins */ - using StandAlonePlugins = bmpl::vector< - Checkpoint, - EnergyFields -#if (ENABLE_ADIOS == 1) - , plugins::multi::Master< adios::ADIOSWriter > +#if(ENABLE_OPENPMD == 1) + , + plugins::multi::Master #endif -#if( PMACC_CUDA_ENABLED == 1 ) - , SumCurrents - , ChargeConservation -# if(SIMDIM==DIM3) - , IntensityPlugin -# endif +#if(PMACC_CUDA_ENABLED == 1) + , + SumCurrents, + ChargeConservation +# if(SIMDIM == DIM3) + , + IntensityPlugin +# endif #endif -#if (ENABLE_ISAAC == 1) && (SIMDIM==DIM3) - , isaacP::IsaacPlugin +#if(ENABLE_ISAAC == 1) && (SIMDIM == DIM3) + , + isaacP::IsaacPlugin #endif + , + ResourceLog>; + -#if (ENABLE_HDF5 == 1) - , plugins::multi::Master< hdf5::HDF5Writer > + /* define field plugins */ + using UnspecializedFieldPlugins = bmpl::vector< +#if(PMACC_CUDA_ENABLED == 1) + SliceFieldPrinterMulti #endif - , ResourceLog - >; + >; + + using AllFields = bmpl::vector; + using CombinedUnspecializedFieldPlugins = + typename AllCombinations>::type; - /* define 
field plugins */ - using UnspecializedFieldPlugins = bmpl::vector< -#if( PMACC_CUDA_ENABLED == 1 ) - SliceFieldPrinterMulti< bmpl::_1 > + using FieldPlugins = typename bmpl:: + transform>::type; + + + /* define species plugins */ + using UnspecializedSpeciesPlugins = bmpl::vector< + plugins::multi::Master>, + plugins::multi::Master>, + plugins::multi::Master>, + CountParticles, + PngPlugin>, + plugins::transitionRadiation::TransitionRadiation +#if(ENABLE_OPENPMD == 1) + , + plugins::xrayScattering::XrayScattering #endif - >; - - using AllFields = bmpl::vector< FieldB, FieldE, FieldJ >; - - using CombinedUnspecializedFieldPlugins = typename AllCombinations< - bmpl::vector< - AllFields, - UnspecializedFieldPlugins - > - >::type; - - using FieldPlugins = typename bmpl::transform< - CombinedUnspecializedFieldPlugins, - typename TupleSpeciesPlugin::Apply< bmpl::_1 > - >::type; - - - /* define species plugins */ - using UnspecializedSpeciesPlugins = bmpl::vector < - plugins::multi::Master< EnergyParticles >, - plugins::multi::Master< CalcEmittance >, - plugins::multi::Master< BinEnergyParticles >, - CountParticles, - PngPlugin< Visualisation >, - plugins::transitionRadiation::TransitionRadiation -#if(ENABLE_HDF5 == 1) - , plugins::radiation::Radiation - , plugins::multi::Master< ParticleCalorimeter > - , plugins::multi::Master< PhaseSpace > +#if(ENABLE_HDF5 * ENABLE_OPENPMD == 1) + , + plugins::radiation::Radiation, + plugins::multi::Master> #endif -#if( PMACC_CUDA_ENABLED == 1 ) - , PositionsParticles - , plugins::particleMerging::ParticleMerger -# if(ENABLE_HDF5 == 1) - , PerSuperCell -# endif +#if(ENABLE_OPENPMD == 1) + , + plugins::multi::Master> #endif - >; - - using CombinedUnspecializedSpeciesPlugins = typename AllCombinations< - bmpl::vector< - VectorAllSpecies, - UnspecializedSpeciesPlugins - > - >::type; - - using CombinedUnspecializedSpeciesPluginsEligible = typename bmpl::copy_if< - CombinedUnspecializedSpeciesPlugins, - typename 
TupleSpeciesPlugin::IsEligible< bmpl::_1 > - >::type; - - using SpeciesPlugins = typename bmpl::transform< - CombinedUnspecializedSpeciesPluginsEligible, - typename TupleSpeciesPlugin::Apply< bmpl::_1 > - >::type; - - /* create sequence with all fully specialized plugins */ - using AllPlugins = MakeSeq_t< - StandAlonePlugins, - FieldPlugins, - SpeciesPlugins - >; +#if(PMACC_CUDA_ENABLED == 1) + , + PositionsParticles, + plugins::particleMerging::ParticleMerger, + plugins::randomizedParticleMerger::RandomizedParticleMerger +# if(ENABLE_HDF5 == 1) + , + PerSuperCell +# endif +#endif + >; - /** - * Initializes the controller by adding all user plugins to its internal list. - */ - virtual void init() - { - meta::ForEach > pushBack; - pushBack(plugins); - } + using CombinedUnspecializedSpeciesPlugins = + typename AllCombinations>::type; -public: + using CombinedUnspecializedSpeciesPluginsEligible = typename bmpl:: + copy_if>::type; - PluginController() - { - init(); - } + using SpeciesPlugins = typename bmpl:: + transform>::type; - virtual ~PluginController() - { + /* create sequence with all fully specialized plugins */ + using AllPlugins = MakeSeq_t; - } + /** + * Initializes the controller by adding all user plugins to its internal list. 
+ */ + virtual void init() + { + meta::ForEach> pushBack; + pushBack(plugins); + } - void setMappingDescription(MappingDesc *cellDescription) - { - PMACC_ASSERT(cellDescription != nullptr); + public: + PluginController() + { + init(); + } - for (std::list::iterator iter = plugins.begin(); - iter != plugins.end(); - ++iter) + virtual ~PluginController() { - (*iter)->setMappingDescription(cellDescription); } - } - virtual void pluginRegisterHelp(po::options_description&) - { - // no help required at the moment - } + void setMappingDescription(MappingDesc* cellDescription) + { + PMACC_ASSERT(cellDescription != nullptr); - std::string pluginGetName() const - { - return "PluginController"; - } + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); ++iter) + { + (*iter)->setMappingDescription(cellDescription); + } + } - void notify(uint32_t) - { + virtual void pluginRegisterHelp(po::options_description&) + { + // no help required at the moment + } - } + std::string pluginGetName() const + { + return "PluginController"; + } - virtual void pluginUnload() - { - for (std::list::iterator iter = plugins.begin(); - iter != plugins.end(); - ++iter) + void notify(uint32_t) + { + } + + virtual void pluginUnload() { - __delete(*iter); + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); ++iter) + { + __delete(*iter); + } + plugins.clear(); } - plugins.clear(); - } -}; + }; -} +} // namespace picongpu diff --git a/include/picongpu/plugins/PngPlugin.hpp b/include/picongpu/plugins/PngPlugin.hpp index 51a2b3c1d5..aaafec8ae9 100644 --- a/include/picongpu/plugins/PngPlugin.hpp +++ b/include/picongpu/plugins/PngPlugin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. 
@@ -48,21 +48,19 @@ namespace picongpu class PngPlugin : public ILightweightPlugin { public: - typedef VisClass VisType; typedef std::list VisPointerList; - PngPlugin() : - pluginName("PngPlugin: create png's of a species and fields"), - pluginPrefix(VisType::FrameType::getName() + "_" + VisClass::CreatorType::getName()), - cellDescription(nullptr) + PngPlugin() + : pluginName("PngPlugin: create png's of a species and fields") + , pluginPrefix(VisType::FrameType::getName() + "_" + VisClass::CreatorType::getName()) + , cellDescription(nullptr) { Environment<>::get().PluginConnector().registerPlugin(this); } virtual ~PngPlugin() { - } std::string pluginGetName() const @@ -72,50 +70,54 @@ namespace picongpu void pluginRegisterHelp(po::options_description& desc) { -#if( PIC_ENABLE_PNG == 1 ) - desc.add_options() - ((pluginPrefix + ".period").c_str(), po::value > (¬ifyPeriod)->multitoken(), "enable data output [for each n-th step]") - ((pluginPrefix + ".axis").c_str(), po::value > (&axis)->multitoken(), "axis which are shown [valid values x,y,z] example: yz") - ((pluginPrefix + ".slicePoint").c_str(), po::value > (&slicePoints)->multitoken(), "value range: 0 <= x <= 1 , point of the slice") - ((pluginPrefix + ".folder").c_str(), po::value > (&folders)->multitoken(), "folder for output files"); +#if(PIC_ENABLE_PNG == 1) + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value>(¬ifyPeriod)->multitoken(), + "enable data output [for each n-th step]")( + (pluginPrefix + ".axis").c_str(), + po::value>(&axis)->multitoken(), + "axis which are shown [valid values x,y,z] example: yz")( + (pluginPrefix + ".slicePoint").c_str(), + po::value>(&slicePoints)->multitoken(), + "value range: 0 <= x <= 1 , point of the slice")( + (pluginPrefix + ".folder").c_str(), + po::value>(&folders)->multitoken(), + "folder for output files"); #else - desc.add_options() - ((pluginPrefix).c_str(), "plugin disabled [compiled without dependency PNGwriter]"); + 
desc.add_options()((pluginPrefix).c_str(), "plugin disabled [compiled without dependency PNGwriter]"); #endif } - void setMappingDescription(MappingDesc *cellDescription) + void setMappingDescription(MappingDesc* cellDescription) { this->cellDescription = cellDescription; } private: - void pluginLoad() { - - if (0 != notifyPeriod.size()) + if(0 != notifyPeriod.size()) { - if (0 != slicePoints.size() && - 0 != axis.size()) + if(0 != slicePoints.size() && 0 != axis.size()) { - for (int i = 0; i < (int) slicePoints.size(); ++i) /*!\todo: use vactor with max elements*/ + for(int i = 0; i < (int) slicePoints.size(); ++i) /*!\todo: use vactor with max elements*/ { std::string period = getValue(notifyPeriod, i); if(!period.empty()) { - - if (getValue(axis, i).length() == 2u) + if(getValue(axis, i).length() == 2u) { std::stringstream o_slicePoint; o_slicePoint << getValue(slicePoints, i); /*add default value for folder*/ - if (folders.empty()) + if(folders.empty()) { folders.push_back(std::string(".")); } - std::string filename(pluginPrefix + "_" + getValue(axis, i) + "_" + o_slicePoint.str()); + std::string filename( + pluginPrefix + "_" + getValue(axis, i) + "_" + o_slicePoint.str()); typename VisType::CreatorType pngCreator(filename, getValue(folders, i)); /** \todo rename me: transpose is the wrong name `swivel` is better * @@ -123,22 +125,26 @@ namespace picongpu * * example: transpose[2,1] means: use x and z from an other vector */ - DataSpace transpose( - charToAxisNumber(getValue(axis, i)[0]), - charToAxisNumber(getValue(axis, i)[1]) - ); + DataSpace transpose( + charToAxisNumber(getValue(axis, i)[0]), + charToAxisNumber(getValue(axis, i)[1])); /* if simulation run in 2D ignore all xz, yz slices (we had no z direction)*/ - const bool isAllowed2DSlice = (simDim == DIM3) || (transpose.x() != 2 && transpose.y() != 2); + const bool isAllowed2DSlice + = (simDim == DIM3) || (transpose.x() != 2 && transpose.y() != 2); const bool isSlidingWindowEnabled = 
MovingWindow::getInstance().isEnabled(); /* if sliding window is active we are not allowed to create pngs from xz slice * This means one dimension in transpose must contain 1 (y direction) */ - const bool isAllowedMovingWindowSlice = - !isSlidingWindowEnabled || - (transpose.x() == 1 || transpose.y() == 1); - if( isAllowed2DSlice && isAllowedMovingWindowSlice ) + const bool isAllowedMovingWindowSlice + = !isSlidingWindowEnabled || (transpose.x() == 1 || transpose.y() == 1); + if(isAllowed2DSlice && isAllowedMovingWindowSlice) { - VisType* tmp = new VisType(pluginName, pngCreator, period, transpose, getValue(slicePoints, i)); + VisType* tmp = new VisType( + pluginName, + pngCreator, + period, + transpose, + getValue(slicePoints, i)); visIO.push_back(tmp); tmp->setMappingDescription(cellDescription); tmp->init(); @@ -146,15 +152,19 @@ namespace picongpu else { if(!isAllowedMovingWindowSlice) - std::cerr << "[WARNING] You are running a simulation with moving window: png output along the axis "<< - getValue(axis, i) << " will be ignored" << std::endl; + std::cerr << "[WARNING] You are running a simulation with moving window: png " + "output along the axis " + << getValue(axis, i) << " will be ignored" << std::endl; if(!isAllowed2DSlice) - std::cerr << "[WARNING] You are running a 2D simulation: png output along the axis "<< - getValue(axis, i) << " will be ignored" << std::endl; + std::cerr + << "[WARNING] You are running a 2D simulation: png output along the axis " + << getValue(axis, i) << " will be ignored" << std::endl; } } else - throw std::runtime_error((std::string("[Png Plugin] wrong charecter count in axis: ") + getValue(axis, i)).c_str()); + throw std::runtime_error( + (std::string("[Png Plugin] wrong charecter count in axis: ") + getValue(axis, i)) + .c_str()); } } } @@ -167,9 +177,7 @@ namespace picongpu void pluginUnload() { - for (typename VisPointerList::iterator iter = visIO.begin(); - iter != visIO.end(); - ++iter) + for(typename 
VisPointerList::iterator iter = visIO.begin(); iter != visIO.end(); ++iter) { __delete(*iter); } @@ -187,9 +195,10 @@ namespace picongpu template typename Vec::value_type getValue(Vec vec, size_t id) { - if (vec.size() == 0) - throw std::runtime_error("[Png Plugin] getValue is used with a parameter set with no parameters (count is 0)"); - if (id >= vec.size()) + if(vec.size() == 0) + throw std::runtime_error( + "[Png Plugin] getValue is used with a parameter set with no parameters (count is 0)"); + if(id >= vec.size()) { return vec[vec.size() - 1]; } @@ -198,9 +207,9 @@ namespace picongpu int charToAxisNumber(char c) { - if (c == 'x') + if(c == 'x') return 0; - if (c == 'y') + if(c == 'y') return 1; return 2; } @@ -216,34 +225,21 @@ namespace picongpu VisPointerList visIO; MappingDesc* cellDescription; - }; -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_VisClass - > - struct SpeciesEligibleForSolver< - T_Species, - PngPlugin< T_VisClass > - > + namespace particles { - using FrameType = typename T_Species::FrameType; + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; - using RequiredIdentifiers = MakeSeq_t< - weighting - >; + using RequiredIdentifiers = MakeSeq_t; - using type = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - }; -} // namespace traits -} // namespace particles + using type = typename pmacc::traits::HasIdentifiers::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu - diff --git a/include/picongpu/plugins/PositionsParticles.hpp b/include/picongpu/plugins/PositionsParticles.hpp index f0201e19f6..b5778849d2 100644 --- a/include/picongpu/plugins/PositionsParticles.hpp +++ b/include/picongpu/plugins/PositionsParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, 
Heiko Burau, Rene Widera, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. @@ -39,295 +39,262 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; -template -struct SglParticle -{ - FloatPos position; - float3_X momentum; - float_X mass; - float_X weighting; - float_X charge; - float_X gamma; - - SglParticle() : position(FloatPos::create(0.0)), momentum(float3_X::create(0.0)), mass(0.0), - weighting(0.0), charge(0.0), gamma(0.0) + template + struct SglParticle { - } + FloatPos position; + float3_X momentum; + float_X mass; + float_X weighting; + float_X charge; + float_X gamma; + + SglParticle() + : position(FloatPos::create(0.0)) + , momentum(float3_X::create(0.0)) + , mass(0.0) + , weighting(0.0) + , charge(0.0) + , gamma(0.0) + { + } - DataSpace globalCellOffset; + DataSpace globalCellOffset; - //! todo + //! todo - floatD_64 getGlobalCell() const - { - floatD_64 doubleGlobalCellOffset; - for(uint32_t i=0;i(position)); - } + return floatD_64(doubleGlobalCellOffset + precisionCast(position)); + } - template + template friend std::ostream& operator<<(std::ostream& out, const SglParticle& v) - { - floatD_64 pos; - for(uint32_t i=0;i(v.momentum.x()) * UNIT_MASS * UNIT_SPEED, - precisionCast(v.momentum.y()) * UNIT_MASS * UNIT_SPEED, - precisionCast(v.momentum.z()) * UNIT_MASS * UNIT_SPEED ); - - const float_64 mass = precisionCast(v.mass) * UNIT_MASS; - const float_64 charge = precisionCast(v.charge) * UNIT_CHARGE; - - using dbl = std::numeric_limits; - out.precision(dbl::digits10); - - out << std::scientific << pos << " " << mom << " " << mass << " " - << precisionCast(v.weighting) - << " " << charge << " " << precisionCast(v.gamma); - return out; - } -}; - -/** write the position of a single particle to a file - * \warning this plugin MUST NOT be used with more than one (global!) 
- * particle and is created for one-particle-test-purposes only - */ -struct KernelPositionsParticles -{ - template< - typename ParBox, - typename FloatPos, - typename Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - ParBox pb, - SglParticle* gParticle, - Mapping mapper - ) const - { + { + floatD_64 pos; + for(uint32_t i = 0; i < simDim; ++i) + pos[i] = (v.getGlobalCell()[i] * cellSize[i] * UNIT_LENGTH); - using FramePtr = typename ParBox::FramePtr; - PMACC_SMEM( acc, frame, FramePtr ); + const float3_64 mom( + precisionCast(v.momentum.x()) * UNIT_MASS * UNIT_SPEED, + precisionCast(v.momentum.y()) * UNIT_MASS * UNIT_SPEED, + precisionCast(v.momentum.z()) * UNIT_MASS * UNIT_SPEED); + const float_64 mass = precisionCast(v.mass) * UNIT_MASS; + const float_64 charge = precisionCast(v.charge) * UNIT_CHARGE; - using SuperCellSize = typename Mapping::SuperCellSize; + using dbl = std::numeric_limits; + out.precision(dbl::digits10); - const DataSpace threadIndex(threadIdx); - const int linearThreadIdx = DataSpaceOperations::template map (threadIndex); - const DataSpace superCellIdx(mapper.getSuperCellIndex(DataSpace (blockIdx))); + out << std::scientific << pos << " " << mom << " " << mass << " " << precisionCast(v.weighting) + << " " << charge << " " << precisionCast(v.gamma); + return out; + } + }; - if (linearThreadIdx == 0) + /** write the position of a single particle to a file + * \warning this plugin MUST NOT be used with more than one (global!) 
+ * particle and is created for one-particle-test-purposes only + */ + struct KernelPositionsParticles + { + template + DINLINE void operator()(T_Acc const& acc, ParBox pb, SglParticle* gParticle, Mapping mapper) const { - frame = pb.getLastFrame(superCellIdx); - } + using FramePtr = typename ParBox::FramePtr; + PMACC_SMEM(acc, frame, FramePtr); - __syncthreads(); - if (!frame.isValid()) - return; //end kernel if we have no frames - /* BUGFIX to issue #538 - * volatile prohibits that the compiler creates wrong code*/ - volatile bool isParticle = frame[linearThreadIdx][multiMask_]; + using SuperCellSize = typename Mapping::SuperCellSize; - while (frame.isValid()) - { - if (isParticle) - { - auto particle = frame[linearThreadIdx]; - gParticle->position = particle[position_]; - gParticle->momentum = particle[momentum_]; - gParticle->weighting = particle[weighting_]; - gParticle->mass = attribute::getMass(gParticle->weighting,particle); - gParticle->charge = attribute::getCharge(gParticle->weighting,particle); - gParticle->gamma = Gamma<>()(gParticle->momentum, gParticle->mass); + const DataSpace threadIndex(cupla::threadIdx(acc)); + const int linearThreadIdx = DataSpaceOperations::template map(threadIndex); + const DataSpace superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); - // storage number in the actual frame - const lcellId_t frameCellNr = particle[localCellIdx_]; + if(linearThreadIdx == 0) + { + frame = pb.getLastFrame(superCellIdx); + } - // offset in the actual superCell = cell offset in the supercell - const DataSpace frameCellOffset(DataSpaceOperations::template map (frameCellNr)); + cupla::__syncthreads(acc); + if(!frame.isValid()) + return; // end kernel if we have no frames + /* BUGFIX to issue #538 + * volatile prohibits that the compiler creates wrong code*/ + volatile bool isParticle = frame[linearThreadIdx][multiMask_]; - gParticle->globalCellOffset = (superCellIdx - mapper.getGuardingSuperCells()) - * 
MappingDesc::SuperCellSize::toRT() - + frameCellOffset; - } - __syncthreads(); - if (linearThreadIdx == 0) + while(frame.isValid()) { - frame = pb.getPreviousFrame(frame); + if(isParticle) + { + auto particle = frame[linearThreadIdx]; + gParticle->position = particle[position_]; + gParticle->momentum = particle[momentum_]; + gParticle->weighting = particle[weighting_]; + gParticle->mass = attribute::getMass(gParticle->weighting, particle); + gParticle->charge = attribute::getCharge(gParticle->weighting, particle); + gParticle->gamma = Gamma<>()(gParticle->momentum, gParticle->mass); + + // storage number in the actual frame + const lcellId_t frameCellNr = particle[localCellIdx_]; + + // offset in the actual superCell = cell offset in the supercell + const DataSpace frameCellOffset( + DataSpaceOperations::template map(frameCellNr)); + + + gParticle->globalCellOffset + = (superCellIdx - mapper.getGuardingSuperCells()) * MappingDesc::SuperCellSize::toRT() + + frameCellOffset; + } + cupla::__syncthreads(acc); + if(linearThreadIdx == 0) + { + frame = pb.getPreviousFrame(frame); + } + isParticle = true; + cupla::__syncthreads(acc); } - isParticle = true; - __syncthreads(); } + }; - } -}; - -template -class PositionsParticles : public ILightweightPlugin -{ -private: - typedef MappingDesc::SuperCellSize SuperCellSize; - typedef floatD_X FloatPos; - - GridBuffer, DIM1> *gParticle; + template + class PositionsParticles : public ILightweightPlugin + { + private: + typedef MappingDesc::SuperCellSize SuperCellSize; + typedef floatD_X FloatPos; - MappingDesc *cellDescription; - std::string notifyPeriod; + GridBuffer, DIM1>* gParticle; - std::string pluginName; - std::string pluginPrefix; + MappingDesc* cellDescription; + std::string notifyPeriod; -public: + std::string pluginName; + std::string pluginPrefix; - PositionsParticles() : - pluginName("PositionsParticles: write position of one particle of a species to std::cout"), - pluginPrefix(ParticlesType::FrameType::getName() + 
std::string("_position")), - gParticle(nullptr), - cellDescription(nullptr) - { + public: + PositionsParticles() + : pluginName("PositionsParticles: write position of one particle of a species to std::cout") + , pluginPrefix(ParticlesType::FrameType::getName() + std::string("_position")) + , gParticle(nullptr) + , cellDescription(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - Environment<>::get().PluginConnector().registerPlugin(this); - } + virtual ~PositionsParticles() + { + } - virtual ~PositionsParticles() - { - } + void notify(uint32_t currentStep) + { + const int rank = Environment::get().GridController().getGlobalRank(); + const SglParticle positionParticle = getPositionsParticles(currentStep); + + /*FORMAT OUTPUT*/ + if(positionParticle.mass != float_X(0.0)) + std::cout << "[ANALYSIS] [" << rank << "] [COUNTER] [" << pluginPrefix << "] [" << currentStep << "] " + << std::setprecision(16) << float_64(currentStep) * SI::DELTA_T_SI << " " << positionParticle + << "\n"; // no flush + } - void notify(uint32_t currentStep) - { - const int rank = Environment::get().GridController().getGlobalRank(); - const SglParticle positionParticle = getPositionsParticles < CORE + BORDER > (currentStep); + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]"); + } - /*FORMAT OUTPUT*/ - if (positionParticle.mass != float_X(0.0)) - std::cout << "[ANALYSIS] [" << rank << "] [COUNTER] [" << pluginPrefix << "] [" << currentStep << "] " - << std::setprecision(16) << float_64(currentStep) * SI::DELTA_T_SI << " " - << positionParticle << "\n"; // no flush - } + std::string pluginGetName() const + { + return pluginName; + } - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ((pluginPrefix + ".period").c_str(), - po::value (¬ifyPeriod), "enable plugin [for each n-th step]"); - } + void 
setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; + } - std::string pluginGetName() const - { - return pluginName; - } + private: + void pluginLoad() + { + if(!notifyPeriod.empty()) + { + // create one float3_X on gpu und host + gParticle = new GridBuffer, DIM1>(DataSpace(1)); - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + } + } -private: + void pluginUnload() + { + __delete(gParticle); + } - void pluginLoad() - { - if(!notifyPeriod.empty()) + template + SglParticle getPositionsParticles(uint32_t currentStep) { - //create one float3_X on gpu und host - gParticle = new GridBuffer, DIM1 > (DataSpace (1)); + typedef typename MappingDesc::SuperCellSize SuperCellSize; + SglParticle positionParticleTmp; - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - } - } + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); - void pluginUnload() - { - __delete(gParticle); - } + gParticle->getDeviceBuffer().setValue(positionParticleTmp); + auto block = SuperCellSize::toRT(); - template< uint32_t AREA> - SglParticle getPositionsParticles(uint32_t currentStep) - { - typedef typename MappingDesc::SuperCellSize SuperCellSize; - SglParticle positionParticleTmp; + AreaMapping mapper(*cellDescription); + PMACC_KERNEL(KernelPositionsParticles{}) + (mapper.getGridDim(), + block)(particles->getDeviceParticlesBox(), gParticle->getDeviceBuffer().getBasePointer(), mapper); - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); + dc.releaseData(ParticlesType::FrameType::getName()); + gParticle->deviceToHost(); - gParticle->getDeviceBuffer().setValue(positionParticleTmp); - auto block = 
SuperCellSize::toRT(); + DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - AreaMapping mapper(*cellDescription); - PMACC_KERNEL(KernelPositionsParticles{}) - (mapper.getGridDim(), block) - (particles->getDeviceParticlesBox(), - gParticle->getDeviceBuffer().getBasePointer(), - mapper); + DataSpace gpuPhyCellOffset(Environment::get().SubGrid().getLocalDomain().offset); + gpuPhyCellOffset.y() += (localSize.y() * numSlides); - dc.releaseData( ParticlesType::FrameType::getName() ); - gParticle->deviceToHost(); + gParticle->getHostBuffer().getDataBox()[0].globalCellOffset += gpuPhyCellOffset; - DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - DataSpace gpuPhyCellOffset(Environment::get().SubGrid().getLocalDomain().offset); - gpuPhyCellOffset.y() += (localSize.y() * numSlides); + return gParticle->getHostBuffer().getDataBox()[0]; + } + }; - gParticle->getHostBuffer().getDataBox()[0].globalCellOffset += gpuPhyCellOffset; + namespace particles + { + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + using RequiredIdentifiers = MakeSeq_t>; - return gParticle->getHostBuffer().getDataBox()[0]; - } + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; -}; + using SpeciesHasMass = typename pmacc::traits::HasFlag>::type; + using SpeciesHasCharge = typename pmacc::traits::HasFlag>::type; -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - PositionsParticles< T_UnspecifiedSpecies > - > - { - using FrameType = typename T_Species::FrameType; - - using RequiredIdentifiers = MakeSeq_t< - weighting, - momentum, - position<> - >; - - 
using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasMass = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - using SpeciesHasCharge = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasMass, - SpeciesHasCharge - >; - }; -} // namespace traits -} // namespace particles + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/ResourceLog.cpp b/include/picongpu/plugins/ResourceLog.cpp index 6fafbd1f8b..5fc24c8e8a 100644 --- a/include/picongpu/plugins/ResourceLog.cpp +++ b/include/picongpu/plugins/ResourceLog.cpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Erik Zenker, Axel Huebl +/* Copyright 2016-2021 Erik Zenker, Axel Huebl * * This file is part of PMacc. * @@ -25,9 +25,9 @@ #include // STL -#include /* std::string */ -#include /* std::stringstream */ -#include /* std::map */ +#include /* std::string */ +#include /* std::stringstream */ +#include /* std::map */ #include /* std::runtime_error */ // C LIB @@ -36,55 +36,45 @@ namespace picongpu { -namespace detail -{ - std::string - writeMapToPropertyTree( - std::map< std::string, size_t > valueMap, - std::string outputFormat - ) + namespace detail { - // Create property tree which contains the resource information - using boost::property_tree::ptree; - ptree pt; + std::string writeMapToPropertyTree(std::map valueMap, std::string outputFormat) + { + // Create property tree which contains the resource information + using boost::property_tree::ptree; + ptree pt; - for( auto it = valueMap.begin(); it != valueMap.end(); ++it ) { - pt.put( it->first, it->second ); - } + for(auto it = valueMap.begin(); it != valueMap.end(); ++it) + { + pt.put(it->first, it->second); + } - // Write property tree to string stream - std::stringstream 
ss; - if( outputFormat == "json" ) - { - write_json( ss, pt, false ); - } - else if( outputFormat == "jsonpp" ) - { - write_json( ss, pt, true ); - } - else if( outputFormat == "xml" ) - { - write_xml( ss, pt ); - } - else if( outputFormat == "xmlpp" ) - { - write_xml( - ss, - pt, - boost::property_tree::xml_writer_make_settings< std::string >( '\t', 1 ) - ); - } - else - { - throw std::runtime_error( - std::string( "resourcelog.format " ) + - outputFormat + - std::string( " is not known, use json or xml." ) - ); - } + // Write property tree to string stream + std::stringstream ss; + if(outputFormat == "json") + { + write_json(ss, pt, false); + } + else if(outputFormat == "jsonpp") + { + write_json(ss, pt, true); + } + else if(outputFormat == "xml") + { + write_xml(ss, pt); + } + else if(outputFormat == "xmlpp") + { + write_xml(ss, pt, boost::property_tree::xml_writer_make_settings('\t', 1)); + } + else + { + throw std::runtime_error( + std::string("resourcelog.format ") + outputFormat + + std::string(" is not known, use json or xml.")); + } - return ss.str(); - } -} // namespace detail + return ss.str(); + } + } // namespace detail } // namespace picongpu - diff --git a/include/picongpu/plugins/ResourceLog.hpp b/include/picongpu/plugins/ResourceLog.hpp index 878e88b8f8..76228f36c4 100644 --- a/include/picongpu/plugins/ResourceLog.hpp +++ b/include/picongpu/plugins/ResourceLog.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Erik Zenker +/* Copyright 2016-2021 Erik Zenker * * This file is part of PMacc. 
* @@ -35,12 +35,12 @@ #include // STL -#include /* std::cout, std::ostream */ -#include /* std::accumulate */ -#include /* std::string */ -#include /* std::stringstream */ -#include /* std::filebuf */ -#include /* std::map */ +#include /* std::cout, std::ostream */ +#include /* std::accumulate */ +#include /* std::string */ +#include /* std::stringstream */ +#include /* std::filebuf */ +#include /* std::map */ #include /* std::accumulate */ // C LIB @@ -52,19 +52,15 @@ namespace picongpu { using namespace pmacc; /** @todo do not pull into global (header) scope */ -namespace detail -{ - std::string - writeMapToPropertyTree( - std::map valueMap, - std::string outputFormat - ); -} + namespace detail + { + std::string writeMapToPropertyTree(std::map valueMap, std::string outputFormat); + } class ResourceLog : public ILightweightPlugin { private: - MappingDesc *cellDescription; + MappingDesc* cellDescription; ResourceMonitor resourceMonitor; // programm options @@ -77,9 +73,7 @@ namespace detail std::map propertyMap; public: - - ResourceLog() : - cellDescription(NULL) + ResourceLog() : cellDescription(NULL) { Environment<>::get().PluginConnector().registerPlugin(this); } @@ -94,13 +88,14 @@ namespace detail std::map valueMap; if(contains(propertyMap, "rank")) - valueMap["resourceLog.rank"] = static_cast(Environment::get().GridController().getGlobalRank()); + valueMap["resourceLog.rank"] + = static_cast(Environment::get().GridController().getGlobalRank()); - if(contains(propertyMap,"position")) + if(contains(propertyMap, "position")) { auto const currentPosition = Environment::get().GridController().getPosition(); char const axisName[] = {'x', 'y', 'z'}; - for( size_t d = 0; d < simDim; ++d ) + for(size_t d = 0; d < simDim; ++d) valueMap[std::string("resourceLog.position.") + axisName[d]] = currentPosition[d]; } @@ -110,17 +105,19 @@ namespace detail if(contains(propertyMap, "cellCount")) valueMap["resourceLog.cellCount"] = resourceMonitor.getCellCount(); - 
if(contains(propertyMap,"particleCount")) + if(contains(propertyMap, "particleCount")) { // enforce that the filter interface is fulfilled - particles::filter::IUnary< particles::filter::All > parFilter{ currentStep }; - std::vector particleCounts = resourceMonitor.getParticleCounts(*cellDescription, parFilter ); - valueMap["resourceLog.particleCount"] = std::accumulate(particleCounts.begin(), particleCounts.end(), 0); + particles::filter::IUnary parFilter{currentStep}; + std::vector particleCounts + = resourceMonitor.getParticleCounts(*cellDescription, parFilter); + valueMap["resourceLog.particleCount"] + = std::accumulate(particleCounts.begin(), particleCounts.end(), 0); } // // Write property tree to a string - std::string properties = ::picongpu::detail::writeMapToPropertyTree( valueMap, outputFormat ); + std::string properties = ::picongpu::detail::writeMapToPropertyTree(valueMap, outputFormat); // // Write property tree to the output stream @@ -128,38 +125,45 @@ namespace detail { std::cout << properties; } - else if (streamType == "stderr") + else if(streamType == "stderr") { std::cerr << properties; } - else if (streamType == "file") + else if(streamType == "file") { std::ostream os(&fileBuf); os << properties; } else { - throw std::runtime_error(std::string("resourcelog.stream ") + streamType + std::string(" is not known, use stdout, stderr or file instead.")); + throw std::runtime_error( + std::string("resourcelog.stream ") + streamType + + std::string(" is not known, use stdout, stderr or file instead.")); } } void pluginRegisterHelp(po::options_description& desc) { /* register command line parameters for your plugin */ - desc.add_options() - ("resourceLog.period", po::value(¬ifyPeriod), - "Enable ResourceLog plugin [for each n-th step]") - ("resourceLog.prefix", po::value(&outputFilePrefix)->default_value("resourceLog_"), - "Set the filename prefix for output file if a filestream was selected") - ("resourceLog.stream", 
po::value(&streamType)->default_value("file"), - "Output stream [stdout, stderr, file]") - ("resourceLog.properties", po::value >(&properties)->multitoken(), - "List of properties to log [rank, position, currentStep, cellCount, particleCount]") - ("resourceLog.format", po::value(&outputFormat)->default_value("json"), - "Output format of log (pp for pretty print) [json, jsonpp, xml, xmlpp]"); + desc.add_options()( + "resourceLog.period", + po::value(¬ifyPeriod), + "Enable ResourceLog plugin [for each n-th step]")( + "resourceLog.prefix", + po::value(&outputFilePrefix)->default_value("resourceLog_"), + "Set the filename prefix for output file if a filestream was selected")( + "resourceLog.stream", + po::value(&streamType)->default_value("file"), + "Output stream [stdout, stderr, file]")( + "resourceLog.properties", + po::value>(&properties)->multitoken(), + "List of properties to log [rank, position, currentStep, cellCount, particleCount]")( + "resourceLog.format", + po::value(&outputFormat)->default_value("json"), + "Output format of log (pp for pretty print) [json, jsonpp, xml, xmlpp]"); } - void setMappingDescription(MappingDesc *cellDescription) + void setMappingDescription(MappingDesc* cellDescription) { this->cellDescription = cellDescription; } @@ -167,12 +171,15 @@ namespace detail private: std::string notifyPeriod; - void pluginLoad() { - if(!notifyPeriod.empty()) { + void pluginLoad() + { + if(!notifyPeriod.empty()) + { Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); // Set default resources to log - if (properties.empty()) { + if(properties.empty()) + { properties.push_back("rank"); properties.push_back("position"); properties.push_back("currentStep"); @@ -184,14 +191,17 @@ namespace detail propertyMap["particleCount"] = true; propertyMap["cellCount"] = true; } - else { - for (size_t i = 0; i < properties.size(); ++i) { + else + { + for(size_t i = 0; i < properties.size(); ++i) + { propertyMap[properties[i]] = true; } } // 
Prepare file for output stream - if (streamType == "file") { + if(streamType == "file") + { size_t rank = static_cast(Environment::get().GridController().getGlobalRank()); std::stringstream ss; ss << outputFilePrefix << rank; @@ -204,19 +214,19 @@ namespace detail void pluginUnload() { - if(fileBuf.is_open()){ + if(fileBuf.is_open()) + { fileBuf.close(); } /* called when plugin is unloaded, cleanup here */ } - template + template bool contains(T_MAP const map, std::string const value) { return (map.find(value) != map.end()); } - }; -} +} // namespace picongpu #include diff --git a/include/picongpu/plugins/SliceFieldPrinter.hpp b/include/picongpu/plugins/SliceFieldPrinter.hpp index 161248425c..fe91e1165d 100644 --- a/include/picongpu/plugins/SliceFieldPrinter.hpp +++ b/include/picongpu/plugins/SliceFieldPrinter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. 
@@ -28,40 +28,43 @@ namespace picongpu { + using namespace pmacc; -using namespace pmacc; + namespace po = boost::program_options; -namespace po = boost::program_options; + template + class SliceFieldPrinterMulti; -template -class SliceFieldPrinterMulti; + template + class SliceFieldPrinter : public ILightweightPlugin + { + private: + std::string notifyPeriod; + bool sliceIsOK; + std::string fileName; + int plane; + float_X slicePoint; + MappingDesc* cellDescription; + container::DeviceBuffer* dBuffer_SI; -template -class SliceFieldPrinter : public ILightweightPlugin -{ -private: - std::string notifyPeriod; - bool sliceIsOK; - std::string fileName; - int plane; - float_X slicePoint; - MappingDesc *cellDescription; - container::DeviceBuffer* dBuffer_SI; + void pluginLoad(); + void pluginUnload(); - void pluginLoad(); - void pluginUnload(); + template + void printSlice(const TField& field, int nAxis, float slicePoint, std::string filename); - template - void printSlice(const TField& field, int nAxis, float slicePoint, std::string filename); + friend class SliceFieldPrinterMulti; - friend class SliceFieldPrinterMulti; -public: - void notify(uint32_t currentStep); - std::string pluginGetName() const; - void pluginRegisterHelp(po::options_description& desc); - void setMappingDescription(MappingDesc* desc) {this->cellDescription = desc;} -}; + public: + void notify(uint32_t currentStep); + std::string pluginGetName() const; + void pluginRegisterHelp(po::options_description& desc); + void setMappingDescription(MappingDesc* desc) + { + this->cellDescription = desc; + } + }; -} +} // namespace picongpu #include "SliceFieldPrinter.tpp" diff --git a/include/picongpu/plugins/SliceFieldPrinter.tpp b/include/picongpu/plugins/SliceFieldPrinter.tpp index bbe336ed23..29cce64e36 100644 --- a/include/picongpu/plugins/SliceFieldPrinter.tpp +++ b/include/picongpu/plugins/SliceFieldPrinter.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 
2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -41,148 +41,149 @@ namespace picongpu { - -namespace SliceFieldPrinterHelper -{ -template -class ConversionFunctor -{ -public: - /* convert field data to higher precision and convert to SI units on GPUs */ - template< typename T_Acc > - DINLINE void operator()( - T_Acc const & acc, - float3_64& target, - const typename Field::ValueType fieldData - ) const + namespace SliceFieldPrinterHelper { - target = precisionCast(fieldData) * float_64((Field::getUnit())[0]) ; + template + class ConversionFunctor + { + public: + /* convert field data to higher precision and convert to SI units on GPUs */ + template + DINLINE void operator()(T_Acc const& acc, float3_64& target, const typename Field::ValueType fieldData) + const + { + target = precisionCast(fieldData) * float_64((Field::getUnit())[0]); + } + }; + } // end namespace SliceFieldPrinterHelper + + + template + void SliceFieldPrinter::pluginLoad() + { + if(float_X(0.0) <= slicePoint && slicePoint <= float_X(1.0)) + { + /* in case the slice point is inside of [0.0,1.0] */ + sliceIsOK = true; + Environment<>::get().PluginConnector().setNotificationPeriod(this, this->notifyPeriod); + namespace vec = ::pmacc::math; + typedef SuperCellSize BlockDim; + + vec::Size_t size = vec::Size_t(this->cellDescription->getGridSuperCells()) + * precisionCast(BlockDim::toRT()) + - precisionCast(2 * BlockDim::toRT()); + this->dBuffer_SI = new container::DeviceBuffer( + size.shrink((this->plane + 1) % simDim)); + } + else + { + /* in case the slice point is outside of [0.0,1.0] */ + sliceIsOK = false; + std::cerr << "In the SliceFieldPrinter plugin a slice point" + << " (slice_point=" << slicePoint << ") is outside of [0.0, 1.0]. " << std::endl + << "The request will be ignored. 
" << std::endl; + } } -}; -} // end namespace SliceFieldPrinterHelper - -template -void SliceFieldPrinter::pluginLoad() -{ - if( float_X(0.0) <= slicePoint && slicePoint <= float_X(1.0)) - { - /* in case the slice point is inside of [0.0,1.0] */ - sliceIsOK = true; - Environment<>::get().PluginConnector().setNotificationPeriod(this, this->notifyPeriod); - namespace vec = ::pmacc::math; - typedef SuperCellSize BlockDim; - - vec::Size_t size = vec::Size_t(this->cellDescription->getGridSuperCells()) * precisionCast(BlockDim::toRT()) - - precisionCast(2 * BlockDim::toRT()); - this->dBuffer_SI = new container::DeviceBuffer( - size.shrink((this->plane+1)%simDim)); - } - else - { - /* in case the slice point is outside of [0.0,1.0] */ - sliceIsOK = false; - std::cerr << "In the SliceFieldPrinter plugin a slice point" - << " (slice_point=" << slicePoint - << ") is outside of [0.0, 1.0]. " << std::endl - << "The request will be ignored. " << std::endl; - } -} - -template -void SliceFieldPrinter::pluginUnload() -{ - __delete(this->dBuffer_SI); -} + template + void SliceFieldPrinter::pluginUnload() + { + __delete(this->dBuffer_SI); + } -template -void SliceFieldPrinter::pluginRegisterHelp(po::options_description&) -{ - // nothing to do here -} + template + void SliceFieldPrinter::pluginRegisterHelp(po::options_description&) + { + // nothing to do here + } -template -std::string SliceFieldPrinter::pluginGetName() const -{ - return "SliceFieldPrinter"; -} + template + std::string SliceFieldPrinter::pluginGetName() const + { + return "SliceFieldPrinter"; + } -template -void SliceFieldPrinter::notify(uint32_t currentStep) -{ - if(sliceIsOK) + template + void SliceFieldPrinter::notify(uint32_t currentStep) { - namespace vec = ::pmacc::math; - typedef SuperCellSize BlockDim; - DataConnector &dc = Environment<>::get().DataConnector(); - auto field_coreBorder = - dc.get< Field >( Field::getName(), true )->getGridBuffer(). - getDeviceBuffer().cartBuffer(). 
- view(BlockDim::toRT(), -BlockDim::toRT()); - - std::ostringstream filename; - filename << this->fileName << "_" << currentStep << ".dat"; - printSlice(field_coreBorder, this->plane, this->slicePoint, filename.str()); + if(sliceIsOK) + { + namespace vec = ::pmacc::math; + typedef SuperCellSize BlockDim; + DataConnector& dc = Environment<>::get().DataConnector(); + auto field_coreBorder = dc.get(Field::getName(), true) + ->getGridBuffer() + .getDeviceBuffer() + .cartBuffer() + .view(BlockDim::toRT(), -BlockDim::toRT()); + + std::ostringstream filename; + filename << this->fileName << "_" << currentStep << ".dat"; + printSlice(field_coreBorder, this->plane, this->slicePoint, filename.str()); + } } -} -template -template -void SliceFieldPrinter::printSlice(const TField& field, int nAxis, float slicePoint, std::string filename) -{ - namespace vec = pmacc::math; + template + template + void SliceFieldPrinter::printSlice(const TField& field, int nAxis, float slicePoint, std::string filename) + { + namespace vec = pmacc::math; - pmacc::GridController& con = pmacc::Environment::get().GridController(); - vec::Size_t gpuDim = (vec::Size_t)con.getGpuNodes(); - vec::Size_t globalGridSize = gpuDim * field.size(); - int globalPlane = globalGridSize[nAxis] * slicePoint; - int localPlane = globalPlane % field.size()[nAxis]; - int gpuPlane = globalPlane / field.size()[nAxis]; + pmacc::GridController& con = pmacc::Environment::get().GridController(); + vec::Size_t gpuDim = (vec::Size_t) con.getGpuNodes(); + vec::Size_t globalGridSize = gpuDim * field.size(); + int globalPlane = globalGridSize[nAxis] * slicePoint; + int localPlane = globalPlane % field.size()[nAxis]; + int gpuPlane = globalPlane / field.size()[nAxis]; - vec::Int nVector(vec::Int::create(0)); - nVector[nAxis] = 1; + vec::Int nVector(vec::Int::create(0)); + nVector[nAxis] = 1; - zone::SphericZone gpuGatheringZone(gpuDim, nVector * gpuPlane); - gpuGatheringZone.size[nAxis] = 1; + zone::SphericZone 
gpuGatheringZone(gpuDim, nVector * gpuPlane); + gpuGatheringZone.size[nAxis] = 1; - algorithm::mpi::Gather gather(gpuGatheringZone); + algorithm::mpi::Gather gather(gpuGatheringZone); - if(!gather.participate()) return; + if(!gather.participate()) + return; -#if(SIMDIM==DIM3) - vec::UInt32<3> twistedAxesVec((nAxis+1)%3, (nAxis+2)%3, nAxis); +#if(SIMDIM == DIM3) + vec::UInt32<3> twistedAxesVec((nAxis + 1) % 3, (nAxis + 2) % 3, nAxis); - /* convert data to higher precision and to SI units */ - SliceFieldPrinterHelper::ConversionFunctor cf; - algorithm::kernel::RT::Foreach()( - dBuffer_SI->zone(), dBuffer_SI->origin(), - cursor::tools::slice(field.originCustomAxes(twistedAxesVec)(0,0,localPlane)), - cf ); + /* convert data to higher precision and to SI units */ + SliceFieldPrinterHelper::ConversionFunctor cf; + algorithm::kernel::RT::Foreach()( + dBuffer_SI->zone(), + dBuffer_SI->origin(), + cursor::tools::slice(field.originCustomAxes(twistedAxesVec)(0, 0, localPlane)), + cf); #endif -#if(SIMDIM==DIM2) - vec::UInt32<2> twistedAxesVec((nAxis+1)%2, nAxis); - - /* convert data to higher precision and to SI units */ - SliceFieldPrinterHelper::ConversionFunctor cf; - algorithm::kernel::RT::Foreach()( - dBuffer_SI->zone(), dBuffer_SI->origin(), - cursor::tools::slice(field.originCustomAxes(twistedAxesVec)(0,localPlane)), - cf ); +#if(SIMDIM == DIM2) + vec::UInt32<2> twistedAxesVec((nAxis + 1) % 2, nAxis); + + /* convert data to higher precision and to SI units */ + SliceFieldPrinterHelper::ConversionFunctor cf; + algorithm::kernel::RT::Foreach()( + dBuffer_SI->zone(), + dBuffer_SI->origin(), + cursor::tools::slice(field.originCustomAxes(twistedAxesVec)(0, localPlane)), + cf); #endif - /* copy selected plane from device to host */ - container::HostBuffer hBuffer(dBuffer_SI->size()); - hBuffer = *dBuffer_SI; + /* copy selected plane from device to host */ + container::HostBuffer hBuffer(dBuffer_SI->size()); + hBuffer = *dBuffer_SI; - /* collect data from all nodes/GPUs */ - 
vec::Size_t globalDomainSize = Environment::get().SubGrid().getGlobalDomain().size; - vec::Size_t globalSliceSize = globalDomainSize.shrink((nAxis+1)%simDim); - container::HostBuffer globalBuffer(globalSliceSize); - gather(globalBuffer, hBuffer, nAxis); - if(!gather.root()) return; + /* collect data from all nodes/GPUs */ + vec::Size_t globalDomainSize = Environment::get().SubGrid().getGlobalDomain().size; + vec::Size_t globalSliceSize = globalDomainSize.shrink((nAxis + 1) % simDim); + container::HostBuffer globalBuffer(globalSliceSize); + gather(globalBuffer, hBuffer, nAxis); + if(!gather.root()) + return; - std::ofstream file(filename.c_str()); - file << globalBuffer; -} + std::ofstream file(filename.c_str()); + file << globalBuffer; + } } /* end namespace picongpu */ diff --git a/include/picongpu/plugins/SliceFieldPrinterMulti.hpp b/include/picongpu/plugins/SliceFieldPrinterMulti.hpp index 8dc75da1b5..7623e41d28 100644 --- a/include/picongpu/plugins/SliceFieldPrinterMulti.hpp +++ b/include/picongpu/plugins/SliceFieldPrinterMulti.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. 
@@ -29,37 +29,39 @@ namespace picongpu { + using namespace pmacc; + namespace po = boost::program_options; -using namespace pmacc; -namespace po = boost::program_options; + template + class SliceFieldPrinterMulti : public ILightweightPlugin + { + private: + std::string name; + std::string prefix; + std::vector notifyPeriod; + std::vector fileName; + std::vector plane; + std::vector slicePoint; + MappingDesc* cellDescription; + std::vector> childs; -template -class SliceFieldPrinterMulti : public ILightweightPlugin -{ -private: - std::string name; - std::string prefix; - std::vector notifyPeriod; - std::vector fileName; - std::vector plane; - std::vector slicePoint; - MappingDesc *cellDescription; - std::vector > childs; - - void pluginLoad(); - void pluginUnload(); + void pluginLoad(); + void pluginUnload(); -public: - SliceFieldPrinterMulti(); - virtual ~SliceFieldPrinterMulti() {} + public: + SliceFieldPrinterMulti(); + virtual ~SliceFieldPrinterMulti() + { + } - void notify(uint32_t) {} - void setMappingDescription(MappingDesc* desc); - void pluginRegisterHelp(po::options_description& desc); - std::string pluginGetName() const; -}; + void notify(uint32_t) + { + } + void setMappingDescription(MappingDesc* desc); + void pluginRegisterHelp(po::options_description& desc); + std::string pluginGetName() const; + }; -} +} // namespace picongpu #include "SliceFieldPrinterMulti.tpp" - diff --git a/include/picongpu/plugins/SliceFieldPrinterMulti.tpp b/include/picongpu/plugins/SliceFieldPrinterMulti.tpp index c6ca959073..2cb785825b 100644 --- a/include/picongpu/plugins/SliceFieldPrinterMulti.tpp +++ b/include/picongpu/plugins/SliceFieldPrinterMulti.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. 
@@ -38,61 +38,67 @@ namespace picongpu { + template + SliceFieldPrinterMulti::SliceFieldPrinterMulti() + : name("SliceFieldPrinter: prints a slice of a field") + , prefix(Field::getName() + std::string("_slice")) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } -template -SliceFieldPrinterMulti::SliceFieldPrinterMulti() - : name("SliceFieldPrinter: prints a slice of a field"), - prefix(Field::getName() + std::string("_slice")) -{ - Environment<>::get().PluginConnector().registerPlugin(this); -} - -template -void SliceFieldPrinterMulti::pluginRegisterHelp(po::options_description& desc) -{ - desc.add_options() - ((this->prefix + ".period").c_str(), - po::value > (&this->notifyPeriod)->multitoken(), "notify period"); - desc.add_options() - ((this->prefix + ".fileName").c_str(), - po::value > (&this->fileName)->multitoken(), "file name to store slices in"); - desc.add_options() - ((this->prefix + ".plane").c_str(), - po::value > (&this->plane)->multitoken(), "specifies the axis which stands on the cutting plane (0,1,2)"); - desc.add_options() - ((this->prefix + ".slicePoint").c_str(), - po::value > (&this->slicePoint)->multitoken(), "slice point 0.0 <= x <= 1.0"); -} + template + void SliceFieldPrinterMulti::pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (this->prefix + ".period").c_str(), + po::value>(&this->notifyPeriod)->multitoken(), + "notify period"); + desc.add_options()( + (this->prefix + ".fileName").c_str(), + po::value>(&this->fileName)->multitoken(), + "file name to store slices in"); + desc.add_options()( + (this->prefix + ".plane").c_str(), + po::value>(&this->plane)->multitoken(), + "specifies the axis which stands on the cutting plane (0,1,2)"); + desc.add_options()( + (this->prefix + ".slicePoint").c_str(), + po::value>(&this->slicePoint)->multitoken(), + "slice point 0.0 <= x <= 1.0"); + } -template -std::string SliceFieldPrinterMulti::pluginGetName() const {return this->name;} + template + 
std::string SliceFieldPrinterMulti::pluginGetName() const + { + return this->name; + } -template -void SliceFieldPrinterMulti::pluginLoad() -{ - this->childs.resize(this->notifyPeriod.size()); - for(uint32_t i = 0; i < this->childs.size(); i++) + template + void SliceFieldPrinterMulti::pluginLoad() { - this->childs[i].setMappingDescription(this->cellDescription); - this->childs[i].notifyPeriod = this->notifyPeriod[i]; - this->childs[i].fileName = this->fileName[i]; - this->childs[i].plane = this->plane[i]; - this->childs[i].slicePoint = this->slicePoint[i]; - this->childs[i].pluginLoad(); + this->childs.resize(this->notifyPeriod.size()); + for(uint32_t i = 0; i < this->childs.size(); i++) + { + this->childs[i].setMappingDescription(this->cellDescription); + this->childs[i].notifyPeriod = this->notifyPeriod[i]; + this->childs[i].fileName = this->fileName[i]; + this->childs[i].plane = this->plane[i]; + this->childs[i].slicePoint = this->slicePoint[i]; + this->childs[i].pluginLoad(); + } } -} -template -void SliceFieldPrinterMulti::pluginUnload() -{ - for(uint32_t i = 0; i < this->childs.size(); i++) - this->childs[i].pluginUnload(); -} + template + void SliceFieldPrinterMulti::pluginUnload() + { + for(uint32_t i = 0; i < this->childs.size(); i++) + this->childs[i].pluginUnload(); + } -template -void SliceFieldPrinterMulti::setMappingDescription(MappingDesc* desc) -{ - this->cellDescription = desc; -} + template + void SliceFieldPrinterMulti::setMappingDescription(MappingDesc* desc) + { + this->cellDescription = desc; + } -} +} // namespace picongpu diff --git a/include/picongpu/plugins/SumCurrents.hpp b/include/picongpu/plugins/SumCurrents.hpp index 87565861ee..885b7a3c54 100644 --- a/include/picongpu/plugins/SumCurrents.hpp +++ b/include/picongpu/plugins/SumCurrents.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Felix Schmitt, Benjamin 
Worpitz, Richard Pausch * * This file is part of PIConGPU. @@ -35,174 +35,157 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; -using J_DataBox = FieldJ::DataBoxType; + using J_DataBox = FieldJ::DataBoxType; -struct KernelSumCurrents -{ - template< - typename Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - J_DataBox fieldJ, - float3_X* gCurrent, - Mapping mapper - ) const + struct KernelSumCurrents { - using SuperCellSize = typename Mapping::SuperCellSize; - - PMACC_SMEM( acc, sh_sumJ, float3_X ); - - const DataSpace threadIndex(threadIdx); - const int linearThreadIdx = DataSpaceOperations::template map (threadIndex); - - if (linearThreadIdx == 0) + template + DINLINE void operator()(T_Acc const& acc, J_DataBox fieldJ, float3_X* gCurrent, Mapping mapper) const { - sh_sumJ = float3_X::create(0.0); - } + using SuperCellSize = typename Mapping::SuperCellSize; - __syncthreads(); + PMACC_SMEM(acc, sh_sumJ, float3_X); + const DataSpace threadIndex(cupla::threadIdx(acc)); + const int linearThreadIdx = DataSpaceOperations::template map(threadIndex); - const DataSpace superCellIdx(mapper.getSuperCellIndex(DataSpace (blockIdx))); - const DataSpace cell(superCellIdx * SuperCellSize::toRT() + threadIndex); + if(linearThreadIdx == 0) + { + sh_sumJ = float3_X::create(0.0); + } - const float3_X myJ = fieldJ(cell); + cupla::__syncthreads(acc); - atomicAdd( &(sh_sumJ.x()), myJ.x(), ::alpaka::hierarchy::Threads{}); - atomicAdd( &(sh_sumJ.y()), myJ.y(), ::alpaka::hierarchy::Threads{}); - atomicAdd( &(sh_sumJ.z()), myJ.z(), ::alpaka::hierarchy::Threads{}); - __syncthreads(); + const DataSpace superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + const DataSpace cell(superCellIdx * SuperCellSize::toRT() + threadIndex); - if (linearThreadIdx == 0) - { - atomicAdd( &(gCurrent->x()), sh_sumJ.x(), ::alpaka::hierarchy::Blocks{}); - 
atomicAdd( &(gCurrent->y()), sh_sumJ.y(), ::alpaka::hierarchy::Blocks{}); - atomicAdd( &(gCurrent->z()), sh_sumJ.z(), ::alpaka::hierarchy::Blocks{}); - } - } -}; + const float3_X myJ = fieldJ(cell); -class SumCurrents : public ILightweightPlugin -{ -private: - MappingDesc *cellDescription; - std::string notifyPeriod; + cupla::atomicAdd(acc, &(sh_sumJ.x()), myJ.x(), ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd(acc, &(sh_sumJ.y()), myJ.y(), ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd(acc, &(sh_sumJ.z()), myJ.z(), ::alpaka::hierarchy::Threads{}); - GridBuffer *sumcurrents; + cupla::__syncthreads(acc); -public: + if(linearThreadIdx == 0) + { + cupla::atomicAdd(acc, &(gCurrent->x()), sh_sumJ.x(), ::alpaka::hierarchy::Blocks{}); + cupla::atomicAdd(acc, &(gCurrent->y()), sh_sumJ.y(), ::alpaka::hierarchy::Blocks{}); + cupla::atomicAdd(acc, &(gCurrent->z()), sh_sumJ.z(), ::alpaka::hierarchy::Blocks{}); + } + } + }; - SumCurrents() : - cellDescription(nullptr) + class SumCurrents : public ILightweightPlugin { + private: + MappingDesc* cellDescription; + std::string notifyPeriod; - Environment<>::get().PluginConnector().registerPlugin(this); - } + GridBuffer* sumcurrents; - virtual ~SumCurrents() - { + public: + SumCurrents() : cellDescription(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - } + virtual ~SumCurrents() + { + } - void notify(uint32_t currentStep) - { - const int rank = Environment::get().GridController().getGlobalRank(); - const float3_X gCurrent = getSumCurrents(); - - // gCurrent is just j - // j = I/A -#if(SIMDIM==DIM3) - const float3_X realCurrent( - gCurrent.x() * CELL_HEIGHT * CELL_DEPTH, - gCurrent.y() * CELL_WIDTH * CELL_DEPTH, - gCurrent.z() * CELL_WIDTH * CELL_HEIGHT); -#elif(SIMDIM==DIM2) - const float3_X realCurrent( - gCurrent.x() * CELL_HEIGHT, - gCurrent.y() * CELL_WIDTH, - gCurrent.z() * CELL_WIDTH * CELL_HEIGHT); + void notify(uint32_t currentStep) + { + const int rank = 
Environment::get().GridController().getGlobalRank(); + const float3_X gCurrent = getSumCurrents(); + + // gCurrent is just j + // j = I/A +#if(SIMDIM == DIM3) + const float3_X realCurrent( + gCurrent.x() * CELL_HEIGHT * CELL_DEPTH, + gCurrent.y() * CELL_WIDTH * CELL_DEPTH, + gCurrent.z() * CELL_WIDTH * CELL_HEIGHT); +#elif(SIMDIM == DIM2) + const float3_X realCurrent( + gCurrent.x() * CELL_HEIGHT, + gCurrent.y() * CELL_WIDTH, + gCurrent.z() * CELL_WIDTH * CELL_HEIGHT); #endif - float3_64 realCurrent_SI( - float_64(realCurrent.x()) * (UNIT_CHARGE / UNIT_TIME), - float_64(realCurrent.y()) * (UNIT_CHARGE / UNIT_TIME), - float_64(realCurrent.z()) * (UNIT_CHARGE / UNIT_TIME)); - - /*FORMAT OUTPUT*/ - using dbl = std::numeric_limits; - - std::cout.precision(dbl::digits10); - if (math::abs(gCurrent.x()) + math::abs(gCurrent.y()) + math::abs(gCurrent.z()) != float_X(0.0)) - std::cout << "[ANALYSIS] [" << rank << "] [COUNTER] [SumCurrents] [" << currentStep - << std::scientific << "] " << - realCurrent_SI << " Abs:" << math::abs(realCurrent_SI) << std::endl; - } - - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ("sumcurr.period", po::value (&notifyPeriod), "enable plugin [for each n-th step]"); - } - - std::string pluginGetName() const - { - return "SumCurrents"; - } - - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } - -private: + float3_64 realCurrent_SI( + float_64(realCurrent.x()) * (UNIT_CHARGE / UNIT_TIME), + float_64(realCurrent.y()) * (UNIT_CHARGE / UNIT_TIME), + float_64(realCurrent.z()) * (UNIT_CHARGE / UNIT_TIME)); + + /*FORMAT OUTPUT*/ + using dbl = std::numeric_limits; + + std::cout.precision(dbl::digits10); + if(math::abs(gCurrent.x()) + math::abs(gCurrent.y()) + math::abs(gCurrent.z()) != float_X(0.0)) + std::cout << "[ANALYSIS] [" << rank << "] [COUNTER] [SumCurrents] [" << currentStep << std::scientific + << "] " << realCurrent_SI << " Abs:" <<
math::abs(realCurrent_SI) << std::endl; + } - void pluginLoad() - { - if (!notifyPeriod.empty()) + void pluginRegisterHelp(po::options_description& desc) { - sumcurrents = new GridBuffer (DataSpace (1)); //create one int on gpu und host + desc.add_options()( + "sumcurr.period", + po::value(&notifyPeriod), + "enable plugin [for each n-th step]"); + } - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + std::string pluginGetName() const + { + return "SumCurrents"; } - } - void pluginUnload() - { - if(!notifyPeriod.empty()) + void setMappingDescription(MappingDesc* cellDescription) { - __delete(sumcurrents); + this->cellDescription = cellDescription; } - } - float3_X getSumCurrents() - { - DataConnector &dc = Environment<>::get().DataConnector(); - auto fieldJ = dc.get< FieldJ >( FieldJ::getName(), true ); + private: + void pluginLoad() + { + if(!notifyPeriod.empty()) + { + sumcurrents = new GridBuffer(DataSpace(1)); // create one int on gpu und host - sumcurrents->getDeviceBuffer().setValue(float3_X::create(0.0)); - auto block = MappingDesc::SuperCellSize::toRT(); + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + } + } - AreaMapping mapper(*cellDescription); - PMACC_KERNEL(KernelSumCurrents{}) - (mapper.getGridDim(), block) - (fieldJ->getDeviceDataBox(), - sumcurrents->getDeviceBuffer().getBasePointer(), - mapper); + void pluginUnload() + { + if(!notifyPeriod.empty()) + { + __delete(sumcurrents); + } + } - dc.releaseData( FieldJ::getName() ); + float3_X getSumCurrents() + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldJ = dc.get(FieldJ::getName(), true); - sumcurrents->deviceToHost(); - return sumcurrents->getHostBuffer().getDataBox()[0]; - } + sumcurrents->getDeviceBuffer().setValue(float3_X::create(0.0)); + auto block = MappingDesc::SuperCellSize::toRT(); -}; + AreaMapping mapper(*cellDescription); + PMACC_KERNEL(KernelSumCurrents{}) + (mapper.getGridDim(), +
block)(fieldJ->getDeviceDataBox(), sumcurrents->getDeviceBuffer().getBasePointer(), mapper); -} + dc.releaseData(FieldJ::getName()); + sumcurrents->deviceToHost(); + return sumcurrents->getHostBuffer().getDataBox()[0]; + } + }; +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/ADIOSCountParticles.hpp b/include/picongpu/plugins/adios/ADIOSCountParticles.hpp index 6efeac564d..18f065c6be 100644 --- a/include/picongpu/plugins/adios/ADIOSCountParticles.hpp +++ b/include/picongpu/plugins/adios/ADIOSCountParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt, Axel Huebl +/* Copyright 2014-2021 Felix Schmitt, Axel Huebl * * This file is part of PIConGPU. * @@ -51,149 +51,167 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; - - - -/** Count number of particles for a species - * - * @tparam T_Species type of species - * - */ -template< typename T_SpeciesFilter > -struct ADIOSCountParticles -{ -public: - - typedef typename T_SpeciesFilter::Species ThisSpecies; - typedef typename ThisSpecies::FrameType FrameType; - typedef typename FrameType::ParticleDescription ParticleDescription; - typedef typename FrameType::ValueTypeSeq ParticleAttributeList; - - /* delete multiMask and localCellIdx in adios particle*/ - typedef bmpl::vector TypesToDelete; - typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; - - /* add totalCellIdx for adios particle*/ - typedef typename MakeSeq< - ParticleCleanedAttributeList, - totalCellIdx - >::type ParticleNewAttributeList; - - typedef - typename ReplaceValueTypeSeq::type - NewParticleDescription; - - typedef Frame AdiosFrameType; - - HINLINE void operator()(ThreadParams* params) + namespace adios { - DataConnector &dc = Environment<>::get().DataConnector(); - GridController& gc = Environment::get().GridController(); - uint64_t mpiSize = gc.getGlobalSize(); - uint64_t mpiRank = gc.getGlobalRank(); - - const std::string speciesGroup( T_SpeciesFilter::getName() + "/" ); - const 
std::string speciesPath( params->adiosBasePath + - std::string(ADIOS_PATH_PARTICLES) + speciesGroup ); - - /* load particle without copy particle data to host */ - auto speciesTmp = dc.get< ThisSpecies >( ThisSpecies::FrameType::getName(), true ); - // enforce that the filter interface is fulfilled - particles::filter::IUnary< typename T_SpeciesFilter::Filter > particleFilter{ params->currentStep }; - /* count total number of particles on the device */ - uint64_cu totalNumParticles = 0; - totalNumParticles = pmacc::CountParticles::countOnDevice < CORE + BORDER > ( - *speciesTmp, - *(params->cellDescription), - params->localWindowToDomainOffset, - params->window.localDimensions.size, - particleFilter); - - /* MPI_Allgather to compute global size and my offset */ - uint64_t myNumParticles = totalNumParticles; - uint64_t allNumParticles[mpiSize]; - uint64_t globalNumParticles = 0; - uint64_t myParticleOffset = 0; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather( - &myNumParticles, 1, MPI_UNSIGNED_LONG_LONG, - allNumParticles, 1, MPI_UNSIGNED_LONG_LONG, - gc.getCommunicator().getMPIComm())); - - for (uint64_t i = 0; i < mpiSize; ++i) - { - globalNumParticles += allNumParticles[i]; - if (i < mpiRank) - myParticleOffset += allNumParticles[i]; - } - - /* iterate over all attributes of this species */ - meta::ForEach > attributeSize; - attributeSize(params, speciesGroup, myNumParticles, globalNumParticles, myParticleOffset); - - /* TODO: constant particle records */ - - /* openPMD ED-PIC: additional attributes */ - traits::PICToAdios adiosDoubleType; - const float_64 particleShape( GetShape::type::support - 1 ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "particleShape", speciesPath.c_str(), - adiosDoubleType.type, 1, (void*)&particleShape )); - - traits::GetSpeciesFlagName > currentDepositionName; - const std::string currentDeposition( 
currentDepositionName() ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "currentDeposition", speciesPath.c_str(), - adios_string, 1, (void*)currentDeposition.c_str() )); - - traits::GetSpeciesFlagName > particlePushName; - const std::string particlePush( particlePushName() ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "particlePush", speciesPath.c_str(), - adios_string, 1, (void*)particlePush.c_str() )); - - traits::GetSpeciesFlagName > particleInterpolationName; - const std::string particleInterpolation( particleInterpolationName() ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "particleInterpolation", speciesPath.c_str(), - adios_string, 1, (void*)particleInterpolation.c_str() )); - - const std::string particleSmoothing( "none" ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "particleSmoothing", speciesPath.c_str(), - adios_string, 1, (void*)particleSmoothing.c_str() )); - - /* define adios var for species index/info table */ - { - const uint64_t localTableSize = 5; - traits::PICToAdios adiosIndexType; + using namespace pmacc; - const char* path = nullptr; - int64_t adiosSpeciesIndexVar = defineAdiosVar( - params->adiosGroupHandle, - (speciesPath + "particles_info").c_str(), - path, - adiosIndexType.type, - pmacc::math::UInt64(localTableSize), - pmacc::math::UInt64(localTableSize * uint64_t(gc.getGlobalSize()) ), - pmacc::math::UInt64(localTableSize * uint64_t(gc.getGlobalRank()) ), - true, - params->adiosCompression); - params->adiosSpeciesIndexVarIds.push_back(adiosSpeciesIndexVar); - - params->adiosGroupSize += sizeof(uint64_t) * localTableSize * gc.getGlobalSize(); - } - } -}; - - -} //namspace adios - -} //namespace picongpu + /** Count number of particles for a species + * + * @tparam T_Species type of species + * + */ + template + struct ADIOSCountParticles + { + public: + typedef typename T_SpeciesFilter::Species ThisSpecies; + typedef typename 
ThisSpecies::FrameType FrameType; + typedef typename FrameType::ParticleDescription ParticleDescription; + typedef typename FrameType::ValueTypeSeq ParticleAttributeList; + + /* delete multiMask and localCellIdx in adios particle*/ + typedef bmpl::vector TypesToDelete; + typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; + + /* add totalCellIdx for adios particle*/ + typedef typename MakeSeq::type ParticleNewAttributeList; + + typedef typename ReplaceValueTypeSeq::type + NewParticleDescription; + + typedef Frame AdiosFrameType; + + HINLINE void operator()(ThreadParams* params) + { + DataConnector& dc = Environment<>::get().DataConnector(); + GridController& gc = Environment::get().GridController(); + uint64_t mpiSize = gc.getGlobalSize(); + uint64_t mpiRank = gc.getGlobalRank(); + + const std::string speciesGroup(T_SpeciesFilter::getName() + "/"); + const std::string speciesPath( + params->adiosBasePath + std::string(ADIOS_PATH_PARTICLES) + speciesGroup); + + /* load particle without copy particle data to host */ + auto speciesTmp = dc.get(ThisSpecies::FrameType::getName(), true); + // enforce that the filter interface is fulfilled + particles::filter::IUnary particleFilter{params->currentStep}; + /* count total number of particles on the device */ + uint64_cu totalNumParticles = 0; + totalNumParticles = pmacc::CountParticles::countOnDevice( + *speciesTmp, + *(params->cellDescription), + params->localWindowToDomainOffset, + params->window.localDimensions.size, + particleFilter); + + /* MPI_Allgather to compute global size and my offset */ + uint64_t myNumParticles = totalNumParticles; + uint64_t allNumParticles[mpiSize]; + uint64_t globalNumParticles = 0; + uint64_t myParticleOffset = 0; + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + &myNumParticles, + 1, + MPI_UNSIGNED_LONG_LONG, + allNumParticles, + 1, + MPI_UNSIGNED_LONG_LONG, + 
gc.getCommunicator().getMPIComm())); + + for(uint64_t i = 0; i < mpiSize; ++i) + { + globalNumParticles += allNumParticles[i]; + if(i < mpiRank) + myParticleOffset += allNumParticles[i]; + } + + /* iterate over all attributes of this species */ + meta::ForEach> + attributeSize; + attributeSize(params, speciesGroup, myNumParticles, globalNumParticles, myParticleOffset); + + /* TODO: constant particle records */ + + /* openPMD ED-PIC: additional attributes */ + traits::PICToAdios adiosDoubleType; + const float_64 particleShape(GetShape::type::assignmentFunctionOrder); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "particleShape", + speciesPath.c_str(), + adiosDoubleType.type, + 1, + (void*) &particleShape)); + + traits::GetSpeciesFlagName> currentDepositionName; + const std::string currentDeposition(currentDepositionName()); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "currentDeposition", + speciesPath.c_str(), + adios_string, + 1, + (void*) currentDeposition.c_str())); + + traits::GetSpeciesFlagName> particlePushName; + const std::string particlePush(particlePushName()); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "particlePush", + speciesPath.c_str(), + adios_string, + 1, + (void*) particlePush.c_str())); + + traits::GetSpeciesFlagName> particleInterpolationName; + const std::string particleInterpolation(particleInterpolationName()); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "particleInterpolation", + speciesPath.c_str(), + adios_string, + 1, + (void*) particleInterpolation.c_str())); + + const std::string particleSmoothing("none"); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "particleSmoothing", + speciesPath.c_str(), + adios_string, + 1, + (void*) particleSmoothing.c_str())); + + /* define adios var for species index/info table */ + { + const uint64_t localTableSize = 5; + traits::PICToAdios adiosIndexType; + + 
const char* path = nullptr; + int64_t adiosSpeciesIndexVar = defineAdiosVar( + params->adiosGroupHandle, + (speciesPath + "particles_info").c_str(), + path, + adiosIndexType.type, + pmacc::math::UInt64(localTableSize), + pmacc::math::UInt64(localTableSize * uint64_t(gc.getGlobalSize())), + pmacc::math::UInt64(localTableSize * uint64_t(gc.getGlobalRank())), + true, + params->adiosCompression); + + params->adiosSpeciesIndexVarIds.push_back(adiosSpeciesIndexVar); + + params->adiosGroupSize += sizeof(uint64_t) * localTableSize * gc.getGlobalSize(); + } + } + }; + + + } // namespace adios + +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/ADIOSWriter.def b/include/picongpu/plugins/adios/ADIOSWriter.def index de506f1196..ce5765f189 100644 --- a/include/picongpu/plugins/adios/ADIOSWriter.def +++ b/include/picongpu/plugins/adios/ADIOSWriter.def @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt, Axel Huebl +/* Copyright 2014-2021 Felix Schmitt, Axel Huebl * * This file is part of PIConGPU. 
* @@ -26,7 +26,7 @@ #include #include #include -#include // std::cerr +#include // std::cerr #include // throw std::runtime_error #include @@ -38,133 +38,133 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; + namespace adios + { + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; #define ADIOS_INVALID_HANDLE -1 -#define ADIOS_SUCCESS err_no_error -#define ADIOS_GROUP_NAME "data" +#define ADIOS_SUCCESS err_no_error +#define ADIOS_GROUP_NAME "data" -#define ADIOS_PATH_ROOT "/data/" -#define ADIOS_PATH_FIELDS "fields/" +#define ADIOS_PATH_ROOT "/data/" +#define ADIOS_PATH_FIELDS "fields/" #define ADIOS_PATH_PARTICLES "particles/" -#define ADIOS_SIZE_LOCAL "size_" -#define ADIOS_SIZE_GLOBAL "totalSize_" -#define ADIOS_OFFSET_GLOBAL "offset_" - -#define ADIOS_CMD(_cmd) \ -{ \ - int _err_code = _cmd; \ - if (_err_code != ADIOS_SUCCESS) \ - { \ - std::string errMsg( adios_errmsg() ); \ - if( errMsg.empty() ) errMsg = '\n'; \ - std::stringstream s; \ - s << "ADIOS: error at cmd '" << #_cmd \ - << "' (" << _err_code << ", " << adios_errno << ") in " \ - << __FILE__ << ":" << __LINE__ << " " << errMsg; \ - throw std::runtime_error(s.str()); \ - } \ -} - -#define ADIOS_CMD_EXPECT_NONNULL(_cmd) \ -{ \ - if (!(_cmd)) \ - { \ - std::string errMsg( adios_errmsg() ); \ - if( errMsg.empty() ) errMsg = '\n'; \ - std::stringstream s; \ - s << "ADIOS: error at cmd '" << #_cmd \ - << "' (" << adios_errno << ") in " \ - << __FILE__ << ":" << __LINE__ << " " << errMsg; \ - throw std::runtime_error(s.str()); \ - } \ -} - -struct ThreadParams -{ - uint32_t currentStep; /** current simulation step */ - std::string adiosFilename; /* e.g., simData */ - std::string fullFilename; /* e.g., simData_1000.bp */ - - /** current dump is a checkpoint */ - bool isCheckpoint; - ADIOS_FILE* fp; /* file pointer for checkpoint file */ - - MPI_Comm adiosComm; /* MPI communicator for adios lib */ - bool adiosBufferInitialized; /* set 
if ADIOS buffer has been allocated */ - int64_t adiosFileHandle; /* ADIOS file handle */ - int64_t adiosGroupHandle; /* ADIOS group handle */ - uint64_t adiosGroupSize; /* size of ADIOS group in bytes */ - uint32_t adiosAggregators; /* number of ADIOS aggregators for MPI_AGGREGATE */ - uint32_t adiosOST; /* number of ADIOS OST for MPI_AGGREGATE */ - bool adiosDisableMeta; /* disable online gather and write of a meta file */ - std::string adiosTransportParams; /* additional transport params */ - std::string adiosBasePath; /* base path for the current step */ - std::string adiosCompression; /* ADIOS data transform compression method */ - - pmacc::math::UInt64 fieldsSizeDims; - pmacc::math::UInt64 fieldsGlobalSizeDims; - pmacc::math::UInt64 fieldsOffsetDims; - - std::list adiosFieldVarIds; /* var IDs for fields in order of appearance */ - std::list adiosParticleAttrVarIds; /* var IDs for particle attributes in order of appearance */ - std::list adiosSpeciesIndexVarIds; /* var IDs for species index tables in order of appearance */ - - GridLayout gridLayout; - MappingDesc *cellDescription; - - float_X *fieldBfr; /* temp. buffer for fields */ - - Window window; /* window describing the volume to be dumped */ - - DataSpace localWindowToDomainOffset; /** offset from local moving window to local domain */ -}; - -/** - * Writes simulation data to adios files. - * Implements the ILightweightPlugin interface. 
- */ - -class ADIOSWriter; - -/** Default ADIOS types we will use */ -typedef PICToAdios AdiosUInt32Type; -typedef PICToAdios AdiosFloatXType; -typedef PICToAdios AdiosDoubleType; - -/** - * Wrapper for adios_define_var that sets data transform method - * - * @tparam DIM number of variable dimensions - * - * @param group_id pointer to the internal group structure - * @param name string containing the name part of a variable - * @param path string containing the path of an variable - * @param type variable type - * @param dimensions variable local dimension - * @param globalDimensions variable global dimension - * @param offset variable local offset - * @param compression enable compression data transform - * @param compressionMethod string denoting the data transform to use - * @return ADIOS variable ID - */ -template -int64_t defineAdiosVar(int64_t group_id, - const char * name, - const char * path, - enum ADIOS_DATATYPES type, - pmacc::math::UInt64 dimensions, - pmacc::math::UInt64 globalDimensions, - pmacc::math::UInt64 offset, - bool compression, - std::string compressionMethod); - -} //namespace adios -} //namespace picongpu +#define ADIOS_SIZE_LOCAL "size_" +#define ADIOS_SIZE_GLOBAL "totalSize_" +#define ADIOS_OFFSET_GLOBAL "offset_" + +#define ADIOS_CMD(_cmd) \ + { \ + int _err_code = _cmd; \ + if(_err_code != ADIOS_SUCCESS) \ + { \ + std::string errMsg(adios_errmsg()); \ + if(errMsg.empty()) \ + errMsg = '\n'; \ + std::stringstream s; \ + s << "ADIOS: error at cmd '" << #_cmd << "' (" << _err_code << ", " << adios_errno << ") in " << __FILE__ \ + << ":" << __LINE__ << " " << errMsg; \ + throw std::runtime_error(s.str()); \ + } \ + } + +#define ADIOS_CMD_EXPECT_NONNULL(_cmd) \ + { \ + if(!(_cmd)) \ + { \ + std::string errMsg(adios_errmsg()); \ + if(errMsg.empty()) \ + errMsg = '\n'; \ + std::stringstream s; \ + s << "ADIOS: error at cmd '" << #_cmd << "' (" << adios_errno << ") in " << __FILE__ << ":" << __LINE__ \ + << " " << errMsg; \ + throw 
std::runtime_error(s.str()); \ + } \ + } + + struct ThreadParams + { + uint32_t currentStep; /** current simulation step */ + std::string adiosFilename; /* e.g., simData */ + std::string fullFilename; /* e.g., simData_1000.bp */ + + /** current dump is a checkpoint */ + bool isCheckpoint; + ADIOS_FILE* fp; /* file pointer for checkpoint file */ + + MPI_Comm adiosComm; /* MPI communicator for adios lib */ + bool adiosBufferInitialized; /* set if ADIOS buffer has been allocated */ + int64_t adiosFileHandle; /* ADIOS file handle */ + int64_t adiosGroupHandle; /* ADIOS group handle */ + uint64_t adiosGroupSize; /* size of ADIOS group in bytes */ + uint32_t adiosAggregators; /* number of ADIOS aggregators for MPI_AGGREGATE */ + uint32_t adiosOST; /* number of ADIOS OST for MPI_AGGREGATE */ + bool adiosDisableMeta; /* disable online gather and write of a meta file */ + std::string adiosTransportParams; /* additional transport params */ + std::string adiosBasePath; /* base path for the current step */ + std::string adiosCompression; /* ADIOS data transform compression method */ + + pmacc::math::UInt64 fieldsSizeDims; + pmacc::math::UInt64 fieldsGlobalSizeDims; + pmacc::math::UInt64 fieldsOffsetDims; + + std::list adiosFieldVarIds; /* var IDs for fields in order of appearance */ + std::list adiosParticleAttrVarIds; /* var IDs for particle attributes in order of appearance */ + std::list adiosSpeciesIndexVarIds; /* var IDs for species index tables in order of appearance */ + + GridLayout gridLayout; + MappingDesc* cellDescription; + + float_X* fieldBfr; /* temp. buffer for fields */ + + Window window; /* window describing the volume to be dumped */ + + DataSpace localWindowToDomainOffset; /** offset from local moving window to local domain */ + }; + + /** + * Writes simulation data to adios files. + * Implements the ILightweightPlugin interface. 
+ */ + + class ADIOSWriter; + + /** Default ADIOS types we will use */ + typedef PICToAdios AdiosUInt32Type; + typedef PICToAdios AdiosFloatXType; + typedef PICToAdios AdiosDoubleType; + + /** + * Wrapper for adios_define_var that sets data transform method + * + * @tparam DIM number of variable dimensions + * + * @param group_id pointer to the internal group structure + * @param name string containing the name part of a variable + * @param path string containing the path of an variable + * @param type variable type + * @param dimensions variable local dimension + * @param globalDimensions variable global dimension + * @param offset variable local offset + * @param compression enable compression data transform + * @param compressionMethod string denoting the data transform to use + * @return ADIOS variable ID + */ + template + int64_t defineAdiosVar( + int64_t group_id, + const char* name, + const char* path, + enum ADIOS_DATATYPES type, + pmacc::math::UInt64 dimensions, + pmacc::math::UInt64 globalDimensions, + pmacc::math::UInt64 offset, + bool compression, + std::string compressionMethod); + + } // namespace adios +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/ADIOSWriter.hpp b/include/picongpu/plugins/adios/ADIOSWriter.hpp index cc4ec9602f..7e4ec93722 100644 --- a/include/picongpu/plugins/adios/ADIOSWriter.hpp +++ b/include/picongpu/plugins/adios/ADIOSWriter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * * This file is part of PIConGPU. 
@@ -42,16 +42,16 @@ #include "picongpu/fields/MaxwellSolver/YeePML/Field.hpp" #include +#include #include +#include #include #include #include #include #include "picongpu/simulation/control/MovingWindow.hpp" #include -#if( PMACC_CUDA_ENABLED == 1 ) -# include -#endif +#include #include #include "picongpu/plugins/output/IIOBackend.hpp" @@ -80,7 +80,7 @@ #include #if !defined(_WIN32) -#include +# include #endif #include @@ -92,1590 +92,1480 @@ namespace picongpu { + namespace adios + { + using namespace pmacc; + + + namespace po = boost::program_options; + + template + int64_t defineAdiosVar( + int64_t group_id, + const char* name, + const char* path, + enum ADIOS_DATATYPES type, + pmacc::math::UInt64 dimensions, + pmacc::math::UInt64 globalDimensions, + pmacc::math::UInt64 offset, + bool compression, + std::string compressionMethod) + { + int64_t var_id = 0; + + std::string const revertedDimensions = dimensions.revert().toString(",", ""); + std::string const revertedGlobalDimensions = globalDimensions.revert().toString(",", ""); + std::string const revertedOffset = offset.revert().toString(",", ""); + var_id = adios_define_var( + group_id, + name, + path, + type, + revertedDimensions.c_str(), + revertedGlobalDimensions.c_str(), + revertedOffset.c_str()); + + if(compression) + { + /* enable adios transform layer for variable */ + adios_set_transform(var_id, compressionMethod.c_str()); + } -namespace adios -{ + log("ADIOS: Defined varID=%1% for '%2%' at %3% for %4%/%5% elements") % var_id + % std::string(name) % offset.toString() % dimensions.toString() % globalDimensions.toString(); + return var_id; + } -using namespace pmacc; + /** Writes simulation data to adios files. + * + * Implements the IIOBackend interface. 
+ */ + class ADIOSWriter : public IIOBackend + { + public: + struct Help : public plugins::multi::IHelp + { + /** creates a instance of ISlave + * + * @tparam T_Slave type of the interface implementation (must inherit from ISlave) + * @param help plugin defined help + * @param id index of the plugin, range: [0;help->getNumPlugins()) + */ + std::shared_ptr create( + std::shared_ptr& help, + size_t const id, + MappingDesc* cellDescription) + { + return std::shared_ptr(new ADIOSWriter(help, id, cellDescription)); + } + plugins::multi::Option notifyPeriod = {"period", "enable ADIOS IO [for each n-th step]"}; + plugins::multi::Option source = {"source", "data sources: ", "species_all, fields_all"}; -namespace po = boost::program_options; + plugins::multi::Option fileName = {"file", "ADIOS output filename (prefix)"}; -template -int64_t defineAdiosVar(int64_t group_id, - const char * name, - const char * path, - enum ADIOS_DATATYPES type, - pmacc::math::UInt64 dimensions, - pmacc::math::UInt64 globalDimensions, - pmacc::math::UInt64 offset, - bool compression, - std::string compressionMethod) -{ - int64_t var_id = 0; - - std::string const revertedDimensions = - dimensions.revert().toString(",", ""); - std::string const revertedGlobalDimensions = - globalDimensions.revert().toString(",", ""); - std::string const revertedOffset = - offset.revert().toString(",", ""); - var_id = adios_define_var( - group_id, name, path, type, - revertedDimensions.c_str(), - revertedGlobalDimensions.c_str(), - revertedOffset.c_str() - ); - - if(compression) - { - /* enable adios transform layer for variable */ - adios_set_transform(var_id, compressionMethod.c_str()); - } + std::vector allowedDataSources = {"species_all", "fields_all"}; - log ("ADIOS: Defined varID=%1% for '%2%' at %3% for %4%/%5% elements") % - var_id % std::string(name) % offset.toString() % dimensions.toString() % globalDimensions.toString(); - return var_id; -} + plugins::multi::Option numAggregators + = {"aggregators", 
"Number of aggregators [0 == number of MPI processes]", 0u}; -/** Writes simulation data to adios files. - * - * Implements the IIOBackend interface. - */ -class ADIOSWriter : public IIOBackend -{ -public: - struct Help : public plugins::multi::IHelp - { - /** creates a instance of ISlave - * - * @tparam T_Slave type of the interface implementation (must inherit from ISlave) - * @param help plugin defined help - * @param id index of the plugin, range: [0;help->getNumPlugins()) - */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) - { - return std::shared_ptr< ISlave >( - new ADIOSWriter( - help, - id, - cellDescription - ) - ); - } + plugins::multi::Option numOSTs = {"ost", "Number of OST", 1u}; - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "enable ADIOS IO [for each n-th step]" - }; + plugins::multi::Option disableMeta + = {"disable-meta", + "Disable online gather and write of a global meta file, can be time consuming (use `bpmeta` " + "post-mortem)", + 0u}; - plugins::multi::Option< std::string > source = { - "source", - "data sources: ", - "species_all, fields_all" - }; + /* select MPI method, #OSTs and #aggregators */ + plugins::multi::Option transportParams + = {"transport-params", + "additional transport parameters, see ADIOS manual chapter 6.1.5, e.g., " + "'random_offset=1;stripe_count=4'", + ""}; - plugins::multi::Option< std::string > fileName = { - "file", - "ADIOS output filename (prefix)" - }; + plugins::multi::Option compression + = {"compression", "ADIOS compression method, e.g., zlib (see `adios_config -m` for help)", "none"}; - std::vector< std::string > allowedDataSources = { - "species_all", - "fields_all" - }; + /** defines if the plugin must register itself to the PMacc plugin system + * + * true = the plugin is registering it self + * false = the plugin is not registering itself (plugin is controlled by another class) + */ + bool selfRegister = 
false; - plugins::multi::Option< uint32_t > numAggregators = { - "aggregators", - "Number of aggregators [0 == number of MPI processes]", - 0u - }; + template + struct CreateSpeciesFilter + { + using type = plugins::misc::SpeciesFilter< + typename pmacc::math::CT::At>::type, + typename pmacc::math::CT::At>::type>; + }; - plugins::multi::Option< uint32_t > numOSTs = { - "ost", - "Number of OST", - 1u - }; + using AllParticlesTimesAllFilters = typename AllCombinations< + bmpl::vector>::type; - plugins::multi::Option< uint32_t > disableMeta = { - "disable-meta", - "Disable online gather and write of a global meta file, can be time consuming (use `bpmeta` post-mortem)", - 0u - }; + using AllSpeciesFilter = + typename bmpl::transform>::type; - /* select MPI method, #OSTs and #aggregators */ - plugins::multi::Option< std::string > transportParams = { - "transport-params", - "additional transport parameters, see ADIOS manual chapter 6.1.5, e.g., 'random_offset=1;stripe_count=4'", - "" - }; + using AllEligibleSpeciesSources = + typename bmpl::copy_if>::type; - plugins::multi::Option< std::string > compression = { - "compression", - "ADIOS compression method, e.g., zlib (see `adios_config -m` for help)", - "none" - }; + using AllFieldSources = FileOutputFields; - /** defines if the plugin must register itself to the PMacc plugin system - * - * true = the plugin is registering it self - * false = the plugin is not registering itself (plugin is controlled by another class) - */ - bool selfRegister = false; + ///! 
method used by plugin controller to get --help description + void registerHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + meta::ForEach> + getEligibleDataSourceNames; + getEligibleDataSourceNames(allowedDataSources); - template - struct CreateSpeciesFilter - { - using type = plugins::misc::SpeciesFilter< - typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_<0> - >::type, - typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_<1> - >::type - >; - }; + meta::ForEach> appendFieldSourceNames; + appendFieldSourceNames(allowedDataSources); - using AllParticlesTimesAllFilters = typename AllCombinations< - bmpl::vector< - FileOutputParticles, - particles::filter::AllParticleFilters - > - >::type; - - using AllSpeciesFilter = typename bmpl::transform< - AllParticlesTimesAllFilters, - CreateSpeciesFilter< bmpl::_1 > - >::type; - - using AllEligibleSpeciesSources = typename bmpl::copy_if< - AllSpeciesFilter, - plugins::misc::speciesFilter::IsEligible< bmpl::_1 > - >::type; - - using AllFieldSources = FileOutputFields; - - ///! 
method used by plugin controller to get --help description - void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - meta::ForEach< - AllEligibleSpeciesSources, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleDataSourceNames; - getEligibleDataSourceNames( allowedDataSources ); - - meta::ForEach< - AllFieldSources, - plugins::misc::AppendName< bmpl::_1 > - > appendFieldSourceNames; - appendFieldSourceNames( allowedDataSources ); - - // string list with all possible particle sources - std::string concatenatedSourceNames = plugins::misc::concatenateToString( - allowedDataSources, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - source.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedSourceNames + "]" - ); - fileName.registerHelp( - desc, - masterPrefix + prefix - ); - - expandHelp(desc, ""); - selfRegister = true; - } + // string list with all possible particle sources + std::string concatenatedSourceNames = plugins::misc::concatenateToString(allowedDataSources, ", "); - void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - numAggregators.registerHelp( - desc, - masterPrefix + prefix - ); - numOSTs.registerHelp( - desc, - masterPrefix + prefix - ); - disableMeta.registerHelp( - desc, - masterPrefix + prefix - ); - transportParams.registerHelp( - desc, - masterPrefix + prefix - ); - compression.registerHelp( - desc, - masterPrefix + prefix - ); - } + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + source.registerHelp(desc, masterPrefix + prefix, std::string("[") + concatenatedSourceNames + "]"); + fileName.registerHelp(desc, masterPrefix + prefix); - void validateOptions() - { - if( selfRegister ) - { - if( notifyPeriod.empty() || fileName.empty() ) - throw std::runtime_error( - name + - ": parameter period and file must be defined" 
- ); - - // check if user passed data source names are valid - for( auto const & dataSourceNames : source) + expandHelp(desc, ""); + selfRegister = true; + } + + void expandHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) { - auto vectorOfDataSourceNames = plugins::misc::splitString( - plugins::misc::removeSpaces( dataSourceNames ) - ); + numAggregators.registerHelp(desc, masterPrefix + prefix); + numOSTs.registerHelp(desc, masterPrefix + prefix); + disableMeta.registerHelp(desc, masterPrefix + prefix); + transportParams.registerHelp(desc, masterPrefix + prefix); + compression.registerHelp(desc, masterPrefix + prefix); + } - for( auto const & f : vectorOfDataSourceNames ) + void validateOptions() + { + if(selfRegister) { - if( - !plugins::misc::containsObject( - allowedDataSources, - f - ) - ) + if(notifyPeriod.empty() || fileName.empty()) + throw std::runtime_error(name + ": parameter period and file must be defined"); + + // check if user passed data source names are valid + for(auto const& dataSourceNames : source) { - throw std::runtime_error( name + ": unknown data source '" + f + "'" ); + auto vectorOfDataSourceNames + = plugins::misc::splitString(plugins::misc::removeSpaces(dataSourceNames)); + + for(auto const& f : vectorOfDataSourceNames) + { + if(!plugins::misc::containsObject(allowedDataSources, f)) + { + throw std::runtime_error(name + ": unknown data source '" + f + "'"); + } + } } } } - } - } - size_t getNumPlugins() const - { - if( selfRegister ) - return notifyPeriod.size(); - else - return 1; - } + size_t getNumPlugins() const + { + if(selfRegister) + return notifyPeriod.size(); + else + return 1; + } - std::string getDescription() const - { - return description; - } + std::string getDescription() const + { + return description; + } - std::string getOptionPrefix() const - { - return prefix; - } + std::string getOptionPrefix() const + { + return prefix; + } - std::string getName() const - 
{ - return name; - } + std::string getName() const + { + return name; + } - std::string const name = "ADIOSWriter"; - //! short description of the plugin - std::string const description = "dump simulation data with ADIOS"; - //! prefix used for command line arguments - std::string const prefix = "adios"; - }; + std::string const name = "ADIOSWriter"; + //! short description of the plugin + std::string const description = "dump simulation data with ADIOS"; + //! prefix used for command line arguments + std::string const prefix = "adios"; + }; - //! must be implemented by the user - static std::shared_ptr< plugins::multi::IHelp > getHelp() - { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); - } -private: + //! must be implemented by the user + static std::shared_ptr getHelp() + { + return std::shared_ptr(new Help{}); + } - template - static std::vector createUnit(UnitType unit, uint32_t numComponents) - { - std::vector tmp(numComponents); - for (uint32_t i = 0; i < numComponents; ++i) - tmp[i] = unit[i]; - return tmp; - } - - /** - * Write calculated fields to adios file. - */ - template< typename T_Field > - struct GetFields - { - private: - using ValueType = typename T_Field::ValueType; - using ComponentType = typename GetComponentsType::type; + private: + template + static std::vector createUnit(UnitType unit, uint32_t numComponents) + { + std::vector tmp(numComponents); + for(uint32_t i = 0; i < numComponents; ++i) + tmp[i] = unit[i]; + return tmp; + } - public: + /** + * Write calculated fields to adios file. 
+ */ + template + struct GetFields + { + private: + using ValueType = typename T_Field::ValueType; + using ComponentType = typename GetComponentsType::type; - HDINLINE void operator()(ThreadParams* params) - { + public: + HDINLINE void operator()(ThreadParams* params) + { #ifndef __CUDA_ARCH__ - DataConnector &dc = Environment::get().DataConnector(); - - auto field = dc.get< T_Field >( T_Field::getName() ); - params->gridLayout = field->getGridLayout(); - const bool isDomainBound = traits::IsFieldDomainBound< T_Field >::value; - - PICToAdios adiosType; - ADIOSWriter::template writeField( - params, - sizeof(ComponentType), - adiosType.type, - GetNComponents::value, - T_Field::getName(), - field->getHostDataBox().getPointer(), - isDomainBound - ); - - dc.releaseData( T_Field::getName() ); + DataConnector& dc = Environment::get().DataConnector(); + + auto field = dc.get(T_Field::getName()); + params->gridLayout = field->getGridLayout(); + const bool isDomainBound = traits::IsFieldDomainBound::value; + + PICToAdios adiosType; + ADIOSWriter::template writeField( + params, + sizeof(ComponentType), + adiosType.type, + GetNComponents::value, + T_Field::getName(), + field->getHostDataBox().getPointer(), + isDomainBound); + + dc.releaseData(T_Field::getName()); #endif - } + } + }; - }; + /** Calculate FieldTmp with given solver and particle species + * and write them to adios. + * + * FieldTmp is calculated on device and than dumped to adios. + */ + template + struct GetFields> + { + /* + * This is only a wrapper function to allow disable nvcc warnings. + * Warning: calling a __host__ function from __host__ __device__ + * function. + * Use of PMACC_NO_NVCC_HDWARNING is not possible if we call a virtual + * method inside of the method were we disable the warnings. + * Therefore we create this method and call a new method were we can + * call virtual functions. 
+ */ + PMACC_NO_NVCC_HDWARNING + HDINLINE void operator()(ThreadParams* tparam) + { + this->operator_impl(tparam); + } - /** Calculate FieldTmp with given solver and particle species - * and write them to adios. - * - * FieldTmp is calculated on device and than dumped to adios. - */ - template< typename Solver, typename Species > - struct GetFields > - { + private: + typedef typename FieldTmp::ValueType ValueType; + typedef typename GetComponentsType::type ComponentType; - /* - * This is only a wrapper function to allow disable nvcc warnings. - * Warning: calling a __host__ function from __host__ __device__ - * function. - * Use of PMACC_NO_NVCC_HDWARNING is not possible if we call a virtual - * method inside of the method were we disable the warnings. - * Therefore we create this method and call a new method were we can - * call virtual functions. - */ - PMACC_NO_NVCC_HDWARNING - HDINLINE void operator()(ThreadParams* tparam) - { - this->operator_impl(tparam); - } - private: - typedef typename FieldTmp::ValueType ValueType; - typedef typename GetComponentsType::type ComponentType; + /** Create a name for the adios identifier. + */ + static std::string getName() + { + return FieldTmpOperation::getName(); + } - /** Create a name for the adios identifier. 
- */ - static std::string getName() - { - return FieldTmpOperation::getName(); - } + HINLINE void operator_impl(ThreadParams* params) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /*## update field ##*/ + + /*load FieldTmp without copy data to host*/ + PMACC_CASSERT_MSG(_please_allocate_at_least_one_FieldTmp_in_memory_param, fieldTmpNumSlots > 0); + auto fieldTmp = dc.get(FieldTmp::getUniqueId(0), true); + /*load particle without copy particle data to host*/ + auto speciesTmp = dc.get(Species::FrameType::getName(), true); + + fieldTmp->getGridBuffer().getDeviceBuffer().setValue(ValueType::create(0.0)); + /*run algorithm*/ + fieldTmp->template computeValue(*speciesTmp, params->currentStep); + + EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(fieldTmpEvent); + /* copy data to host that we can write same to disk*/ + fieldTmp->getGridBuffer().deviceToHost(); + dc.releaseData(Species::FrameType::getName()); + /*## finish update field ##*/ + + const uint32_t components = GetNComponents::value; + PICToAdios adiosType; + + params->gridLayout = fieldTmp->getGridLayout(); + const bool isDomainBound = traits::IsFieldDomainBound::value; + /*write data to ADIOS file*/ + ADIOSWriter::template writeField( + params, + sizeof(ComponentType), + adiosType.type, + components, + getName(), + fieldTmp->getHostDataBox().getPointer(), + isDomainBound); + + dc.releaseData(FieldTmp::getUniqueId(0)); + } + }; + + template + static void defineFieldVar( + ThreadParams* params, + uint32_t nComponents, + ADIOS_DATATYPES adiosType, + const std::string name, + std::vector unit, + std::vector unitDimension, + std::vector> inCellPosition, + float_X timeOffset) + { + PICToAdios adiosDoubleType; + PICToAdios adiosFloatXType; - HINLINE void operator_impl(ThreadParams* params) - { - DataConnector &dc = Environment<>::get().DataConnector(); - - /*## update field ##*/ - - /*load FieldTmp without copy data to host*/ - 
PMACC_CASSERT_MSG( - _please_allocate_at_least_one_FieldTmp_in_memory_param, - fieldTmpNumSlots > 0 - ); - auto fieldTmp = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - /*load particle without copy particle data to host*/ - auto speciesTmp = dc.get< Species >( Species::FrameType::getName(), true ); - - fieldTmp->getGridBuffer().getDeviceBuffer().setValue(ValueType::create(0.0)); - /*run algorithm*/ - fieldTmp->template computeValue< CORE + BORDER, Solver >(*speciesTmp, params->currentStep); - - EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(fieldTmpEvent); - /* copy data to host that we can write same to disk*/ - fieldTmp->getGridBuffer().deviceToHost(); - dc.releaseData(Species::FrameType::getName()); - /*## finish update field ##*/ - - const uint32_t components = GetNComponents::value; - PICToAdios adiosType; - - params->gridLayout = fieldTmp->getGridLayout(); - const bool isDomainBound = traits::IsFieldDomainBound< FieldTmp >::value; - /*write data to ADIOS file*/ - ADIOSWriter::template writeField( - params, - sizeof(ComponentType), - adiosType.type, - components, - getName(), - fieldTmp->getHostDataBox().getPointer(), - isDomainBound - ); - - dc.releaseData( FieldTmp::getUniqueId( 0 ) ); + auto const componentNames = plugins::misc::getComponentNames(nComponents); - } + /* parameter checking */ + PMACC_ASSERT(unit.size() == nComponents); + PMACC_ASSERT(inCellPosition.size() == nComponents); + for(uint32_t n = 0; n < nComponents; ++n) + PMACC_ASSERT(inCellPosition.at(n).size() == simDim); + PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units - }; + const std::string recordName(params->adiosBasePath + std::string(ADIOS_PATH_FIELDS) + name); - template< typename T_Field > - static void defineFieldVar(ThreadParams* params, - uint32_t nComponents, ADIOS_DATATYPES adiosType, const std::string name, - std::vector unit, std::vector unitDimension, - std::vector > inCellPosition, 
float_X timeOffset) - { - PICToAdios adiosDoubleType; - PICToAdios adiosFloatXType; + auto fieldsSizeDims = params->fieldsSizeDims; + auto fieldsGlobalSizeDims = params->fieldsGlobalSizeDims; + auto fieldsOffsetDims = params->fieldsOffsetDims; - auto const componentNames = plugins::misc::getComponentNames( nComponents ); + /* Patch for non-domain-bound fields + * This is an ugly fix to allow output of reduced 1d PML buffers + */ + if(!traits::IsFieldDomainBound::value) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto field = dc.get(T_Field::getName()); + fieldsSizeDims = precisionCast(field->getGridLayout().getDataSpaceWithoutGuarding()); + dc.releaseData(T_Field::getName()); + + /* Scan the PML buffer local size along all local domains + * This code is based on the same operation in hdf5::Field::writeField(), + * the same comments apply here + */ + log("ADIOS: (begin) collect PML sizes for %1%") % name; + auto& gridController = Environment::get().GridController(); + auto const numRanks = uint64_t{gridController.getGlobalSize()}; + /* Use domain position-based rank, not MPI rank, to be independent + * of the MPI rank assignment scheme + */ + auto const rank = uint64_t{gridController.getScalarPosition()}; + std::vector localSizes(2u * numRanks, 0u); + uint64_t localSizeInfo[2] = {fieldsSizeDims[0], rank}; + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + localSizeInfo, + 2, + MPI_UINT64_T, + &(*localSizes.begin()), + 2, + MPI_UINT64_T, + gridController.getCommunicator().getMPIComm())); + uint64_t globalOffsetFile = 0; + uint64_t globalSize = 0; + for(uint64_t r = 0; r < numRanks; ++r) + { + globalSize += localSizes.at(2u * r); + if(localSizes.at(2u * r + 1u) < rank) + globalOffsetFile += localSizes.at(2u * r); + } + log("ADIOS: (end) collect PML sizes for %1%") % name; - /* parameter checking */ - PMACC_ASSERT( unit.size() == nComponents ); - PMACC_ASSERT( inCellPosition.size() == nComponents ); - for( uint32_t n = 0; n 
< nComponents; ++n ) - PMACC_ASSERT( inCellPosition.at(n).size() == simDim ); - PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units + fieldsGlobalSizeDims = pmacc::math::UInt64::create(1); + fieldsGlobalSizeDims[0] = globalSize; + fieldsOffsetDims = pmacc::math::UInt64::create(0); + fieldsOffsetDims[0] = globalOffsetFile; + } - const std::string recordName( params->adiosBasePath + - std::string(ADIOS_PATH_FIELDS) + name ); + for(uint32_t c = 0; c < nComponents; c++) + { + std::string datasetName = recordName; + if(nComponents > 1) + datasetName += "/" + componentNames[c]; + + /* define adios var for field, e.g. field_FieldE_y */ + const char* path = nullptr; + int64_t adiosFieldVarId = defineAdiosVar( + params->adiosGroupHandle, + datasetName.c_str(), + path, + adiosType, + fieldsSizeDims, + fieldsGlobalSizeDims, + fieldsOffsetDims, + true, + params->adiosCompression); + + params->adiosFieldVarIds.push_back(adiosFieldVarId); + + /* already add the unitSI and further attribute so `adios_group_size` + * calculates the reservation for the buffer correctly */ + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "position", + datasetName.c_str(), + adiosFloatXType.type, + simDim, + &(*inCellPosition.at(c).begin()))); + + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "unitSI", + datasetName.c_str(), + adiosDoubleType.type, + 1, + &unit.at(c))); + } - auto fieldsSizeDims = params->fieldsSizeDims; - auto fieldsGlobalSizeDims = params->fieldsGlobalSizeDims; - auto fieldsOffsetDims = params->fieldsOffsetDims; + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "unitDimension", + recordName.c_str(), + adiosDoubleType.type, + 7, + &(*unitDimension.begin()))); - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. 
- */ - if( !traits::IsFieldDomainBound< T_Field >::value ) - { - DataConnector &dc = Environment<>::get().DataConnector(); - auto field = dc.get< T_Field >( T_Field::getName() ); - fieldsSizeDims = precisionCast< uint64_t >( field->getGridLayout().getDataSpaceWithoutGuarding() ); - dc.releaseData( T_Field::getName() ); - auto const & gridController = Environment::get().GridController(); - auto const numRanks = gridController.getGlobalSize(); - auto const rank = gridController.getGlobalRank(); - fieldsGlobalSizeDims = pmacc::math::UInt64::create( 1 ); - fieldsGlobalSizeDims[ 0 ] = numRanks * fieldsSizeDims[ 0 ]; - fieldsOffsetDims = pmacc::math::UInt64::create( 0 ); - fieldsOffsetDims[ 0 ] = rank * fieldsSizeDims[ 0 ]; - } + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "timeOffset", + recordName.c_str(), + adiosFloatXType.type, + 1, + &timeOffset)); + + const std::string geometry("cartesian"); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "geometry", + recordName.c_str(), + adios_string, + 1, + (void*) geometry.c_str())); + + const std::string dataOrder("C"); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "dataOrder", + recordName.c_str(), + adios_string, + 1, + (void*) dataOrder.c_str())); - for( uint32_t c = 0; c < nComponents; c++ ) - { - std::string datasetName = recordName; - if (nComponents > 1) - datasetName += "/" + componentNames[c]; + if(simDim == DIM2) + { + const char* axisLabels[] = {"y", "x"}; // 2D: F[y][x] + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "axisLabels", + recordName.c_str(), + adios_string_array, + simDim, + axisLabels)); + } + if(simDim == DIM3) + { + const char* axisLabels[] = {"z", "y", "x"}; // 3D: F[z][y][x] + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "axisLabels", + recordName.c_str(), + adios_string_array, + simDim, + axisLabels)); + } - /* define adios var for field, e.g. 
field_FieldE_y */ - const char* path = nullptr; - int64_t adiosFieldVarId = defineAdiosVar( - params->adiosGroupHandle, - datasetName.c_str(), - path, - adiosType, - fieldsSizeDims, - fieldsGlobalSizeDims, - fieldsOffsetDims, - true, - params->adiosCompression); - - params->adiosFieldVarIds.push_back(adiosFieldVarId); - - /* already add the unitSI and further attribute so `adios_group_size` - * calculates the reservation for the buffer correctly */ - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "position", datasetName.c_str(), - adiosFloatXType.type, simDim, &(*inCellPosition.at(c).begin()) )); - - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "unitSI", datasetName.c_str(), - adiosDoubleType.type, 1, &unit.at(c) )); - } + // cellSize is {x, y, z} but fields are F[z][y][x] + std::vector gridSpacing(simDim, 0.0); + for(uint32_t d = 0; d < simDim; ++d) + gridSpacing.at(simDim - 1 - d) = cellSize[d]; - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "unitDimension", recordName.c_str(), - adiosDoubleType.type, 7, &(*unitDimension.begin()) )); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "gridSpacing", + recordName.c_str(), + adiosFloatXType.type, + simDim, + &(*gridSpacing.begin()))); + + /* globalSlideOffset due to gpu slides between origin at time step 0 + * and origin at current time step + * ATTENTION: splash offset are globalSlideOffset + picongpu offsets + */ + DataSpace globalSlideOffset; + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(params->currentStep); + globalSlideOffset.y() += numSlides * localDomain.size.y(); + + // globalDimensions is {x, y, z} but fields are F[z][y][x] + std::vector gridGlobalOffset(simDim, 0.0); + for(uint32_t d = 0; d < simDim; ++d) + gridGlobalOffset.at(simDim - 1 - d) = float_64(cellSize[d]) + * 
float_64(params->window.globalDimensions.offset[d] + globalSlideOffset[d]); + + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "gridGlobalOffset", + recordName.c_str(), + adiosDoubleType.type, + simDim, + &(*gridGlobalOffset.begin()))); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "timeOffset", recordName.c_str(), - adiosFloatXType.type, 1, &timeOffset )); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "gridUnitSI", + recordName.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_LENGTH)); + + const std::string fieldSmoothing("none"); + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "fieldSmoothing", + recordName.c_str(), + adios_string, + 1, + (void*) fieldSmoothing.c_str())); + } - const std::string geometry( "cartesian" ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "geometry", recordName.c_str(), - adios_string, 1, (void*)geometry.c_str() )); + /** + * Collect field sizes to set adios group size. 
+ */ + template + struct CollectFieldsSizes + { + public: + typedef typename T::ValueType ValueType; + typedef typename T::UnitValueType UnitType; + typedef typename GetComponentsType::type ComponentType; - const std::string dataOrder( "C" ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "dataOrder", recordName.c_str(), - adios_string, 1, (void*)dataOrder.c_str() )); + static std::vector getUnit() + { + UnitType unit = T::getUnit(); + return createUnit(unit, T::numComponents); + } - if( simDim == DIM2 ) - { - const char* axisLabels[] = {"y", "x"}; // 2D: F[y][x] - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "axisLabels", recordName.c_str(), - adios_string_array, simDim, axisLabels )); - } - if( simDim == DIM3 ) - { - const char* axisLabels[] = {"z", "y", "x"}; // 3D: F[z][y][x] - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "axisLabels", recordName.c_str(), - adios_string_array, simDim, axisLabels )); - } + HDINLINE void operator()(ThreadParams* params) + { +#ifndef __CUDA_ARCH__ + const uint32_t components = T::numComponents; + + auto localSize = params->window.localDimensions.size; + /* Patch for non-domain-bound fields + * This is an ugly fix to allow output of reduced 1d PML buffers, + * that are the same size on each domain. + * This code is to be replaced with the openPMD output plugin soon. 
+ */ + if(!traits::IsFieldDomainBound::value) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto field = dc.get(T::getName()); + localSize = field->getGridLayout().getDataSpaceWithoutGuarding(); + dc.releaseData(T::getName()); + } - // cellSize is {x, y, z} but fields are F[z][y][x] - std::vector gridSpacing(simDim, 0.0); - for( uint32_t d = 0; d < simDim; ++d ) - gridSpacing.at(simDim-1-d) = cellSize[d]; + // adios buffer size for this dataset (all components) + uint64_t localGroupSize = localSize.productOfComponents() * sizeof(ComponentType) * components; - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "gridSpacing", recordName.c_str(), - adiosFloatXType.type, simDim, &(*gridSpacing.begin()) )); + params->adiosGroupSize += localGroupSize; - /* globalSlideOffset due to gpu slides between origin at time step 0 - * and origin at current time step - * ATTENTION: splash offset are globalSlideOffset + picongpu offsets - */ - DataSpace globalSlideOffset; - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(params->currentStep); - globalSlideOffset.y() += numSlides * localDomain.size.y(); - - // globalDimensions is {x, y, z} but fields are F[z][y][x] - std::vector gridGlobalOffset(simDim, 0.0); - for( uint32_t d = 0; d < simDim; ++d ) - gridGlobalOffset.at(simDim-1-d) = - float_64(cellSize[d]) * - float_64(params->window.globalDimensions.offset[d] + - globalSlideOffset[d]); - - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "gridGlobalOffset", recordName.c_str(), - adiosDoubleType.type, simDim, &(*gridGlobalOffset.begin()) )); - - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "gridUnitSI", recordName.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_LENGTH )); - - const std::string fieldSmoothing( "none" ); - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - 
"fieldSmoothing", recordName.c_str(), - adios_string, 1, (void*)fieldSmoothing.c_str() )); - } - - /** - * Collect field sizes to set adios group size. - */ - template< typename T > - struct CollectFieldsSizes - { - public: - typedef typename T::ValueType ValueType; - typedef typename T::UnitValueType UnitType; - typedef typename GetComponentsType::type ComponentType; + // convert in a std::vector of std::vector format for writeField API + const traits::FieldPosition fieldPos; - static std::vector getUnit() - { - UnitType unit = T::getUnit(); - return createUnit(unit, T::numComponents); - } + std::vector> inCellPosition; + for(uint32_t n = 0; n < T::numComponents; ++n) + { + std::vector inCellPositonComponent; + for(uint32_t d = 0; d < simDim; ++d) + inCellPositonComponent.push_back(fieldPos()[n][d]); + inCellPosition.push_back(inCellPositonComponent); + } - HDINLINE void operator()(ThreadParams* params) - { -#ifndef __CUDA_ARCH__ - const uint32_t components = T::numComponents; + /** \todo check if always correct at this point, depends on solver + * implementation */ + const float_X timeOffset = 0.0; + + PICToAdios adiosType; + defineFieldVar( + params, + components, + adiosType.type, + T::getName(), + getUnit(), + T::getUnitDimension(), + inCellPosition, + timeOffset); +#endif + } + }; - auto localSize = params->window.localDimensions.size; - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. + /** + * Collect field sizes to set adios group size. + * Specialization. 
*/ - if( !traits::IsFieldDomainBound< T >::value ) + template + struct CollectFieldsSizes> { - DataConnector &dc = Environment<>::get().DataConnector(); - auto field = dc.get< T >( T::getName() ); - localSize = field->getGridLayout().getDataSpaceWithoutGuarding(); - dc.releaseData( T::getName() ); - } - - // adios buffer size for this dataset (all components) - uint64_t localGroupSize = - localSize.productOfComponents() * - sizeof(ComponentType) * - components; - - params->adiosGroupSize += localGroupSize; + public: + PMACC_NO_NVCC_HDWARNING + HDINLINE void operator()(ThreadParams* tparam) + { + this->operator_impl(tparam); + } - // convert in a std::vector of std::vector format for writeField API - const traits::FieldPosition fieldPos; + private: + typedef typename FieldTmp::ValueType ValueType; + typedef typename FieldTmp::UnitValueType UnitType; + typedef typename GetComponentsType::type ComponentType; - std::vector > inCellPosition; - for( uint32_t n = 0; n < T::numComponents; ++n ) - { - std::vector inCellPositonComponent; - for( uint32_t d = 0; d < simDim; ++d ) - inCellPositonComponent.push_back( fieldPos()[n][d] ); - inCellPosition.push_back( inCellPositonComponent ); - } + /** Create a name for the adios identifier. + */ + static std::string getName() + { + return FieldTmpOperation::getName(); + } - /** \todo check if always correct at this point, depends on solver - * implementation */ - const float_X timeOffset = 0.0; + /** Get the unit for the result from the solver*/ + static std::vector getUnit() + { + UnitType unit = FieldTmp::getUnit(); + const uint32_t components = GetNComponents::value; + return createUnit(unit, components); + } - PICToAdios adiosType; - defineFieldVar< T >(params, components, adiosType.type, T::getName(), getUnit(), - T::getUnitDimension(), inCellPosition, timeOffset); -#endif - } - }; - - /** - * Collect field sizes to set adios group size. - * Specialization. 
- */ - template< typename Solver, typename Species > - struct CollectFieldsSizes > - { - public: + HINLINE void operator_impl(ThreadParams* params) + { + const uint32_t components = GetNComponents::value; + + auto localSize = params->window.localDimensions.size; + /* Patch for non-domain-bound fields + * This is an ugly fix to allow output of reduced 1d PML buffers, + * that are the same size on each domain. + * This code is to be replaced with the openPMD output plugin soon. + */ + if(!traits::IsFieldDomainBound::value) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto field = dc.get(FieldTmp::getName()); + localSize = field->getGridLayout().getDataSpaceWithoutGuarding(); + dc.releaseData(FieldTmp::getName()); + } - PMACC_NO_NVCC_HDWARNING - HDINLINE void operator()(ThreadParams* tparam) - { - this->operator_impl(tparam); - } + // adios buffer size for this dataset (all components) + uint64_t localGroupSize = localSize.productOfComponents() * sizeof(ComponentType) * components; + + params->adiosGroupSize += localGroupSize; + + /*wrap in a one-component vector for writeField API*/ + const traits::FieldPosition fieldPos; + + std::vector> inCellPosition; + std::vector inCellPositonComponent; + for(uint32_t d = 0; d < simDim; ++d) + inCellPositonComponent.push_back(fieldPos()[0][d]); + inCellPosition.push_back(inCellPositonComponent); + + /** \todo check if always correct at this point, depends on solver + * implementation */ + const float_X timeOffset = 0.0; + + PICToAdios adiosType; + defineFieldVar( + params, + components, + adiosType.type, + getName(), + getUnit(), + FieldTmp::getUnitDimension(), + inCellPosition, + timeOffset); + } + }; - private: - typedef typename FieldTmp::ValueType ValueType; - typedef typename FieldTmp::UnitValueType UnitType; - typedef typename GetComponentsType::type ComponentType; + public: + /** constructor + * + * @param help instance of the class Help + * @param id index of this plugin instance within help + * 
@param cellDescription PIConGPu cell description information for kernel index mapping + */ + ADIOSWriter(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) + , outputDirectory("bp") + , lastSpeciesSyncStep(pmacc::traits::limits::Max::value) + { + mThreadParams.adiosAggregators = m_help->numAggregators.get(id); + mThreadParams.adiosOST = m_help->numOSTs.get(id); + mThreadParams.adiosDisableMeta = m_help->disableMeta.get(id); + mThreadParams.adiosTransportParams = m_help->transportParams.get(id); + mThreadParams.adiosCompression = m_help->compression.get(id); + + GridController& gc = Environment::get().GridController(); + /* It is important that we never change the mpi_pos after this point + * because we get problems with the restart. + * Otherwise we do not know which gpu must load the ghost parts around + * the sliding window. + */ + mpi_pos = gc.getPosition(); + mpi_size = gc.getGpuNodes(); + + /* if number of aggregators is not set we use all mpi process as aggregator*/ + if(mThreadParams.adiosAggregators == 0) + mThreadParams.adiosAggregators = mpi_size.productOfComponents(); + + if(m_help->selfRegister) + { + std::string notifyPeriod = m_help->notifyPeriod.get(id); + /* only register for notify callback when .period is set on command line */ + if(!notifyPeriod.empty()) + { + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - /** Create a name for the adios identifier. 
- */ - static std::string getName() - { - return FieldTmpOperation::getName(); - } + /** create notify directory */ + Environment::get().Filesystem().createDirectoryWithPermissions(outputDirectory); + } + } - /** Get the unit for the result from the solver*/ - static std::vector getUnit() - { - UnitType unit = FieldTmp::getUnit(); - const uint32_t components = GetNComponents::value; - return createUnit(unit, components); - } + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + /* Initialize adios library */ + mThreadParams.adiosComm = MPI_COMM_NULL; + MPI_CHECK(MPI_Comm_dup(gc.getCommunicator().getMPIComm(), &(mThreadParams.adiosComm))); + mThreadParams.adiosBufferInitialized = false; + + /* select MPI method, #OSTs and #aggregators */ + std::stringstream strMPITransportParams; + strMPITransportParams << "num_aggregators=" << mThreadParams.adiosAggregators + << ";num_ost=" << mThreadParams.adiosOST; + /* create meta file offline/post-mortem with bpmeta */ + if(mThreadParams.adiosDisableMeta) + strMPITransportParams << ";have_metadata_file=0"; + /* additional, uncovered transport parameters, e.g., + * use system-defaults for striping per aggregated file */ + if(!mThreadParams.adiosTransportParams.empty()) + strMPITransportParams << ";" << mThreadParams.adiosTransportParams; + + mpiTransportParams = strMPITransportParams.str(); + } - HINLINE void operator_impl(ThreadParams* params) - { - const uint32_t components = GetNComponents::value; - - auto localSize = params->window.localDimensions.size; - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. 
- */ - if( !traits::IsFieldDomainBound< FieldTmp >::value ) + virtual ~ADIOSWriter() { - DataConnector &dc = Environment<>::get().DataConnector(); - auto field = dc.get< FieldTmp >( FieldTmp::getName() ); - localSize = field->getGridLayout().getDataSpaceWithoutGuarding(); - dc.releaseData( FieldTmp::getName() ); + if(mThreadParams.adiosComm != MPI_COMM_NULL) + { + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&(mThreadParams.adiosComm))); + } } - // adios buffer size for this dataset (all components) - uint64_t localGroupSize = - localSize.productOfComponents() * - sizeof(ComponentType) * - components; - - params->adiosGroupSize += localGroupSize; - - /*wrap in a one-component vector for writeField API*/ - const traits::FieldPosition - fieldPos; - - std::vector > inCellPosition; - std::vector inCellPositonComponent; - for( uint32_t d = 0; d < simDim; ++d ) - inCellPositonComponent.push_back( fieldPos()[0][d] ); - inCellPosition.push_back( inCellPositonComponent ); - - /** \todo check if always correct at this point, depends on solver - * implementation */ - const float_X timeOffset = 0.0; + void notify(uint32_t currentStep) + { + // notify is only allowed if the plugin is not controlled by the class Checkpoint + assert(m_help->selfRegister); - PICToAdios adiosType; - defineFieldVar< FieldTmp >(params, components, adiosType.type, getName(), getUnit(), - FieldTmp::getUnitDimension(), inCellPosition, timeOffset); - } + __getTransactionEvent().waitForFinished(); - }; - -public: - - /** constructor - * - * @param help instance of the class Help - * @param id index of this plugin instance within help - * @param cellDescription PIConGPu cell description information for kernel index mapping - */ - ADIOSWriter( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help 
>(help) ), - m_id( id ), - m_cellDescription( cellDescription ), - outputDirectory("bp"), - lastSpeciesSyncStep(pmacc::traits::limits::Max::value) - { + std::string filename = m_help->fileName.get(m_id); - mThreadParams.adiosAggregators = m_help->numAggregators.get( id ); - mThreadParams.adiosOST = m_help->numOSTs.get( id ); - mThreadParams.adiosDisableMeta = m_help->disableMeta.get( id ); - mThreadParams.adiosTransportParams = m_help->transportParams.get( id ); - mThreadParams.adiosCompression = m_help->compression.get( id ); - - GridController &gc = Environment::get().GridController(); - /* It is important that we never change the mpi_pos after this point - * because we get problems with the restart. - * Otherwise we do not know which gpu must load the ghost parts around - * the sliding window. - */ - mpi_pos = gc.getPosition(); - mpi_size = gc.getGpuNodes(); + /* if file name is relative, prepend with common directory */ + if(boost::filesystem::path(filename).has_root_path()) + mThreadParams.adiosFilename = filename; + else + mThreadParams.adiosFilename = outputDirectory + "/" + filename; - /* if number of aggregators is not set we use all mpi process as aggregator*/ - if( mThreadParams.adiosAggregators == 0 ) - mThreadParams.adiosAggregators=mpi_size.productOfComponents(); + /* window selection */ + mThreadParams.window = MovingWindow::getInstance().getWindow(currentStep); + mThreadParams.isCheckpoint = false; + dumpData(currentStep); + } - if( m_help->selfRegister ) - { - std::string notifyPeriod = m_help->notifyPeriod.get( id ); - /* only register for notify callback when .period is set on command line */ - if(!notifyPeriod.empty()) + virtual void restart(uint32_t restartStep, std::string const& restartDirectory) { - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - - /** create notify directory */ - Environment::get().Filesystem().createDirectoryWithPermissions(outputDirectory); + /* ISlave restart interface is not needed 
becase IIOBackend + * restart interface is used + */ } - } - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - /* Initialize adios library */ - mThreadParams.adiosComm = MPI_COMM_NULL; - MPI_CHECK(MPI_Comm_dup(gc.getCommunicator().getMPIComm(), &(mThreadParams.adiosComm))); - mThreadParams.adiosBufferInitialized = false; - - /* select MPI method, #OSTs and #aggregators */ - std::stringstream strMPITransportParams; - strMPITransportParams << "num_aggregators=" << mThreadParams.adiosAggregators - << ";num_ost=" << mThreadParams.adiosOST; - /* create meta file offline/post-mortem with bpmeta */ - if( mThreadParams.adiosDisableMeta ) - strMPITransportParams << ";have_metadata_file=0"; - /* additional, uncovered transport parameters, e.g., - * use system-defaults for striping per aggregated file */ - if( ! mThreadParams.adiosTransportParams.empty() ) - strMPITransportParams << ";" << mThreadParams.adiosTransportParams; - - mpiTransportParams = strMPITransportParams.str(); - } - - virtual ~ADIOSWriter() - { - if (mThreadParams.adiosComm != MPI_COMM_NULL) - { - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&(mThreadParams.adiosComm))); - } - } - - void notify(uint32_t currentStep) - { - // notify is only allowed if the plugin is not controlled by the class Checkpoint - assert( m_help->selfRegister ); - __getTransactionEvent().waitForFinished(); - - std::string filename = m_help->fileName.get( m_id ); - - /* if file name is relative, prepend with common directory */ - if( boost::filesystem::path(filename).has_root_path() ) - mThreadParams.adiosFilename = filename; - else - mThreadParams.adiosFilename = outputDirectory + "/" + filename; + virtual void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) + { + /* ISlave checkpoint interface is not needed becase 
IIOBackend + * checkpoint interface is used + */ + } - /* window selection */ - mThreadParams.window = MovingWindow::getInstance().getWindow(currentStep); - mThreadParams.isCheckpoint = false; - dumpData(currentStep); - } + void dumpCheckpoint( + const uint32_t currentStep, + const std::string& checkpointDirectory, + const std::string& checkpointFilename) + { + // checkpointing is only allowed if the plugin is controlled by the class Checkpoint + assert(!m_help->selfRegister); - virtual void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) - { - /* ISlave restart interface is not needed becase IIOBackend - * restart interface is used - */ - } + __getTransactionEvent().waitForFinished(); + /* if file name is relative, prepend with common directory */ + if(boost::filesystem::path(checkpointFilename).has_root_path()) + mThreadParams.adiosFilename = checkpointFilename; + else + mThreadParams.adiosFilename = checkpointDirectory + "/" + checkpointFilename; - virtual void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) - { - /* ISlave checkpoint interface is not needed becase IIOBackend - * checkpoint interface is used - */ - } + mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(currentStep); + mThreadParams.isCheckpoint = true; - void dumpCheckpoint( - const uint32_t currentStep, - const std::string& checkpointDirectory, - const std::string& checkpointFilename - ) - { - // checkpointing is only allowed if the plugin is controlled by the class Checkpoint - assert(!m_help->selfRegister); - - __getTransactionEvent().waitForFinished(); - /* if file name is relative, prepend with common directory */ - if( boost::filesystem::path(checkpointFilename).has_root_path() ) - mThreadParams.adiosFilename = checkpointFilename; - else - mThreadParams.adiosFilename = checkpointDirectory + "/" + checkpointFilename; - - mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(currentStep); - 
mThreadParams.isCheckpoint = true; - - dumpData(currentStep); - } - - void doRestart( - const uint32_t restartStep, - const std::string& restartDirectory, - const std::string& constRestartFilename, - const uint32_t restartChunkSize - ) - { - // restart is only allowed if the plugin is controlled by the class Checkpoint - assert(!m_help->selfRegister); - - // allow to modify the restart file name - std::string restartFilename{ constRestartFilename }; - - std::stringstream adiosPathBase; - adiosPathBase << ADIOS_PATH_ROOT << restartStep << "/"; - mThreadParams.adiosBasePath = adiosPathBase.str(); - //mThreadParams.isCheckpoint = isCheckpoint; - mThreadParams.currentStep = restartStep; - mThreadParams.cellDescription = m_cellDescription; - - /** one could try ADIOS_READ_METHOD_BP_AGGREGATE too which might - * be beneficial for re-distribution on a different number of GPUs - * would need: - `export chunk_size=SIZE # in MB` - * - `mpiTransportParams.c_str()` in `adios_read_init_method` - */ - ADIOS_CMD(adios_read_init_method(ADIOS_READ_METHOD_BP, - mThreadParams.adiosComm, - "verbose=3;abort_on_error;")); + dumpData(currentStep); + } - /* if restartFilename is relative, prepend with restartDirectory */ - if (!boost::filesystem::path(restartFilename).has_root_path()) - { - restartFilename = restartDirectory + std::string("/") + restartFilename; - } + void doRestart( + const uint32_t restartStep, + const std::string& restartDirectory, + const std::string& constRestartFilename, + const uint32_t restartChunkSize) + { + // restart is only allowed if the plugin is controlled by the class Checkpoint + assert(!m_help->selfRegister); + + // allow to modify the restart file name + std::string restartFilename{constRestartFilename}; + + std::stringstream adiosPathBase; + adiosPathBase << ADIOS_PATH_ROOT << restartStep << "/"; + mThreadParams.adiosBasePath = adiosPathBase.str(); + // mThreadParams.isCheckpoint = isCheckpoint; + mThreadParams.currentStep = restartStep; + 
mThreadParams.cellDescription = m_cellDescription; + + /** one could try ADIOS_READ_METHOD_BP_AGGREGATE too which might + * be beneficial for re-distribution on a different number of GPUs + * would need: - `export chunk_size=SIZE # in MB` + * - `mpiTransportParams.c_str()` in `adios_read_init_method` + */ + ADIOS_CMD(adios_read_init_method( + ADIOS_READ_METHOD_BP, + mThreadParams.adiosComm, + "verbose=3;abort_on_error;")); + + /* if restartFilename is relative, prepend with restartDirectory */ + if(!boost::filesystem::path(restartFilename).has_root_path()) + { + restartFilename = restartDirectory + std::string("/") + restartFilename; + } - std::stringstream strFname; - strFname << restartFilename << "_" << mThreadParams.currentStep << ".bp"; + std::stringstream strFname; + strFname << restartFilename << "_" << mThreadParams.currentStep << ".bp"; - const std::string filename = strFname.str( ); + const std::string filename = strFname.str(); - // adios_read_open( fname, method, comm, lock_mode, timeout_sec ) - log< picLog::INPUT_OUTPUT > ("ADIOS: open file: %1%") % filename; + // adios_read_open( fname, method, comm, lock_mode, timeout_sec ) + log("ADIOS: open file: %1%") % filename; - // when reading in BG_AGGREGATE mode, adios can not distinguish between - // "file does not exist" and "stream is not (yet) available, so we - // test it our selves - if (!boost::filesystem::exists(strFname.str())) - throw std::runtime_error("ADIOS: File does not exist."); + // when reading in BG_AGGREGATE mode, adios can not distinguish between + // "file does not exist" and "stream is not (yet) available, so we + // test it our selves + if(!boost::filesystem::exists(strFname.str())) + throw std::runtime_error("ADIOS: File does not exist."); - /* <0 sec: wait forever - * >=0 sec: return immediately if stream is not available */ - float_32 timeout = 0.0f; - mThreadParams.fp = adios_read_open(filename.c_str(), - ADIOS_READ_METHOD_BP, mThreadParams.adiosComm, - ADIOS_LOCKMODE_CURRENT, 
timeout); + /* <0 sec: wait forever + * >=0 sec: return immediately if stream is not available */ + float_32 timeout = 0.0f; + mThreadParams.fp = adios_read_open( + filename.c_str(), + ADIOS_READ_METHOD_BP, + mThreadParams.adiosComm, + ADIOS_LOCKMODE_CURRENT, + timeout); - /* stream reading is tricky, see ADIOS manual section 8.11.1 */ - while (adios_errno == err_file_not_found) - { - /** \todo add c++11 platform independent sleep */ + /* stream reading is tricky, see ADIOS manual section 8.11.1 */ + while(adios_errno == err_file_not_found) + { + /** \todo add c++11 platform independent sleep */ #if !defined(_WIN32) - /* give the file system 1s of peace and quiet */ - usleep(1e6); + /* give the file system 1s of peace and quiet */ + usleep(1e6); #endif - mThreadParams.fp = adios_read_open(filename.c_str(), - ADIOS_READ_METHOD_BP, mThreadParams.adiosComm, - ADIOS_LOCKMODE_CURRENT, timeout); - } - if (adios_errno == err_end_of_stream ) - /* could not read full stream */ - throw std::runtime_error("ADIOS: Stream terminated too early: " + - std::string(adios_errmsg()) ); - if (mThreadParams.fp == nullptr) - throw std::runtime_error("ADIOS: Error opening stream: " + - std::string(adios_errmsg()) ); - - /* ADIOS types */ - AdiosUInt32Type adiosUInt32Type; - - /* load number of slides to initialize MovingWindow */ - log ("ADIOS: (begin) read attr (%1% available)") % - mThreadParams.fp->nattrs; - void* slidesPtr = nullptr; - int slideSize; - enum ADIOS_DATATYPES slidesType; - const std::string simSlidesPath = - mThreadParams.adiosBasePath + std::string("sim_slides"); - ADIOS_CMD(adios_get_attr( mThreadParams.fp, - simSlidesPath.c_str(), - &slidesType, - &slideSize, - &slidesPtr )); - - uint32_t slides = *( (uint32_t*)slidesPtr ); - log ("ADIOS: value of sim_slides = %1%") % - slides; - - PMACC_ASSERT(slidesType == adiosUInt32Type.type); - PMACC_ASSERT(slideSize == sizeof(uint32_t)); // uint32_t in bytes - - void* lastStepPtr = nullptr; - int lastStepSize; - enum 
ADIOS_DATATYPES lastStepType; - const std::string iterationPath = - mThreadParams.adiosBasePath + std::string("iteration"); - ADIOS_CMD(adios_get_attr( mThreadParams.fp, - iterationPath.c_str(), - &lastStepType, - &lastStepSize, - &lastStepPtr )); - uint32_t lastStep = *( (uint32_t*)lastStepPtr ); - log ("ADIOS: value of iteration = %1%") % - lastStep; - - PMACC_ASSERT(lastStepType == adiosUInt32Type.type); - PMACC_ASSERT(lastStep == restartStep); - - /* apply slides to set gpus to last/written configuration */ - log ("ADIOS: Setting slide count for moving window to %1%") % slides; - MovingWindow::getInstance().setSlideCounter(slides, restartStep); - - /* re-distribute the local offsets in y-direction - * this will work for restarts with moving window still enabled - * and restarts that disable the moving window - * \warning enabling the moving window from a checkpoint that - * had no moving window will not work - */ - GridController &gc = Environment::get().GridController(); - gc.setStateAfterSlides(slides); - - /* set window for restart, complete global domain */ - mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(restartStep); - mThreadParams.localWindowToDomainOffset = DataSpace::create(0); - - /* load all fields */ - meta::ForEach > forEachLoadFields; - forEachLoadFields(&mThreadParams); - - /* load all particles */ - meta::ForEach > forEachLoadSpecies; - forEachLoadSpecies(&mThreadParams, restartChunkSize); - - IdProvider::State idProvState; - ReadNDScalars()(mThreadParams, - "picongpu/idProvider/startId", &idProvState.startId, - "maxNumProc", &idProvState.maxNumProc); - ReadNDScalars()(mThreadParams, - "picongpu/idProvider/nextId", &idProvState.nextId); - log ("Setting next free id on current rank: %1%") % idProvState.nextId; - IdProvider::setState(idProvState); - - /* free memory allocated in ADIOS calls */ - free(slidesPtr); - free(lastStepPtr); - - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - 
__getTransactionEvent().waitForFinished(); - - /* clean shut down: close file and finalize */ - adios_release_step( mThreadParams.fp ); - ADIOS_CMD(adios_read_close( mThreadParams.fp )); - ADIOS_CMD(adios_read_finalize_method(ADIOS_READ_METHOD_BP)); - } - -private: - - void endAdios() - { - /* Finalize adios library */ - ADIOS_CMD(adios_finalize(Environment::get().GridController() - .getCommunicator().getRank())); - - __deleteArray(mThreadParams.fieldBfr); - } - - void beginAdios(const std::string adiosFilename) - { - std::stringstream full_filename; - full_filename << adiosFilename << "_" << mThreadParams.currentStep << ".bp"; - - mThreadParams.fullFilename = full_filename.str(); - mThreadParams.adiosFileHandle = ADIOS_INVALID_HANDLE; - - // Note: here we always allocate for the domain-bound fields - mThreadParams.fieldBfr = new float_X[mThreadParams.window.localDimensions.size.productOfComponents()]; - - std::stringstream adiosPathBase; - adiosPathBase << ADIOS_PATH_ROOT << mThreadParams.currentStep << "/"; - mThreadParams.adiosBasePath = adiosPathBase.str(); - - ADIOS_CMD(adios_init_noxml(mThreadParams.adiosComm)); - } - - /** - * Notification for dump or checkpoint received - * - * @param currentStep current simulation step - */ - void dumpData(uint32_t currentStep) - { - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - mThreadParams.cellDescription = m_cellDescription; - mThreadParams.currentStep = currentStep; - - for (uint32_t i = 0; i < simDim; ++i) - { - mThreadParams.localWindowToDomainOffset[i] = 0; - if (mThreadParams.window.globalDimensions.offset[i] > localDomain.offset[i]) - { - mThreadParams.localWindowToDomainOffset[i] = - mThreadParams.window.globalDimensions.offset[i] - - localDomain.offset[i]; + mThreadParams.fp = adios_read_open( + filename.c_str(), + ADIOS_READ_METHOD_BP, + mThreadParams.adiosComm, + ADIOS_LOCKMODE_CURRENT, + timeout); + } + if(adios_errno == err_end_of_stream) + /* could not read full 
stream */ + throw std::runtime_error("ADIOS: Stream terminated too early: " + std::string(adios_errmsg())); + if(mThreadParams.fp == nullptr) + throw std::runtime_error("ADIOS: Error opening stream: " + std::string(adios_errmsg())); + + /* ADIOS types */ + AdiosUInt32Type adiosUInt32Type; + + /* load number of slides to initialize MovingWindow */ + log("ADIOS: (begin) read attr (%1% available)") % mThreadParams.fp->nattrs; + void* slidesPtr = nullptr; + int slideSize; + enum ADIOS_DATATYPES slidesType; + const std::string simSlidesPath = mThreadParams.adiosBasePath + std::string("sim_slides"); + ADIOS_CMD( + adios_get_attr(mThreadParams.fp, simSlidesPath.c_str(), &slidesType, &slideSize, &slidesPtr)); + + uint32_t slides = *((uint32_t*) slidesPtr); + log("ADIOS: value of sim_slides = %1%") % slides; + + PMACC_ASSERT(slidesType == adiosUInt32Type.type); + PMACC_ASSERT(slideSize == sizeof(uint32_t)); // uint32_t in bytes + + void* lastStepPtr = nullptr; + int lastStepSize; + enum ADIOS_DATATYPES lastStepType; + const std::string iterationPath = mThreadParams.adiosBasePath + std::string("iteration"); + ADIOS_CMD(adios_get_attr( + mThreadParams.fp, + iterationPath.c_str(), + &lastStepType, + &lastStepSize, + &lastStepPtr)); + uint32_t lastStep = *((uint32_t*) lastStepPtr); + log("ADIOS: value of iteration = %1%") % lastStep; + + PMACC_ASSERT(lastStepType == adiosUInt32Type.type); + PMACC_ASSERT(lastStep == restartStep); + + /* apply slides to set gpus to last/written configuration */ + log("ADIOS: Setting slide count for moving window to %1%") % slides; + MovingWindow::getInstance().setSlideCounter(slides, restartStep); + + /* re-distribute the local offsets in y-direction + * this will work for restarts with moving window still enabled + * and restarts that disable the moving window + * \warning enabling the moving window from a checkpoint that + * had no moving window will not work + */ + GridController& gc = Environment::get().GridController(); + 
gc.setStateAfterSlides(slides); + + /* set window for restart, complete global domain */ + mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(restartStep); + mThreadParams.localWindowToDomainOffset = DataSpace::create(0); + + /* load all fields */ + meta::ForEach> forEachLoadFields; + forEachLoadFields(&mThreadParams); + + /* load all particles */ + meta::ForEach> forEachLoadSpecies; + forEachLoadSpecies(&mThreadParams, restartChunkSize); + + IdProvider::State idProvState; + ReadNDScalars()( + mThreadParams, + "picongpu/idProvider/startId", + &idProvState.startId, + "maxNumProc", + &idProvState.maxNumProc); + ReadNDScalars()(mThreadParams, "picongpu/idProvider/nextId", &idProvState.nextId); + log("Setting next free id on current rank: %1%") % idProvState.nextId; + IdProvider::setState(idProvState); + + /* free memory allocated in ADIOS calls */ + free(slidesPtr); + free(lastStepPtr); + + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); + + /* clean shut down: close file and finalize */ + adios_release_step(mThreadParams.fp); + ADIOS_CMD(adios_read_close(mThreadParams.fp)); + ADIOS_CMD(adios_read_finalize_method(ADIOS_READ_METHOD_BP)); } - } - /* copy species only one time per timestep to the host */ - if( lastSpeciesSyncStep != currentStep ) - { - DataConnector &dc = Environment<>::get().DataConnector(); + private: + void endAdios() + { + /* Finalize adios library */ + ADIOS_CMD(adios_finalize(Environment::get().GridController().getCommunicator().getRank())); -#if( PMACC_CUDA_ENABLED == 1 ) - /* synchronizes the MallocMCBuffer to the host side */ - dc.get< MallocMCBuffer< DeviceHeap > >( MallocMCBuffer< DeviceHeap >::getName() ); -#endif - /* here we are copying all species to the host side since we - * can not say at this point if this time step will need all of them - * for sure (checkpoint) or just some user-defined species (dump) - */ - meta::ForEach > copySpeciesToHost; - 
copySpeciesToHost(); - lastSpeciesSyncStep = currentStep; -#if( PMACC_CUDA_ENABLED == 1 ) - dc.releaseData(MallocMCBuffer::getName()); -#endif - } + __deleteArray(mThreadParams.fieldBfr); + } - beginAdios(mThreadParams.adiosFilename); + void beginAdios(const std::string adiosFilename) + { + std::stringstream full_filename; + full_filename << adiosFilename << "_" << mThreadParams.currentStep << ".bp"; - writeAdios((void*) &mThreadParams, mpiTransportParams); + mThreadParams.fullFilename = full_filename.str(); + mThreadParams.adiosFileHandle = ADIOS_INVALID_HANDLE; - endAdios(); - } + // Note: here we always allocate for the domain-bound fields + mThreadParams.fieldBfr = new float_X[mThreadParams.window.localDimensions.size.productOfComponents()]; - template - static void writeField(ThreadParams *params, const uint32_t sizePtrType, - ADIOS_DATATYPES adiosType, - const uint32_t nComponents, const std::string name, - void *ptr, - const bool isDomainBound) - { - log ("ADIOS: write field: %1% %2% %3%") % - name % nComponents % ptr; - - const bool fieldTypeCorrect( boost::is_same::value ); - PMACC_CASSERT_MSG(Precision_mismatch_in_Field_Components__ADIOS,fieldTypeCorrect); - - /* data to describe source buffer */ - GridLayout field_layout = params->gridLayout; - DataSpace field_full = field_layout.getDataSpace(); - DataSpace field_no_guard = params->window.localDimensions.size; - DataSpace field_guard = field_layout.getGuard() + params->localWindowToDomainOffset; - float_X * dstBuffer = params->fieldBfr; - - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. 
- */ - std::vector< float_X > nonDomainBoundStorage; - if( !isDomainBound ) - { - field_no_guard = field_layout.getDataSpaceWithoutGuarding(); - field_guard = field_layout.getGuard(); - /* Since params->fieldBfr allocation was of different size, - * for this case allocate a new chunk for memory for dstBuffer - */ - nonDomainBoundStorage.resize( field_no_guard.productOfComponents() ); - dstBuffer = nonDomainBoundStorage.data(); - } + std::stringstream adiosPathBase; + adiosPathBase << ADIOS_PATH_ROOT << mThreadParams.currentStep << "/"; + mThreadParams.adiosBasePath = adiosPathBase.str(); - /* write the actual field data */ - for (uint32_t d = 0; d < nComponents; d++) - { - const size_t plane_full_size = field_full[1] * field_full[0] * nComponents; - const size_t plane_no_guard_size = field_no_guard[1] * field_no_guard[0]; + ADIOS_CMD(adios_init_noxml(mThreadParams.adiosComm)); + } - /* copy strided data from source to temporary buffer + /** + * Notification for dump or checkpoint received * - * \todo use d1Access as in `include/plugins/hdf5/writer/Field.hpp` + * @param currentStep current simulation step */ - const int maxZ = simDim == DIM3 ? field_no_guard[2] : 1; - const int guardZ = simDim == DIM3 ? 
field_guard[2] : 0; - for (int z = 0; z < maxZ; ++z) + void dumpData(uint32_t currentStep) { - for (int y = 0; y < field_no_guard[1]; ++y) - { - const size_t base_index_src = - (z + guardZ) * plane_full_size + - (y + field_guard[1]) * field_full[0] * nComponents; - - const size_t base_index_dst = - z * plane_no_guard_size + - y * field_no_guard[0]; + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + mThreadParams.cellDescription = m_cellDescription; + mThreadParams.currentStep = currentStep; - for (int x = 0; x < field_no_guard[0]; ++x) + for(uint32_t i = 0; i < simDim; ++i) + { + mThreadParams.localWindowToDomainOffset[i] = 0; + if(mThreadParams.window.globalDimensions.offset[i] > localDomain.offset[i]) { - size_t index_src = base_index_src + (x + field_guard[0]) * nComponents + d; - size_t index_dst = base_index_dst + x; - - dstBuffer[index_dst] = ((float_X*)ptr)[index_src]; + mThreadParams.localWindowToDomainOffset[i] + = mThreadParams.window.globalDimensions.offset[i] - localDomain.offset[i]; } } - } - /* Write the actual field data. The id is on the front of the list. 
*/ - if (params->adiosFieldVarIds.empty()) - throw std::runtime_error("Cannot write field (var id list is empty)"); - - int64_t adiosFieldVarId = *(params->adiosFieldVarIds.begin()); - params->adiosFieldVarIds.pop_front(); - ADIOS_CMD(adios_write_byid(params->adiosFileHandle, adiosFieldVarId, dstBuffer)); - } - } + /* copy species only one time per timestep to the host */ + if(lastSpeciesSyncStep != currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /* synchronizes the MallocMCBuffer to the host side */ + dc.get>(MallocMCBuffer::getName()); + + /* here we are copying all species to the host side since we + * can not say at this point if this time step will need all of them + * for sure (checkpoint) or just some user-defined species (dump) + */ + meta::ForEach> copySpeciesToHost; + copySpeciesToHost(); + lastSpeciesSyncStep = currentStep; + dc.releaseData(MallocMCBuffer::getName()); + } - template< typename T_ParticleFilter> - struct CallCountParticles - { + beginAdios(mThreadParams.adiosFilename); - void operator()( - const std::vector< std::string > & vectorOfDataSourceNames, - ThreadParams* params - ) - { - bool const containsDataSource = plugins::misc::containsObject( - vectorOfDataSourceNames, - T_ParticleFilter::getName() - ); + writeAdios((void*) &mThreadParams, mpiTransportParams); - if( containsDataSource ) - { - ADIOSCountParticles< - T_ParticleFilter - > count; - count(params); + endAdios(); } - } - }; - - template< typename T_ParticleFilter> - struct CallWriteSpecies - { - - template< typename Space > - void operator()( - const std::vector< std::string > & vectorOfDataSourceNames, - ThreadParams* params, - const Space domainOffset - ) - { - bool const containsDataSource = plugins::misc::containsObject( - vectorOfDataSourceNames, - T_ParticleFilter::getName() - ); - - if( containsDataSource ) + template + static void writeField( + ThreadParams* params, + const uint32_t sizePtrType, + ADIOS_DATATYPES adiosType, + const 
uint32_t nComponents, + const std::string name, + void* ptr, + const bool isDomainBound) { - WriteSpecies< - T_ParticleFilter - > writeSpecies; - writeSpecies(params, domainOffset); - } - - } - }; + log("ADIOS: write field: %1% %2% %3%") % name % nComponents % ptr; + + const bool fieldTypeCorrect(boost::is_same::value); + PMACC_CASSERT_MSG(Precision_mismatch_in_Field_Components__ADIOS, fieldTypeCorrect); + + /* data to describe source buffer */ + GridLayout field_layout = params->gridLayout; + DataSpace field_full = field_layout.getDataSpace(); + DataSpace field_no_guard = params->window.localDimensions.size; + DataSpace field_guard = field_layout.getGuard() + params->localWindowToDomainOffset; + float_X* dstBuffer = params->fieldBfr; + + /* Patch for non-domain-bound fields + * This is an ugly fix to allow output of reduced 1d PML buffers, + * that are the same size on each domain. + * This code is to be replaced with the openPMD output plugin soon. + */ + std::vector nonDomainBoundStorage; + if(!isDomainBound) + { + field_no_guard = field_layout.getDataSpaceWithoutGuarding(); + field_guard = field_layout.getGuard(); + /* Since params->fieldBfr allocation was of different size, + * for this case allocate a new chunk for memory for dstBuffer + */ + nonDomainBoundStorage.resize(field_no_guard.productOfComponents()); + dstBuffer = nonDomainBoundStorage.data(); + } - template< typename T_Fields > - struct CallCollectFieldsSizes - { + /* write the actual field data */ + for(uint32_t d = 0; d < nComponents; d++) + { + const size_t plane_full_size = field_full[1] * field_full[0] * nComponents; + const size_t plane_no_guard_size = field_no_guard[1] * field_no_guard[0]; + + /* copy strided data from source to temporary buffer + * + * \todo use d1Access as in `include/plugins/hdf5/writer/Field.hpp` + */ + const int maxZ = simDim == DIM3 ? field_no_guard[2] : 1; + const int guardZ = simDim == DIM3 ? 
field_guard[2] : 0; + for(int z = 0; z < maxZ; ++z) + { + for(int y = 0; y < field_no_guard[1]; ++y) + { + const size_t base_index_src + = (z + guardZ) * plane_full_size + (y + field_guard[1]) * field_full[0] * nComponents; - void operator()( - const std::vector< std::string > & vectorOfDataSourceNames, - ThreadParams* params - ) - { - bool const containsDataSource = plugins::misc::containsObject( - vectorOfDataSourceNames, - T_Fields::getName() - ); + const size_t base_index_dst = z * plane_no_guard_size + y * field_no_guard[0]; - if( containsDataSource ) - { - CollectFieldsSizes< - T_Fields - > count; - count(params); - } + for(int x = 0; x < field_no_guard[0]; ++x) + { + size_t index_src = base_index_src + (x + field_guard[0]) * nComponents + d; + size_t index_dst = base_index_dst + x; - } - }; + dstBuffer[index_dst] = ((float_X*) ptr)[index_src]; + } + } + } - template< typename T_Fields > - struct CallGetFields - { + /* Write the actual field data. The id is on the front of the list. 
*/ + if(params->adiosFieldVarIds.empty()) + throw std::runtime_error("Cannot write field (var id list is empty)"); - void operator()( - const std::vector< std::string > & vectorOfDataSourceNames, - ThreadParams* params - ) - { - bool const containsDataSource = plugins::misc::containsObject( - vectorOfDataSourceNames, - T_Fields::getName() - ); - - if( containsDataSource ) - { - GetFields< - T_Fields - > getFields; - getFields( params ); + int64_t adiosFieldVarId = *(params->adiosFieldVarIds.begin()); + params->adiosFieldVarIds.pop_front(); + ADIOS_CMD(adios_write_byid(params->adiosFileHandle, adiosFieldVarId, dstBuffer)); + } } - } - }; - - void *writeAdios(void *p_args, std::string mpiTransportParams) - { - - // synchronize, because following operations will be blocking anyway - ThreadParams *threadParams = (ThreadParams*) (p_args); - threadParams->adiosGroupSize = 0; - - /* y direction can be negative for first gpu */ - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - DataSpace particleOffset(localDomain.offset); - particleOffset.y() -= threadParams->window.globalDimensions.offset.y(); - - // do not generate statistics for variables on the fly - ADIOS_STATISTICS_FLAG noStatistics = adios_stat_no; - - /* create adios group for fields without statistics */ - const std::string iterationPath = - threadParams->adiosBasePath + std::string("iteration"); - ADIOS_CMD(adios_declare_group(&(threadParams->adiosGroupHandle), - ADIOS_GROUP_NAME, - iterationPath.c_str(), - noStatistics)); - - /* select MPI method, #OSTs and #aggregators */ - ADIOS_CMD(adios_select_method(threadParams->adiosGroupHandle, - "MPI_AGGREGATE", mpiTransportParams.c_str(), "")); - - threadParams->fieldsOffsetDims = precisionCast(localDomain.offset); - - /* write created variable values */ - for (uint32_t d = 0; d < simDim; ++d) - { - /* dimension 1 is y and is the direction of the moving window (if any) */ - if (1 == d) + template + struct CallCountParticles { - 
uint64_t offset = std::max(0, localDomain.offset.y() - - threadParams->window.globalDimensions.offset.y()); - threadParams->fieldsOffsetDims[d] = offset; - } - - threadParams->fieldsSizeDims[d] = threadParams->window.localDimensions.size[d]; - threadParams->fieldsGlobalSizeDims[d] = threadParams->window.globalDimensions.size[d]; - } + void operator()(const std::vector& vectorOfDataSourceNames, ThreadParams* params) + { + bool const containsDataSource + = plugins::misc::containsObject(vectorOfDataSourceNames, T_ParticleFilter::getName()); - std::vector< std::string > vectorOfDataSourceNames; - if( m_help->selfRegister ) - { - std::string dataSourceNames = m_help->source.get( m_id ); + if(containsDataSource) + { + ADIOSCountParticles count; + count(params); + } + } + }; - vectorOfDataSourceNames = plugins::misc::splitString( - plugins::misc::removeSpaces( dataSourceNames ) - ); - } + template + struct CallWriteSpecies + { + template + void operator()( + const std::vector& vectorOfDataSourceNames, + ThreadParams* params, + const Space domainOffset) + { + bool const containsDataSource + = plugins::misc::containsObject(vectorOfDataSourceNames, T_ParticleFilter::getName()); - bool dumpFields = plugins::misc::containsObject( - vectorOfDataSourceNames, - "fields_all" - ); + if(containsDataSource) + { + WriteSpecies writeSpecies; + writeSpecies(params, domainOffset); + } + } + }; - /* collect size information for each field to be written and define - * field variables - */ - log ("ADIOS: (begin) collecting fields."); - threadParams->adiosFieldVarIds.clear(); - if (threadParams->isCheckpoint) - { - meta::ForEach< - FileCheckpointFields, - CollectFieldsSizes< bmpl::_1 > - > forEachCollectFieldsSizes; - forEachCollectFieldsSizes(threadParams); - } - else - { - if( dumpFields ) + template + struct CallCollectFieldsSizes { - meta::ForEach< - FileOutputFields, - CollectFieldsSizes< bmpl::_1 > - > forEachCollectFieldsSizes; - forEachCollectFieldsSizes(threadParams); - } + void 
operator()(const std::vector& vectorOfDataSourceNames, ThreadParams* params) + { + bool const containsDataSource + = plugins::misc::containsObject(vectorOfDataSourceNames, T_Fields::getName()); - // move over all field data sources - meta::ForEach< - typename Help::AllFieldSources, - CallCollectFieldsSizes< - bmpl::_1 - > - >{}(vectorOfDataSourceNames, threadParams); - } - log ("ADIOS: ( end ) collecting fields."); + if(containsDataSource) + { + CollectFieldsSizes count; + count(params); + } + } + }; - /* collect size information for all attributes of all species and define - * particle variables - */ - threadParams->adiosParticleAttrVarIds.clear(); - threadParams->adiosSpeciesIndexVarIds.clear(); + template + struct CallGetFields + { + void operator()(const std::vector& vectorOfDataSourceNames, ThreadParams* params) + { + bool const containsDataSource + = plugins::misc::containsObject(vectorOfDataSourceNames, T_Fields::getName()); - bool dumpAllParticles = plugins::misc::containsObject( - vectorOfDataSourceNames, - "species_all" - ); + if(containsDataSource) + { + GetFields getFields; + getFields(params); + } + } + }; - log ("ADIOS: (begin) counting particles."); - if (threadParams->isCheckpoint) - { - meta::ForEach< - FileCheckpointParticles, - ADIOSCountParticles< - plugins::misc::UnfilteredSpecies< bmpl::_1 > - > - > adiosCountParticles; - adiosCountParticles( threadParams ); - } - else - { - // count particles if data source "species_all" is selected - if( dumpAllParticles ) + void* writeAdios(void* p_args, std::string mpiTransportParams) { - // move over all species defined in FileOutputParticles - meta::ForEach< - FileOutputParticles, - ADIOSCountParticles< - plugins::misc::UnfilteredSpecies< bmpl::_1 > - > - > adiosCountParticles; - adiosCountParticles( threadParams ); - } + // synchronize, because following operations will be blocking anyway + ThreadParams* threadParams = (ThreadParams*) (p_args); + threadParams->adiosGroupSize = 0; + + /* y direction can 
be negative for first gpu */ + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + DataSpace particleOffset(localDomain.offset); + particleOffset.y() -= threadParams->window.globalDimensions.offset.y(); + + // do not generate statistics for variables on the fly + ADIOS_STATISTICS_FLAG noStatistics = adios_stat_no; + + /* create adios group for fields without statistics */ + const std::string iterationPath = threadParams->adiosBasePath + std::string("iteration"); + ADIOS_CMD(adios_declare_group( + &(threadParams->adiosGroupHandle), + ADIOS_GROUP_NAME, + iterationPath.c_str(), + noStatistics)); + + /* select MPI method, #OSTs and #aggregators */ + ADIOS_CMD(adios_select_method( + threadParams->adiosGroupHandle, + "MPI_AGGREGATE", + mpiTransportParams.c_str(), + "")); + + threadParams->fieldsOffsetDims = precisionCast(localDomain.offset); + + /* write created variable values */ + for(uint32_t d = 0; d < simDim; ++d) + { + /* dimension 1 is y and is the direction of the moving window (if any) */ + if(1 == d) + { + uint64_t offset + = std::max(0, localDomain.offset.y() - threadParams->window.globalDimensions.offset.y()); + threadParams->fieldsOffsetDims[d] = offset; + } - // move over all species data sources - meta::ForEach< - typename Help::AllEligibleSpeciesSources, - CallCountParticles< - bmpl::_1 - > - >{}(vectorOfDataSourceNames, threadParams); - } - log ("ADIOS: ( end ) counting particles."); + threadParams->fieldsSizeDims[d] = threadParams->window.localDimensions.size[d]; + threadParams->fieldsGlobalSizeDims[d] = threadParams->window.globalDimensions.size[d]; + } - auto idProviderState = IdProvider::getState(); - WriteNDScalars writeIdProviderStartId("picongpu/idProvider/startId", "maxNumProc"); - WriteNDScalars writeIdProviderNextId("picongpu/idProvider/nextId"); - writeIdProviderStartId.prepare(*threadParams, idProviderState.maxNumProc); - writeIdProviderNextId.prepare(*threadParams); + std::vector vectorOfDataSourceNames; + 
if(m_help->selfRegister) + { + std::string dataSourceNames = m_help->source.get(m_id); - // in the past, we had to explicitly estiamte our buffers. - // this is now done automatically by ADIOS on `adios_write()` - threadParams->adiosBufferInitialized = true; + vectorOfDataSourceNames = plugins::misc::splitString(plugins::misc::removeSpaces(dataSourceNames)); + } - /* open adios file. all variables need to be defined at this point */ - log ("ADIOS: open file: %1%") % threadParams->fullFilename; - ADIOS_CMD(adios_open(&(threadParams->adiosFileHandle), ADIOS_GROUP_NAME, - threadParams->fullFilename.c_str(), "w", threadParams->adiosComm)); + bool dumpFields = plugins::misc::containsObject(vectorOfDataSourceNames, "fields_all"); - if (threadParams->adiosFileHandle == ADIOS_INVALID_HANDLE) - throw std::runtime_error("ADIOS: Failed to open file."); + /* collect size information for each field to be written and define + * field variables + */ + log("ADIOS: (begin) collecting fields."); + threadParams->adiosFieldVarIds.clear(); + if(threadParams->isCheckpoint) + { + meta::ForEach> forEachCollectFieldsSizes; + forEachCollectFieldsSizes(threadParams); + } + else + { + if(dumpFields) + { + meta::ForEach> forEachCollectFieldsSizes; + forEachCollectFieldsSizes(threadParams); + } - /* attributes written here are pure meta data */ - WriteMeta writeMetaAttributes; - writeMetaAttributes(threadParams); + // move over all field data sources + meta::ForEach>{}( + vectorOfDataSourceNames, + threadParams); + } + log("ADIOS: ( end ) collecting fields."); - /* set adios group size (total size of all data to be written) - * besides the number of bytes for variables, this call also - * calculates the overhead of meta data - */ - uint64_t adiosTotalSize; - ADIOS_CMD(adios_group_size(threadParams->adiosFileHandle, - threadParams->adiosGroupSize, &adiosTotalSize)); + /* collect size information for all attributes of all species and define + * particle variables + */ + 
threadParams->adiosParticleAttrVarIds.clear(); + threadParams->adiosSpeciesIndexVarIds.clear(); - /* write fields */ - log ("ADIOS: (begin) writing fields."); - if (threadParams->isCheckpoint) - { - meta::ForEach< - FileCheckpointFields, - GetFields< bmpl::_1 > - > forEachGetFields; - forEachGetFields(threadParams); - } - else - { - if( dumpFields ) - { - meta::ForEach< - FileOutputFields, - GetFields< bmpl::_1 > - > forEachGetFields; - forEachGetFields(threadParams); - } + bool dumpAllParticles = plugins::misc::containsObject(vectorOfDataSourceNames, "species_all"); - // move over all field data sources - meta::ForEach< - typename Help::AllFieldSources, - CallGetFields< - bmpl::_1 - > - >{}(vectorOfDataSourceNames, threadParams); - } - log ("ADIOS: ( end ) writing fields."); + log("ADIOS: (begin) counting particles."); + if(threadParams->isCheckpoint) + { + meta::ForEach< + FileCheckpointParticles, + ADIOSCountParticles>> + adiosCountParticles; + adiosCountParticles(threadParams); + } + else + { + // count particles if data source "species_all" is selected + if(dumpAllParticles) + { + // move over all species defined in FileOutputParticles + meta::ForEach< + FileOutputParticles, + ADIOSCountParticles>> + adiosCountParticles; + adiosCountParticles(threadParams); + } - /* print all particle species */ - log ("ADIOS: (begin) writing particle species."); - if (threadParams->isCheckpoint) - { - meta::ForEach< - FileCheckpointParticles, - WriteSpecies< - plugins::misc::SpeciesFilter< bmpl::_1 > - > - > writeSpecies; - writeSpecies(threadParams, particleOffset); - } - else - { - // dump data if data source "species_all" is selected - if( dumpAllParticles ) - { - // move over all species defined in FileOutputParticles - meta::ForEach< - FileOutputParticles, - WriteSpecies< - plugins::misc::UnfilteredSpecies< bmpl::_1 > - > - > writeSpecies; - writeSpecies( threadParams, particleOffset ); - } + // move over all species data sources + meta::ForEach>{}( + 
vectorOfDataSourceNames, + threadParams); + } + log("ADIOS: ( end ) counting particles."); + + auto idProviderState = IdProvider::getState(); + WriteNDScalars writeIdProviderStartId("picongpu/idProvider/startId", "maxNumProc"); + WriteNDScalars writeIdProviderNextId("picongpu/idProvider/nextId"); + writeIdProviderStartId.prepare(*threadParams, idProviderState.maxNumProc); + writeIdProviderNextId.prepare(*threadParams); + + // in the past, we had to explicitly estiamte our buffers. + // this is now done automatically by ADIOS on `adios_write()` + threadParams->adiosBufferInitialized = true; + + /* open adios file. all variables need to be defined at this point */ + log("ADIOS: open file: %1%") % threadParams->fullFilename; + ADIOS_CMD(adios_open( + &(threadParams->adiosFileHandle), + ADIOS_GROUP_NAME, + threadParams->fullFilename.c_str(), + "w", + threadParams->adiosComm)); + + if(threadParams->adiosFileHandle == ADIOS_INVALID_HANDLE) + throw std::runtime_error("ADIOS: Failed to open file."); + + /* attributes written here are pure meta data */ + WriteMeta writeMetaAttributes; + writeMetaAttributes(threadParams); + + /* set adios group size (total size of all data to be written) + * besides the number of bytes for variables, this call also + * calculates the overhead of meta data + */ + uint64_t adiosTotalSize; + ADIOS_CMD( + adios_group_size(threadParams->adiosFileHandle, threadParams->adiosGroupSize, &adiosTotalSize)); + + /* write fields */ + log("ADIOS: (begin) writing fields."); + if(threadParams->isCheckpoint) + { + meta::ForEach> forEachGetFields; + forEachGetFields(threadParams); + } + else + { + if(dumpFields) + { + meta::ForEach> forEachGetFields; + forEachGetFields(threadParams); + } - // move over all species data sources - meta::ForEach< - typename Help::AllEligibleSpeciesSources, - CallWriteSpecies< - bmpl::_1 - > - >{}(vectorOfDataSourceNames, threadParams, particleOffset); - } - log ("ADIOS: ( end ) writing particle species."); + // move over all 
field data sources + meta::ForEach>{}( + vectorOfDataSourceNames, + threadParams); + } + log("ADIOS: ( end ) writing fields."); - log("ADIOS: Writing IdProvider state (StartId: %1%, NextId: %2%, maxNumProc: %3%)") - % idProviderState.startId % idProviderState.nextId % idProviderState.maxNumProc; - writeIdProviderStartId(*threadParams, idProviderState.startId); - writeIdProviderNextId(*threadParams, idProviderState.nextId); + /* print all particle species */ + log("ADIOS: (begin) writing particle species."); + if(threadParams->isCheckpoint) + { + meta::ForEach>> + writeSpecies; + writeSpecies(threadParams, particleOffset); + } + else + { + // dump data if data source "species_all" is selected + if(dumpAllParticles) + { + // move over all species defined in FileOutputParticles + meta::ForEach>> + writeSpecies; + writeSpecies(threadParams, particleOffset); + } - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); + // move over all species data sources + meta::ForEach>{}( + vectorOfDataSourceNames, + threadParams, + particleOffset); + } + log("ADIOS: ( end ) writing particle species."); - /* close adios file, most likely the actual write point */ - log ("ADIOS: closing file: %1%") % threadParams->fullFilename; - ADIOS_CMD(adios_close(threadParams->adiosFileHandle)); + log( + "ADIOS: Writing IdProvider state (StartId: %1%, NextId: %2%, maxNumProc: %3%)") + % idProviderState.startId % idProviderState.nextId % idProviderState.maxNumProc; + writeIdProviderStartId(*threadParams, idProviderState.startId); + writeIdProviderNextId(*threadParams, idProviderState.nextId); - /*\todo: copied from adios example, we might not need this ? 
*/ - MPI_CHECK(MPI_Barrier(threadParams->adiosComm)); + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); - return nullptr; - } + /* close adios file, most likely the actual write point */ + log("ADIOS: closing file: %1%") % threadParams->fullFilename; + ADIOS_CMD(adios_close(threadParams->adiosFileHandle)); - ThreadParams mThreadParams; + /*\todo: copied from adios example, we might not need this ? */ + MPI_CHECK(MPI_Barrier(threadParams->adiosComm)); - std::shared_ptr< Help > m_help; - size_t m_id; + return nullptr; + } - MappingDesc *m_cellDescription; + ThreadParams mThreadParams; - std::string outputDirectory; + std::shared_ptr m_help; + size_t m_id; - /* select MPI method, #OSTs and #aggregators */ - std::string mpiTransportParams; + MappingDesc* m_cellDescription; - uint32_t lastSpeciesSyncStep; + std::string outputDirectory; - DataSpace mpi_pos; - DataSpace mpi_size; -}; + /* select MPI method, #OSTs and #aggregators */ + std::string mpiTransportParams; -} //namespace adios -} //namespace picongpu + uint32_t lastSpeciesSyncStep; + + DataSpace mpi_pos; + DataSpace mpi_size; + }; + } // namespace adios +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/NDScalars.hpp b/include/picongpu/plugins/adios/NDScalars.hpp index e44202f5ea..e86730585c 100644 --- a/include/picongpu/plugins/adios/NDScalars.hpp +++ b/include/picongpu/plugins/adios/NDScalars.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PIConGPU. 
* @@ -26,52 +26,54 @@ #include #include -namespace picongpu { -namespace adios { - -/** Functor for writing ND scalar fields with N=simDim - * In the current implementation each process (of the ND grid of processes) writes 1 scalar value - * Optionally the processes can also write an attribute for this dataset by using a non-empty attrName - * - * @tparam T_Scalar Type of the scalar value to write - * @tparam T_Attribute Type of the attribute (can be omitted if attribute is not written, defaults to uint64_t) - */ -template -struct WriteNDScalars +namespace picongpu { - WriteNDScalars(const std::string& name, const std::string& attrName = ""): - name(name), attrName(attrName){} - - /** Prepare the write operation: - * Define ADIOS variable, increase params.adiosGroupSize and write attribute (if attrName is non-empty) - * - * Must be called before executing the functor - */ - void prepare(ThreadParams& params, T_Attribute attribute = T_Attribute()) + namespace adios { - typedef traits::PICToAdios AdiosSkalarType; - typedef pmacc::math::UInt64 Dimensions; - - log ("ADIOS: prepare write %1%D scalars: %2%") % simDim % name; - - params.adiosGroupSize += sizeof(T_Scalar); - if(!attrName.empty()) - params.adiosGroupSize += sizeof(T_Attribute); - - // Size over all processes - Dimensions globalDomainSize = Dimensions::create(1); - // Offset for this process - Dimensions localDomainOffset = Dimensions::create(0); - - for (uint32_t d = 0; d < simDim; ++d) + /** Functor for writing ND scalar fields with N=simDim + * In the current implementation each process (of the ND grid of processes) writes 1 scalar value + * Optionally the processes can also write an attribute for this dataset by using a non-empty attrName + * + * @tparam T_Scalar Type of the scalar value to write + * @tparam T_Attribute Type of the attribute (can be omitted if attribute is not written, defaults to uint64_t) + */ + template + struct WriteNDScalars { - globalDomainSize[d] = 
Environment::get().GridController().getGpuNodes()[d]; - localDomainOffset[d] = Environment::get().GridController().getPosition()[d]; - } - - std::string datasetName = params.adiosBasePath + name; - - varId = defineAdiosVar( + WriteNDScalars(const std::string& name, const std::string& attrName = "") : name(name), attrName(attrName) + { + } + + /** Prepare the write operation: + * Define ADIOS variable, increase params.adiosGroupSize and write attribute (if attrName is non-empty) + * + * Must be called before executing the functor + */ + void prepare(ThreadParams& params, T_Attribute attribute = T_Attribute()) + { + typedef traits::PICToAdios AdiosSkalarType; + typedef pmacc::math::UInt64 Dimensions; + + log("ADIOS: prepare write %1%D scalars: %2%") % simDim % name; + + params.adiosGroupSize += sizeof(T_Scalar); + if(!attrName.empty()) + params.adiosGroupSize += sizeof(T_Attribute); + + // Size over all processes + Dimensions globalDomainSize = Dimensions::create(1); + // Offset for this process + Dimensions localDomainOffset = Dimensions::create(0); + + for(uint32_t d = 0; d < simDim; ++d) + { + globalDomainSize[d] = Environment::get().GridController().getGpuNodes()[d]; + localDomainOffset[d] = Environment::get().GridController().getPosition()[d]; + } + + std::string datasetName = params.adiosBasePath + name; + + varId = defineAdiosVar( params.adiosGroupHandle, datasetName.c_str(), nullptr, @@ -82,86 +84,96 @@ struct WriteNDScalars true, params.adiosCompression); - if(!attrName.empty()) - { - typedef traits::PICToAdios AdiosAttrType; - - log ("ADIOS: write attribute %1% of %2%D scalars: %3%") % attrName % simDim % name; - ADIOS_CMD( adios_define_attribute_byvalue(params.adiosGroupHandle, - attrName.c_str(), datasetName.c_str(), - AdiosAttrType().type, 1, (void*)&attribute) ); - } - } - - void operator()(ThreadParams& params, T_Scalar value) - { - log ("ADIOS: write %1%D scalars: %2%") % simDim % name; - - ADIOS_CMD( adios_write_byid(params.adiosFileHandle, varId, 
&value) ); - } -private: - const std::string name, attrName; - int64_t varId; -}; - -/** Functor for reading ND scalar fields with N=simDim - * In the current implementation each process (of the ND grid of processes) reads 1 scalar value - * Optionally the processes can also read an attribute for this dataset by using a non-empty attrName - * - * @tparam T_Scalar Type of the scalar value to read - * @tparam T_Attribute Type of the attribute (can be omitted if attribute is not read, defaults to uint64_t) - */ -template -struct ReadNDScalars -{ - /** Read the skalar field and optionally the attribute into the values referenced by the pointers */ - void operator()(ThreadParams& params, - const std::string& name, T_Scalar* value, - const std::string& attrName = "", T_Attribute* attribute = nullptr) - { - log ("ADIOS: read %1%D scalars: %2%") % simDim % name; - std::string datasetName = params.adiosBasePath + name; - - ADIOS_VARINFO* varInfo; - ADIOS_CMD_EXPECT_NONNULL( varInfo = adios_inq_var(params.fp, datasetName.c_str()) ); - if(varInfo->ndim != simDim) - throw std::runtime_error(std::string("Invalid dimensionality for ") + name); - if(varInfo->type != traits::PICToAdios().type) - throw std::runtime_error(std::string("Invalid type for ") + name); - - DataSpace gridPos = Environment::get().GridController().getPosition(); - uint64_t start[varInfo->ndim]; - uint64_t count[varInfo->ndim]; - for(int d = 0; d < varInfo->ndim; ++d) - { - /* \see adios_define_var: z,y,x in C-order */ - start[d] = gridPos.revert()[d]; - count[d] = 1; - } - - ADIOS_SELECTION* fSel = adios_selection_boundingbox(varInfo->ndim, start, count); - - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); - - /* specify what we want to read, but start reading at below at `adios_perform_reads` */ - /* magic parameters (0, 1): `from_step` (not used in streams), `nsteps` to read (must be 1 for stream) */ - log ("ADIOS: Schedule read skalar 
%1%)") % datasetName; - ADIOS_CMD( adios_schedule_read(params.fp, fSel, datasetName.c_str(), 0, 1, (void*)value) ); - - /* start a blocking read of all scheduled variables */ - ADIOS_CMD( adios_perform_reads(params.fp, 1) ); - - adios_selection_delete(fSel); - adios_free_varinfo(varInfo); - - if(!attrName.empty()) + if(!attrName.empty()) + { + typedef traits::PICToAdios AdiosAttrType; + + log("ADIOS: write attribute %1% of %2%D scalars: %3%") % attrName % simDim + % name; + ADIOS_CMD(adios_define_attribute_byvalue( + params.adiosGroupHandle, + attrName.c_str(), + datasetName.c_str(), + AdiosAttrType().type, + 1, + (void*) &attribute)); + } + } + + void operator()(ThreadParams& params, T_Scalar value) + { + log("ADIOS: write %1%D scalars: %2%") % simDim % name; + + ADIOS_CMD(adios_write_byid(params.adiosFileHandle, varId, &value)); + } + + private: + const std::string name, attrName; + int64_t varId; + }; + + /** Functor for reading ND scalar fields with N=simDim + * In the current implementation each process (of the ND grid of processes) reads 1 scalar value + * Optionally the processes can also read an attribute for this dataset by using a non-empty attrName + * + * @tparam T_Scalar Type of the scalar value to read + * @tparam T_Attribute Type of the attribute (can be omitted if attribute is not read, defaults to uint64_t) + */ + template + struct ReadNDScalars { - log ("ADIOS: read attribute %1% for scalars: %2%") % attrName % name; - *attribute = readAttribute(params.fp, datasetName, attrName); - } - } -}; - -} // namespace adios -} // namespace picongpu + /** Read the skalar field and optionally the attribute into the values referenced by the pointers */ + void operator()( + ThreadParams& params, + const std::string& name, + T_Scalar* value, + const std::string& attrName = "", + T_Attribute* attribute = nullptr) + { + log("ADIOS: read %1%D scalars: %2%") % simDim % name; + std::string datasetName = params.adiosBasePath + name; + + ADIOS_VARINFO* varInfo; + 
ADIOS_CMD_EXPECT_NONNULL(varInfo = adios_inq_var(params.fp, datasetName.c_str())); + if(varInfo->ndim != simDim) + throw std::runtime_error(std::string("Invalid dimensionality for ") + name); + if(varInfo->type != traits::PICToAdios().type) + throw std::runtime_error(std::string("Invalid type for ") + name); + + DataSpace gridPos = Environment::get().GridController().getPosition(); + uint64_t start[varInfo->ndim]; + uint64_t count[varInfo->ndim]; + for(int d = 0; d < varInfo->ndim; ++d) + { + /* \see adios_define_var: z,y,x in C-order */ + start[d] = gridPos.revert()[d]; + count[d] = 1; + } + + ADIOS_SELECTION* fSel = adios_selection_boundingbox(varInfo->ndim, start, count); + + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); + + /* specify what we want to read, but start reading at below at `adios_perform_reads` */ + /* magic parameters (0, 1): `from_step` (not used in streams), `nsteps` to read (must be 1 for stream) + */ + log("ADIOS: Schedule read skalar %1%)") % datasetName; + ADIOS_CMD(adios_schedule_read(params.fp, fSel, datasetName.c_str(), 0, 1, (void*) value)); + + /* start a blocking read of all scheduled variables */ + ADIOS_CMD(adios_perform_reads(params.fp, 1)); + + adios_selection_delete(fSel); + adios_free_varinfo(varInfo); + + if(!attrName.empty()) + { + log("ADIOS: read attribute %1% for scalars: %2%") % attrName % name; + *attribute = readAttribute(params.fp, datasetName, attrName); + } + } + }; + + } // namespace adios +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/WriteMeta.hpp b/include/picongpu/plugins/adios/WriteMeta.hpp index 5ef96f6573..0d55bd705d 100644 --- a/include/picongpu/plugins/adios/WriteMeta.hpp +++ b/include/picongpu/plugins/adios/WriteMeta.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -37,280 +37,440 @@ namespace picongpu { -namespace adios -{ -using namespace pmacc; - -namespace writeMeta -{ - /** write openPMD species meta data - * - * @tparam numSpecies count of defined species - */ - template< uint32_t numSpecies = bmpl::size::type::value > - struct OfAllSpecies + namespace adios { - /** write meta data for species - * - * @param threadParams context of the adios plugin - * @param fullMeshesPath path to mesh entry - */ - void operator()( - ThreadParams* threadParams, - const std::string& fullMeshesPath - ) const + using namespace pmacc; + + namespace writeMeta { - // assume all boundaries are like the first species for openPMD 1.0.0 - GetStringProperties::type> particleBoundaryProp; - std::list listParticleBoundary; - std::list listParticleBoundaryParam; - for( uint32_t i = NumberOfExchanges::value - 1; i > 0; --i ) + /** write openPMD species meta data + * + * @tparam numSpecies count of defined species + */ + template::type::value> + struct OfAllSpecies { - if( FRONT % i == 0 ) + /** write meta data for species + * + * @param threadParams context of the adios plugin + * @param fullMeshesPath path to mesh entry + */ + void operator()(ThreadParams* threadParams, const std::string& fullMeshesPath) const { - listParticleBoundary.push_back( - particleBoundaryProp[ExchangeTypeNames()[i]]["name"].value - ); - listParticleBoundaryParam.push_back( - particleBoundaryProp[ExchangeTypeNames()[i]]["param"].value - ); + // assume all boundaries are like the first species for openPMD 1.0.0 + GetStringProperties::type> particleBoundaryProp; + std::list listParticleBoundary; + std::list listParticleBoundaryParam; + for(uint32_t i = NumberOfExchanges::value - 1; i > 0; --i) + { + if(FRONT % i == 0) + { + listParticleBoundary.push_back(particleBoundaryProp[ExchangeTypeNames()[i]]["name"].value); + listParticleBoundaryParam.push_back( + particleBoundaryProp[ExchangeTypeNames()[i]]["param"].value); + } + } + helper::GetADIOSArrayOfString 
getADIOSArrayOfString; + auto arrParticleBoundary = getADIOSArrayOfString(listParticleBoundary); + auto arrParticleBoundaryParam = getADIOSArrayOfString(listParticleBoundaryParam); + + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "particleBoundary", + fullMeshesPath.c_str(), + adios_string_array, + listParticleBoundary.size(), + &(arrParticleBoundary.starts.at(0)))); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "particleBoundaryParameters", + fullMeshesPath.c_str(), + adios_string_array, + listParticleBoundaryParam.size(), + &(arrParticleBoundaryParam.starts.at(0)))); } - } - helper::GetADIOSArrayOfString getADIOSArrayOfString; - auto arrParticleBoundary = getADIOSArrayOfString( listParticleBoundary ); - auto arrParticleBoundaryParam = getADIOSArrayOfString( listParticleBoundaryParam ); - - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "particleBoundary", fullMeshesPath.c_str(), adios_string_array, - listParticleBoundary.size(), &( arrParticleBoundary.starts.at( 0 ) ))); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "particleBoundaryParameters", fullMeshesPath.c_str(), adios_string_array, - listParticleBoundaryParam.size(), &( arrParticleBoundaryParam.starts.at( 0 ) ))); - } - }; - - /** specialization if no species are defined */ - template< > - struct OfAllSpecies< 0 > - { - /** write meta data for species - * - * @param threadParams context of the adios plugin - * @param fullMeshesPath path to mesh entry - */ - void operator()( - ThreadParams* /* threadParams */, - const std::string& /* fullMeshesPath */ - ) const - { - } - }; - -} // namespace writeMeta + }; - struct WriteMeta - { - void operator()(ThreadParams *threadParams) - { - log ("ADIOS: (begin) write meta attributes."); - - traits::PICToAdios adiosUInt32Type; - traits::PICToAdios adiosFloatXType; - traits::PICToAdios adiosDoubleType; - - /* openPMD attributes */ - /* required 
*/ - const std::string openPMDversion( "1.0.0" ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "openPMD", "/", adios_string, 1, (void*)openPMDversion.c_str())); - - const uint32_t openPMDextension = 1; // ED-PIC ID - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "openPMDextension", "/", adiosUInt32Type.type, 1, (void*)&openPMDextension)); - - const std::string basePath( ADIOS_PATH_ROOT"%T/" ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "basePath", "/", adios_string, 1, (void*)basePath.c_str())); - - const std::string meshesPath( ADIOS_PATH_FIELDS ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "meshesPath", "/", adios_string, 1, (void*)meshesPath.c_str())); - - const std::string particlesPath( ADIOS_PATH_PARTICLES ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "particlesPath", "/", adios_string, 1, (void*)particlesPath.c_str())); - - const std::string iterationEncoding( "fileBased" ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "iterationEncoding", "/", adios_string, 1, (void*)iterationEncoding.c_str())); - - const std::string iterationFormat( - Environment< simDim >::get().Filesystem().basename( threadParams->adiosFilename ) + - std::string("_%T.bp") - ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "iterationFormat", "/", adios_string, 1, (void*)iterationFormat.c_str())); - - /* recommended */ - const std::string author = Environment<>::get().SimulationDescription().getAuthor(); - if( author.length() > 0 ) + /** specialization if no species are defined */ + template<> + struct OfAllSpecies<0> { - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "author", "/", adios_string, 1, (void*)author.c_str())); - } + /** write meta data for species + * + * @param threadParams context of the adios plugin + * @param 
fullMeshesPath path to mesh entry + */ + void operator()( + ThreadParams* /* threadParams */, + const std::string& /* fullMeshesPath */ + ) const + { + } + }; - const std::string software( "PIConGPU" ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "software", "/", adios_string, 1, (void*)software.c_str())); - - std::stringstream softwareVersion; - softwareVersion << PICONGPU_VERSION_MAJOR << "." - << PICONGPU_VERSION_MINOR << "." - << PICONGPU_VERSION_PATCH; - if( ! std::string(PICONGPU_VERSION_LABEL).empty() ) - softwareVersion << "-" << PICONGPU_VERSION_LABEL; - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "softwareVersion", "/", adios_string, 1, (void*)softwareVersion.str().c_str())); - - const std::string date = helper::getDateString( "%F %T %z" ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "date", "/", adios_string, 1, (void*)date.c_str())); - - /* ED-PIC */ - const std::string fullMeshesPath( threadParams->adiosBasePath + - std::string(ADIOS_PATH_FIELDS) ); - - GetStringProperties fieldSolverProps; - const std::string fieldSolver( fieldSolverProps["name"].value ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "fieldSolver", fullMeshesPath.c_str(), adios_string, 1, (void*)fieldSolver.c_str())); - - /* order as in axisLabels: - * 3D: z-lower, z-upper, y-lower, y-upper, x-lower, x-upper - * 2D: y-lower, y-upper, x-lower, x-upper - */ - GetStringProperties fieldBoundaryProp; - std::list listFieldBoundary; - std::list listFieldBoundaryParam; - for( uint32_t i = NumberOfExchanges::value - 1; i > 0; --i ) + } // namespace writeMeta + + struct WriteMeta + { + void operator()(ThreadParams* threadParams) { - if( FRONT % i == 0 ) + log("ADIOS: (begin) write meta attributes."); + + traits::PICToAdios adiosUInt32Type; + traits::PICToAdios adiosFloatXType; + traits::PICToAdios adiosDoubleType; + + /* openPMD attributes */ + /* required */ + const 
std::string openPMDversion("1.0.0"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "openPMD", + "/", + adios_string, + 1, + (void*) openPMDversion.c_str())); + + const uint32_t openPMDextension = 1; // ED-PIC ID + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "openPMDextension", + "/", + adiosUInt32Type.type, + 1, + (void*) &openPMDextension)); + + const std::string basePath(ADIOS_PATH_ROOT "%T/"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "basePath", + "/", + adios_string, + 1, + (void*) basePath.c_str())); + + const std::string meshesPath(ADIOS_PATH_FIELDS); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "meshesPath", + "/", + adios_string, + 1, + (void*) meshesPath.c_str())); + + const std::string particlesPath(ADIOS_PATH_PARTICLES); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "particlesPath", + "/", + adios_string, + 1, + (void*) particlesPath.c_str())); + + const std::string iterationEncoding("fileBased"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "iterationEncoding", + "/", + adios_string, + 1, + (void*) iterationEncoding.c_str())); + + const std::string iterationFormat( + Environment::get().Filesystem().basename(threadParams->adiosFilename) + + std::string("_%T.bp")); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "iterationFormat", + "/", + adios_string, + 1, + (void*) iterationFormat.c_str())); + + /* recommended */ + const std::string author = Environment<>::get().SimulationDescription().getAuthor(); + if(author.length() > 0) { - listFieldBoundary.push_back( - fieldBoundaryProp[ExchangeTypeNames()[i]]["name"].value - ); - listFieldBoundaryParam.push_back( - fieldBoundaryProp[ExchangeTypeNames()[i]]["param"].value - ); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "author", + "/", + 
adios_string, + 1, + (void*) author.c_str())); } - } - helper::GetADIOSArrayOfString getADIOSArrayOfString; - auto arrFieldBoundary = getADIOSArrayOfString( listFieldBoundary ); - auto arrFieldBoundaryParam = getADIOSArrayOfString( listFieldBoundaryParam ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "fieldBoundary", fullMeshesPath.c_str(), adios_string_array, - listFieldBoundary.size(), &( arrFieldBoundary.starts.at( 0 ) ))); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "fieldBoundaryParameters", fullMeshesPath.c_str(), adios_string_array, - listFieldBoundaryParam.size(), &( arrFieldBoundaryParam.starts.at( 0 ) ))); - - writeMeta::OfAllSpecies<>()( threadParams, fullMeshesPath ); + const std::string software("PIConGPU"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "software", + "/", + adios_string, + 1, + (void*) software.c_str())); + + std::stringstream softwareVersion; + softwareVersion << PICONGPU_VERSION_MAJOR << "." << PICONGPU_VERSION_MINOR << "." 
+ << PICONGPU_VERSION_PATCH; + if(!std::string(PICONGPU_VERSION_LABEL).empty()) + softwareVersion << "-" << PICONGPU_VERSION_LABEL; + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "softwareVersion", + "/", + adios_string, + 1, + (void*) softwareVersion.str().c_str())); + + const std::string date = helper::getDateString("%F %T %z"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "date", + "/", + adios_string, + 1, + (void*) date.c_str())); + + /* ED-PIC */ + const std::string fullMeshesPath(threadParams->adiosBasePath + std::string(ADIOS_PATH_FIELDS)); + + GetStringProperties fieldSolverProps; + const std::string fieldSolver(fieldSolverProps["name"].value); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "fieldSolver", + fullMeshesPath.c_str(), + adios_string, + 1, + (void*) fieldSolver.c_str())); + if(fieldSolverProps.find("param") != fieldSolverProps.end()) + { + const std::string fieldSolverParam(fieldSolverProps["param"].value); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "fieldSolverParameters", + fullMeshesPath.c_str(), + adios_string, + 1, + (void*) fieldSolverParam.c_str())); + } - GetStringProperties currentSmoothingProp; - const std::string currentSmoothing( currentSmoothingProp["name"].value ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "currentSmoothing", fullMeshesPath.c_str(), adios_string, 1, (void*)currentSmoothing.c_str())); + /* order as in axisLabels: + * 3D: z-lower, z-upper, y-lower, y-upper, x-lower, x-upper + * 2D: y-lower, y-upper, x-lower, x-upper + */ + GetStringProperties fieldBoundaryProp; + std::list listFieldBoundary; + std::list listFieldBoundaryParam; + for(uint32_t i = NumberOfExchanges::value - 1; i > 0; --i) + { + if(FRONT % i == 0) + { + listFieldBoundary.push_back(fieldBoundaryProp[ExchangeTypeNames()[i]]["name"].value); + 
listFieldBoundaryParam.push_back(fieldBoundaryProp[ExchangeTypeNames()[i]]["param"].value); + } + } + helper::GetADIOSArrayOfString getADIOSArrayOfString; + auto arrFieldBoundary = getADIOSArrayOfString(listFieldBoundary); + auto arrFieldBoundaryParam = getADIOSArrayOfString(listFieldBoundaryParam); + + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "fieldBoundary", + fullMeshesPath.c_str(), + adios_string_array, + listFieldBoundary.size(), + &(arrFieldBoundary.starts.at(0)))); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "fieldBoundaryParameters", + fullMeshesPath.c_str(), + adios_string_array, + listFieldBoundaryParam.size(), + &(arrFieldBoundaryParam.starts.at(0)))); + + writeMeta::OfAllSpecies<>()(threadParams, fullMeshesPath); + + GetStringProperties currentSmoothingProp; + const std::string currentSmoothing(currentSmoothingProp["name"].value); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "currentSmoothing", + fullMeshesPath.c_str(), + adios_string, + 1, + (void*) currentSmoothing.c_str())); + + if(currentSmoothingProp.find("param") != currentSmoothingProp.end()) + { + const std::string currentSmoothingParam(currentSmoothingProp["param"].value); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "currentSmoothingParameters", + fullMeshesPath.c_str(), + adios_string, + 1, + (void*) currentSmoothingParam.c_str())); + } - if( currentSmoothingProp.find( "param" ) != currentSmoothingProp.end() ) - { - const std::string currentSmoothingParam( currentSmoothingProp["param"].value ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "currentSmoothingParameters", fullMeshesPath.c_str(), adios_string, - 1, (void*)currentSmoothingParam.c_str())); + const std::string chargeCorrection("none"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "chargeCorrection", + fullMeshesPath.c_str(), + 
adios_string, + 1, + (void*) chargeCorrection.c_str())); + + /* write current iteration */ + log("ADIOS: meta: iteration"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "iteration", + threadParams->adiosBasePath.c_str(), + adiosUInt32Type.type, + 1, + (void*) &threadParams->currentStep)); + + /* write number of slides */ + log("ADIOS: meta: sim_slides"); + uint32_t slides = MovingWindow::getInstance().getSlideCounter(threadParams->currentStep); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "sim_slides", + threadParams->adiosBasePath.c_str(), + adiosUInt32Type.type, + 1, + (void*) &slides)); + + /* openPMD: required time attributes */ + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "dt", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &DELTA_T)); + const float_X time = float_X(threadParams->currentStep) * DELTA_T; + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "time", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &time)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "timeUnitSI", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_TIME)); + + /* write normed grid parameters */ + log("ADIOS: meta: grid"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "cell_width", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &cellSize[0])); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "cell_height", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &cellSize[1])); + + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "cell_depth", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &cellSize[2])); + + + /* write base units */ + log("ADIOS: meta: 
units"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_energy", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_ENERGY)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_length", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_LENGTH)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_speed", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_SPEED)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_time", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_TIME)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_mass", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_MASS)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_charge", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_CHARGE)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_efield", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_EFIELD)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "unit_bfield", + threadParams->adiosBasePath.c_str(), + adiosDoubleType.type, + 1, + (void*) &UNIT_BFIELD)); + + /* write physical constants */ + log("ADIOS: meta: mue0/eps0"); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "mue0", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &MUE0)); + ADIOS_CMD(adios_define_attribute_byvalue( + threadParams->adiosGroupHandle, + "eps0", + threadParams->adiosBasePath.c_str(), + adiosFloatXType.type, + 1, + (void*) &EPS0)); + + log("ADIOS: ( end ) wite meta attributes."); } - - const 
std::string chargeCorrection( "none" ); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "chargeCorrection", fullMeshesPath.c_str(), adios_string, 1, (void*)chargeCorrection.c_str())); - - /* write current iteration */ - log ("ADIOS: meta: iteration"); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "iteration", threadParams->adiosBasePath.c_str(), - adiosUInt32Type.type, 1, (void*)&threadParams->currentStep )); - - /* write number of slides */ - log ("ADIOS: meta: sim_slides"); - uint32_t slides = MovingWindow::getInstance().getSlideCounter(threadParams->currentStep); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "sim_slides", threadParams->adiosBasePath.c_str(), - adiosUInt32Type.type, 1, (void*)&slides )); - - /* openPMD: required time attributes */ - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "dt", threadParams->adiosBasePath.c_str(), - adiosFloatXType.type, 1, (void*)&DELTA_T )); - const float_X time = float_X( threadParams->currentStep ) * DELTA_T; - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "time", threadParams->adiosBasePath.c_str(), - adiosFloatXType.type, 1, (void*)&time )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "timeUnitSI", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_TIME )); - - /* write normed grid parameters */ - log ("ADIOS: meta: grid"); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "cell_width", threadParams->adiosBasePath.c_str(), - adiosFloatXType.type, 1, (void*)&cellSize[0] )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "cell_height", threadParams->adiosBasePath.c_str(), - adiosFloatXType.type, 1, (void*)&cellSize[1] )); - - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "cell_depth", threadParams->adiosBasePath.c_str(), - 
adiosFloatXType.type, 1, (void*)&cellSize[2] )); - - - /* write base units */ - log ("ADIOS: meta: units"); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_energy", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_ENERGY )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_length", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_LENGTH )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_speed", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_SPEED )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_time", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_TIME )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_mass", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_MASS )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_charge", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_CHARGE )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_efield", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_EFIELD )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "unit_bfield", threadParams->adiosBasePath.c_str(), - adiosDoubleType.type, 1, (void*)&UNIT_BFIELD )); - - /* write physical constants */ - log ("ADIOS: meta: mue0/eps0"); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "mue0", threadParams->adiosBasePath.c_str(), - adiosFloatXType.type, 1, (void*)&MUE0 )); - ADIOS_CMD(adios_define_attribute_byvalue(threadParams->adiosGroupHandle, - "eps0", threadParams->adiosBasePath.c_str(), - adiosFloatXType.type, 1, (void*)&EPS0 )); - - log ("ADIOS: ( end ) wite meta attributes."); - } - }; 
-} // namespace adios + }; + } // namespace adios } // namespace picongpu diff --git a/include/picongpu/plugins/adios/WriteSpecies.hpp b/include/picongpu/plugins/adios/WriteSpecies.hpp index 1f22849ad4..17f1255ec5 100644 --- a/include/picongpu/plugins/adios/WriteSpecies.hpp +++ b/include/picongpu/plugins/adios/WriteSpecies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Felix Schmitt, Axel Huebl, +/* Copyright 2014-2021 Rene Widera, Felix Schmitt, Axel Huebl, * Alexander Grund * * This file is part of PIConGPU. @@ -37,9 +37,7 @@ #include #include #include -#if( PMACC_CUDA_ENABLED == 1 ) #include -#endif #include #include @@ -53,162 +51,159 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; - -/** Write copy particle to host memory and dump to ADIOS file - * - * @tparam T_Species type of species - * - */ -template< typename T_SpeciesFilter > -struct WriteSpecies -{ -public: - - typedef typename T_SpeciesFilter::Species ThisSpecies; - typedef typename ThisSpecies::FrameType FrameType; - typedef typename FrameType::ParticleDescription ParticleDescription; - typedef typename FrameType::ValueTypeSeq ParticleAttributeList; - - /* delete multiMask and localCellIdx in adios particle*/ - typedef bmpl::vector TypesToDelete; - typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; - - /* add totalCellIdx for adios particle*/ - typedef typename MakeSeq< - ParticleCleanedAttributeList, - totalCellIdx - >::type ParticleNewAttributeList; - - typedef - typename ReplaceValueTypeSeq::type - NewParticleDescription; - - typedef Frame AdiosFrameType; - - template - HINLINE void operator()(ThreadParams* params, - const Space particleOffset) + namespace adios { - log ("ADIOS: (begin) write species: %1%") % T_SpeciesFilter::getName(); - DataConnector &dc = Environment<>::get().DataConnector(); - /* load particle without copy particle data to host */ - auto speciesTmp = dc.get< ThisSpecies >( ThisSpecies::FrameType::getName(), true ); - - /* count 
total number of particles on the device */ - log ("ADIOS: (begin) count particles: %1%") % T_SpeciesFilter::getName(); - // enforce that the filter interface is fulfilled - particles::filter::IUnary< typename T_SpeciesFilter::Filter > particleFilter{ params->currentStep }; - uint64_cu totalNumParticles = 0; - totalNumParticles = pmacc::CountParticles::countOnDevice < CORE + BORDER > ( - *speciesTmp, - *(params->cellDescription), - params->localWindowToDomainOffset, - params->window.localDimensions.size, - particleFilter); - log ("ADIOS: ( end ) count particles: %1% = %2%") % T_SpeciesFilter::getName() % totalNumParticles; - - AdiosFrameType hostFrame; - - /* malloc host memory */ - log ("ADIOS: (begin) malloc host memory: %1%") % T_SpeciesFilter::getName(); - meta::ForEach > mallocMem; - mallocMem(hostFrame, totalNumParticles); - log ("ADIOS: ( end ) malloc host memory: %1%") % T_SpeciesFilter::getName(); - - if (totalNumParticles > 0) + using namespace pmacc; + + /** Write copy particle to host memory and dump to ADIOS file + * + * @tparam T_Species type of species + * + */ + template + struct WriteSpecies { - log ("ADIOS: (begin) copy particle host (with hierarchy) to host (without hierarchy): %1%") % T_SpeciesFilter::getName(); - typedef bmpl::vector< typename GetPositionFilter::type > usedFilters; - typedef typename FilterFactory::FilterType MyParticleFilter; - MyParticleFilter filter; - /* activate filter pipeline if moving window is activated */ - filter.setStatus(MovingWindow::getInstance().isEnabled()); - filter.setWindowPosition(params->localWindowToDomainOffset, - params->window.localDimensions.size); - - DataConnector &dc = Environment<>::get().DataConnector(); -#if( PMACC_CUDA_ENABLED == 1 ) - auto mallocMCBuffer = dc.get< MallocMCBuffer< DeviceHeap > >( MallocMCBuffer< DeviceHeap >::getName(), true ); -#endif - int globalParticleOffset = 0; - AreaMapping < CORE + BORDER, MappingDesc > mapper(*(params->cellDescription)); - - 
pmacc::particles::operations::ConcatListOfFrames concatListOfFrames(mapper.getGridDim()); - -#if( PMACC_CUDA_ENABLED == 1 ) - auto particlesBox = speciesTmp->getHostParticlesBox( mallocMCBuffer->getOffset() ); + public: + typedef typename T_SpeciesFilter::Species ThisSpecies; + typedef typename ThisSpecies::FrameType FrameType; + typedef typename FrameType::ParticleDescription ParticleDescription; + typedef typename FrameType::ValueTypeSeq ParticleAttributeList; + + /* delete multiMask and localCellIdx in adios particle*/ + typedef bmpl::vector TypesToDelete; + typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; + + /* add totalCellIdx for adios particle*/ + typedef typename MakeSeq::type ParticleNewAttributeList; + + typedef typename ReplaceValueTypeSeq::type + NewParticleDescription; + + typedef Frame AdiosFrameType; + + template + HINLINE void operator()(ThreadParams* params, const Space particleOffset) + { + log("ADIOS: (begin) write species: %1%") % T_SpeciesFilter::getName(); + DataConnector& dc = Environment<>::get().DataConnector(); + /* load particle without copy particle data to host */ + auto speciesTmp = dc.get(ThisSpecies::FrameType::getName(), true); + + /* count total number of particles on the device */ + log("ADIOS: (begin) count particles: %1%") % T_SpeciesFilter::getName(); + // enforce that the filter interface is fulfilled + particles::filter::IUnary particleFilter{params->currentStep}; + uint64_cu totalNumParticles = 0; + totalNumParticles = pmacc::CountParticles::countOnDevice( + *speciesTmp, + *(params->cellDescription), + params->localWindowToDomainOffset, + params->window.localDimensions.size, + particleFilter); + log("ADIOS: ( end ) count particles: %1% = %2%") % T_SpeciesFilter::getName() + % totalNumParticles; + + AdiosFrameType hostFrame; + + /* malloc host memory */ + log("ADIOS: (begin) malloc host memory: %1%") % T_SpeciesFilter::getName(); + meta::ForEach> mallocMem; + mallocMem(hostFrame, totalNumParticles); + 
log("ADIOS: ( end ) malloc host memory: %1%") % T_SpeciesFilter::getName(); + + if(totalNumParticles > 0) + { + log( + "ADIOS: (begin) copy particle host (with hierarchy) to host (without hierarchy): %1%") + % T_SpeciesFilter::getName(); + typedef bmpl::vector::type> usedFilters; + typedef typename FilterFactory::FilterType MyParticleFilter; + MyParticleFilter filter; + /* activate filter pipeline if moving window is activated */ + filter.setStatus(MovingWindow::getInstance().isEnabled()); + filter.setWindowPosition(params->localWindowToDomainOffset, params->window.localDimensions.size); + + DataConnector& dc = Environment<>::get().DataConnector(); + + auto mallocMCBuffer + = dc.get>(MallocMCBuffer::getName(), true); + + int globalParticleOffset = 0; + AreaMapping mapper(*(params->cellDescription)); + + pmacc::particles::operations::ConcatListOfFrames concatListOfFrames(mapper.getGridDim()); + +#if(PMACC_CUDA_ENABLED == 1 || ALPAKA_ACC_GPU_HIP_ENABLED == 1) + auto particlesBox = speciesTmp->getHostParticlesBox(mallocMCBuffer->getOffset()); #else - /* This separate code path is only a workaround until MallocMCBuffer - * is alpaka compatible. - * - * @todo remove this workaround: we know that we are allowed to access the - * device memory directly. - */ - auto particlesBox = speciesTmp->getDeviceParticlesBox( ); - /* Notify to the event system that the particles box is used on the host. - * - * @todo remove this workaround - */ - __startOperation(ITask::TASK_HOST); + /* This separate code path is only a workaround until MallocMCBuffer + * is alpaka compatible. + * + * @todo remove this workaround: we know that we are allowed to access the + * device memory directly. + */ + auto particlesBox = speciesTmp->getDeviceParticlesBox(); + /* Notify to the event system that the particles box is used on the host. 
+ * + * @todo remove this workaround + */ + __startOperation(ITask::TASK_HOST); #endif - concatListOfFrames( - globalParticleOffset, - hostFrame, - particlesBox, - filter, - particleOffset, /*relative to data domain (not to physical domain)*/ - totalCellIdx_, - mapper, - particleFilter - ); -#if( PMACC_CUDA_ENABLED == 1 ) - dc.releaseData( MallocMCBuffer< DeviceHeap >::getName() ); -#endif - /* this costs a little bit of time but adios writing is slower */ - PMACC_ASSERT((uint64_cu) globalParticleOffset == totalNumParticles); - } - /* dump to adios file */ - meta::ForEach > writeToAdios; - writeToAdios(params, hostFrame, totalNumParticles); - - /* free host memory */ - meta::ForEach > freeMem; - freeMem(hostFrame); - log ("ADIOS: ( end ) writing species: %1%") % T_SpeciesFilter::getName(); - - /* write species counter table to adios file */ - log ("ADIOS: (begin) writing particle index table for %1%") % T_SpeciesFilter::getName(); - { - GridController& gc = Environment::get().GridController(); - - const size_t pos_offset = 2; - - /* particlesMetaInfo = (num particles, scalar position, particle offset x, y, z) */ - uint64_t particlesMetaInfo[5] = {totalNumParticles, gc.getScalarPosition(), 0, 0, 0}; - for (size_t d = 0; d < simDim; ++d) - particlesMetaInfo[pos_offset + d] = particleOffset[d]; - - /* prevent that top (y) gpus have negative value here */ - if (gc.getPosition().y() == 0) - particlesMetaInfo[pos_offset + 1] = 0; - - if (particleOffset[1] < 0) // 1 == y - particlesMetaInfo[pos_offset + 1] = 0; - - int64_t adiosIndexVarId = *(params->adiosSpeciesIndexVarIds.begin()); - params->adiosSpeciesIndexVarIds.pop_front(); - ADIOS_CMD(adios_write_byid(params->adiosFileHandle, adiosIndexVarId, particlesMetaInfo)); - } - log ("ADIOS: ( end ) writing particle index table for %1%") % T_SpeciesFilter::getName(); - } -}; - - -} //namspace adios - -} //namespace picongpu + concatListOfFrames( + globalParticleOffset, + hostFrame, + particlesBox, + filter, + particleOffset, 
/*relative to data domain (not to physical domain)*/ + totalCellIdx_, + mapper, + particleFilter); + + dc.releaseData(MallocMCBuffer::getName()); + + /* this costs a little bit of time but adios writing is slower */ + PMACC_ASSERT((uint64_cu) globalParticleOffset == totalNumParticles); + } + /* dump to adios file */ + meta::ForEach> writeToAdios; + writeToAdios(params, hostFrame, totalNumParticles); + + /* free host memory */ + meta::ForEach> freeMem; + freeMem(hostFrame); + log("ADIOS: ( end ) writing species: %1%") % T_SpeciesFilter::getName(); + + /* write species counter table to adios file */ + log("ADIOS: (begin) writing particle index table for %1%") + % T_SpeciesFilter::getName(); + { + GridController& gc = Environment::get().GridController(); + + const size_t pos_offset = 2; + + /* particlesMetaInfo = (num particles, scalar position, particle offset x, y, z) */ + uint64_t particlesMetaInfo[5] = {totalNumParticles, gc.getScalarPosition(), 0, 0, 0}; + for(size_t d = 0; d < simDim; ++d) + particlesMetaInfo[pos_offset + d] = particleOffset[d]; + + /* prevent that top (y) gpus have negative value here */ + if(gc.getPosition().y() == 0) + particlesMetaInfo[pos_offset + 1] = 0; + + if(particleOffset[1] < 0) // 1 == y + particlesMetaInfo[pos_offset + 1] = 0; + + int64_t adiosIndexVarId = *(params->adiosSpeciesIndexVarIds.begin()); + params->adiosSpeciesIndexVarIds.pop_front(); + ADIOS_CMD(adios_write_byid(params->adiosFileHandle, adiosIndexVarId, particlesMetaInfo)); + } + log("ADIOS: ( end ) writing particle index table for %1%") + % T_SpeciesFilter::getName(); + } + }; + + + } // namespace adios + +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/restart/LoadParticleAttributesFromADIOS.hpp b/include/picongpu/plugins/adios/restart/LoadParticleAttributesFromADIOS.hpp index 91bd349f94..21f21f03b5 100644 --- a/include/picongpu/plugins/adios/restart/LoadParticleAttributesFromADIOS.hpp +++ 
b/include/picongpu/plugins/adios/restart/LoadParticleAttributesFromADIOS.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once @@ -34,110 +33,106 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; - -/** Load attribute of a species from ADIOS checkpoint file - * - * @tparam T_Identifier identifier of species attribute - */ -template< typename T_Identifier> -struct LoadParticleAttributesFromADIOS -{ - - /** read attributes from ADIOS file - * - * @param params thread params with ADIOS_FILE, ... - * @param frame frame with all particles - * @param particlePath path to the group in the ADIOS file - * @param particlesOffset read offset in the attribute array - * @param elements number of elements which should be read the attribute array - */ - template - HINLINE void operator()( - ThreadParams* params, - FrameType& frame, - const std::string particlePath, - const uint64_t particlesOffset, - const uint64_t elements) + namespace adios { - - typedef T_Identifier Identifier; - typedef typename pmacc::traits::Resolve::type::type ValueType; - const uint32_t components = GetNComponents::value; - typedef typename GetComponentsType::type ComponentType; - - log ("ADIOS: ( begin ) load species attribute: %1%") % Identifier::getName(); - - const auto componentNames = plugins::misc::getComponentNames( components ); - - ComponentType* tmpArray = nullptr; - if( elements > 0 ) - tmpArray = new ComponentType[elements]; - - // dev assert! 
- if( elements > 0 ) - PMACC_ASSERT(tmpArray); - - for (uint32_t n = 0; n < components; ++n) + using namespace pmacc; + + /** Load attribute of a species from ADIOS checkpoint file + * + * @tparam T_Identifier identifier of species attribute + */ + template + struct LoadParticleAttributesFromADIOS { - OpenPMDName openPMDName; - std::stringstream datasetName; - datasetName << particlePath << openPMDName(); - if (components > 1) - datasetName << "/" << componentNames[n]; - - ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); - - ADIOS_VARINFO* varInfo = adios_inq_var( params->fp, datasetName.str().c_str() ); - // it's possible to aquire the local block with that call again and - // the local elements to-be-read, but the block-ID must be known (MPI rank?) - //ADIOS_CMD(adios_inq_var_blockinfo( params->fp, varInfo )); - - ADIOS_SELECTION* sel = adios_selection_boundingbox( 1, &particlesOffset, &elements ); - - /** Note: adios_schedule_read is not a collective call in any - * ADIOS method and can therefore be skipped for empty reads */ - if( elements > 0 ) + /** read attributes from ADIOS file + * + * @param params thread params with ADIOS_FILE, ... 
+ * @param frame frame with all particles + * @param particlePath path to the group in the ADIOS file + * @param particlesOffset read offset in the attribute array + * @param elements number of elements which should be read the attribute array + */ + template + HINLINE void operator()( + ThreadParams* params, + FrameType& frame, + const std::string particlePath, + const uint64_t particlesOffset, + const uint64_t elements) { - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); - ADIOS_CMD(adios_schedule_read( params->fp, - sel, - datasetName.str().c_str(), - 0, /* from_step (not used in streams) */ - 1, /* nsteps to read (must be 1 for stream) */ - (void*)tmpArray )); + typedef T_Identifier Identifier; + typedef typename pmacc::traits::Resolve::type::type ValueType; + const uint32_t components = GetNComponents::value; + typedef typename GetComponentsType::type ComponentType; + + log("ADIOS: ( begin ) load species attribute: %1%") % Identifier::getName(); + + const auto componentNames = plugins::misc::getComponentNames(components); + + ComponentType* tmpArray = nullptr; + if(elements > 0) + tmpArray = new ComponentType[elements]; + + // dev assert! + if(elements > 0) + PMACC_ASSERT(tmpArray); + + for(uint32_t n = 0; n < components; ++n) + { + OpenPMDName openPMDName; + std::stringstream datasetName; + datasetName << particlePath << openPMDName(); + if(components > 1) + datasetName << "/" << componentNames[n]; + + ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); + + ADIOS_VARINFO* varInfo = adios_inq_var(params->fp, datasetName.str().c_str()); + // it's possible to aquire the local block with that call again and + // the local elements to-be-read, but the block-ID must be known (MPI rank?) 
+ // ADIOS_CMD(adios_inq_var_blockinfo( params->fp, varInfo )); + + ADIOS_SELECTION* sel = adios_selection_boundingbox(1, &particlesOffset, &elements); + + /** Note: adios_schedule_read is not a collective call in any + * ADIOS method and can therefore be skipped for empty reads */ + if(elements > 0) + { + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); + ADIOS_CMD(adios_schedule_read( + params->fp, + sel, + datasetName.str().c_str(), + 0, /* from_step (not used in streams) */ + 1, /* nsteps to read (must be 1 for stream) */ + (void*) tmpArray)); + } + + /** start a blocking read of all scheduled variables + * (this is collective call in many ADIOS methods) */ + ADIOS_CMD(adios_perform_reads(params->fp, 1)); + + log("ADIOS: Did read %1% local of %2% global elements for %3%") % elements + % varInfo->dims[0] % datasetName.str(); + +/* copy component from temporary array to array of structs */ +#pragma omp parallel for + for(size_t i = 0; i < elements; ++i) + { + ComponentType& ref = ((ComponentType*) dataPtr)[i * components + n]; + ref = tmpArray[i]; + } + + adios_selection_delete(sel); + adios_free_varinfo(varInfo); + } + __deleteArray(tmpArray); + + log("ADIOS: ( end ) load species attribute: %1%") % Identifier::getName(); } + }; - /** start a blocking read of all scheduled variables - * (this is collective call in many ADIOS methods) */ - ADIOS_CMD(adios_perform_reads( params->fp, 1 )); - - log ("ADIOS: Did read %1% local of %2% global elements for %3%") % - elements % varInfo->dims[0] % datasetName.str(); - - /* copy component from temporary array to array of structs */ - #pragma omp parallel for - for (size_t i = 0; i < elements; ++i) - { - ComponentType& ref = ((ComponentType*) dataPtr)[i * components + n]; - ref = tmpArray[i]; - } - - adios_selection_delete( sel ); - adios_free_varinfo( varInfo ); - } - __deleteArray(tmpArray); - - log ("ADIOS: ( end ) load species attribute: %1%") % - 
Identifier::getName(); - } - -}; - -} /* namespace adios */ + } /* namespace adios */ } /* namespace picongpu */ diff --git a/include/picongpu/plugins/adios/restart/LoadSpecies.hpp b/include/picongpu/plugins/adios/restart/LoadSpecies.hpp index 52a2fad1ca..fc21cba1a2 100644 --- a/include/picongpu/plugins/adios/restart/LoadSpecies.hpp +++ b/include/picongpu/plugins/adios/restart/LoadSpecies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl * * This file is part of PIConGPU. * @@ -47,151 +47,151 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; - -/** Load species from ADIOS checkpoint file - * - * @tparam T_Species type of species - * - */ -template< typename T_Species > -struct LoadSpecies -{ -public: - - typedef T_Species ThisSpecies; - typedef typename ThisSpecies::FrameType FrameType; - typedef typename FrameType::ParticleDescription ParticleDescription; - typedef typename FrameType::ValueTypeSeq ParticleAttributeList; - - - /* delete multiMask and localCellIdx in adios particle*/ - typedef bmpl::vector2 TypesToDelete; - typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; - - /* add totalCellIdx for adios particle*/ - typedef typename MakeSeq< - ParticleCleanedAttributeList, - totalCellIdx - >::type ParticleNewAttributeList; - - typedef - typename ReplaceValueTypeSeq::type - NewParticleDescription; - - typedef Frame AdiosFrameType; - - /** Load species from ADIOS checkpoint file - * - * @param params thread params with ADIOS_FILE, ... 
- * @param restartChunkSize number of particles processed in one kernel call - */ - HINLINE void operator()(ThreadParams* params, const uint32_t restartChunkSize) + namespace adios { - std::string const speciesName = FrameType::getName(); - log ("ADIOS: (begin) load species: %1%") % speciesName; - DataConnector &dc = Environment<>::get().DataConnector(); - GridController &gc = Environment::get().GridController(); - - std::string particlePath = params->adiosBasePath + std::string(ADIOS_PATH_PARTICLES) + - speciesName + std::string("/"); - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - - /* load particle without copying particle data to host */ - auto speciesTmp = dc.get< ThisSpecies >( FrameType::getName(), true ); - - /* count total number of particles on the device */ - uint64_t totalNumParticles = 0; - - /* load particles info table entry for ONE process - (note: this is NOT necessarily THIS process!) - particlesInfo is (part-count, scalar pos, x, y, z) */ - uint64_t particlesInfo[5]; - - uint64_t start = 5 * gc.getGlobalRank(); - uint64_t count = 5; // ADIOSCountParticles: uint64_t - ADIOS_SELECTION* piSel = adios_selection_boundingbox( 1, &start, &count ); - - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); - ADIOS_CMD(adios_schedule_read( params->fp, - piSel, - (particlePath + std::string("particles_info")).c_str(), - 0, - 1, - (void*)particlesInfo )); - - /* start a blocking read of all scheduled variables */ - ADIOS_CMD(adios_perform_reads( params->fp, 1 )); - adios_selection_delete(piSel); - - /* Run a prefix sum over the numParticles[0] element in particlesInfo - * to retreive the offset of particles before gc.getGlobalRank() */ - uint64_t particleOffset = 0; - - uint64_t fullParticlesInfo[gc.getGlobalSize()]; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - 
MPI_CHECK(MPI_Allgather( particlesInfo, 1, MPI_UINT64_T, - fullParticlesInfo, 1, MPI_UINT64_T, - gc.getCommunicator().getMPIComm() )); - - for (size_t i = 0; i < gc.getGlobalSize(); ++i) - { - /* this comparison is potentially harmful, since the order of ranks - is not necessarily the same in subsequent MPI jobs. - But due to the wrong sorting by rank in `ADIOSCountParticles.hpp` - while calculating the `myParticleOffset` we have to immitate that. */ - if( i < gc.getGlobalRank() ) - particleOffset += fullParticlesInfo[i]; - if( i == gc.getGlobalRank() ) - totalNumParticles = fullParticlesInfo[i]; - } - - log ("ADIOS: Loading %1% particles from offset %2%") % - (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; - - AdiosFrameType hostFrame; - log ("ADIOS: malloc mapped memory: %1%") % speciesName; - /*malloc mapped memory*/ - meta::ForEach > mallocMem; - mallocMem(hostFrame, totalNumParticles); - - log ("ADIOS: get mapped memory device pointer: %1%") % speciesName; - /*load device pointer of mapped memory*/ - AdiosFrameType deviceFrame; - meta::ForEach > getDevicePtr; - getDevicePtr(deviceFrame, hostFrame); - - meta::ForEach > loadAttributes; - loadAttributes(params, hostFrame, particlePath, particleOffset, totalNumParticles); - - if (totalNumParticles != 0) + using namespace pmacc; + + /** Load species from ADIOS checkpoint file + * + * @tparam T_Species type of species + * + */ + template + struct LoadSpecies { - pmacc::particles::operations::splitIntoListOfFrames( - *speciesTmp, - deviceFrame, - totalNumParticles, - restartChunkSize, - localDomain.offset, - totalCellIdx_, - *(params->cellDescription), - picLog::INPUT_OUTPUT() - ); - - /*free host memory*/ - meta::ForEach > freeMem; - freeMem(hostFrame); - } - log ("ADIOS: ( end ) load species: %1%") % speciesName; - } -}; - - -} /* namespace adios */ + public: + typedef T_Species ThisSpecies; + typedef typename ThisSpecies::FrameType FrameType; + typedef typename 
FrameType::ParticleDescription ParticleDescription; + typedef typename FrameType::ValueTypeSeq ParticleAttributeList; + + + /* delete multiMask and localCellIdx in adios particle*/ + typedef bmpl::vector2 TypesToDelete; + typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; + + /* add totalCellIdx for adios particle*/ + typedef typename MakeSeq::type ParticleNewAttributeList; + + typedef typename ReplaceValueTypeSeq::type + NewParticleDescription; + + typedef Frame AdiosFrameType; + + /** Load species from ADIOS checkpoint file + * + * @param params thread params with ADIOS_FILE, ... + * @param restartChunkSize number of particles processed in one kernel call + */ + HINLINE void operator()(ThreadParams* params, const uint32_t restartChunkSize) + { + std::string const speciesName = FrameType::getName(); + log("ADIOS: (begin) load species: %1%") % speciesName; + DataConnector& dc = Environment<>::get().DataConnector(); + GridController& gc = Environment::get().GridController(); + + std::string particlePath + = params->adiosBasePath + std::string(ADIOS_PATH_PARTICLES) + speciesName + std::string("/"); + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + + /* load particle without copying particle data to host */ + auto speciesTmp = dc.get(FrameType::getName(), true); + + /* count total number of particles on the device */ + uint64_t totalNumParticles = 0; + + /* load particles info table entry for ONE process + (note: this is NOT necessarily THIS process!) 
+ particlesInfo is (part-count, scalar pos, x, y, z) */ + uint64_t particlesInfo[5]; + + uint64_t start = 5 * gc.getGlobalRank(); + uint64_t count = 5; // ADIOSCountParticles: uint64_t + ADIOS_SELECTION* piSel = adios_selection_boundingbox(1, &start, &count); + + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); + ADIOS_CMD(adios_schedule_read( + params->fp, + piSel, + (particlePath + std::string("particles_info")).c_str(), + 0, + 1, + (void*) particlesInfo)); + + /* start a blocking read of all scheduled variables */ + ADIOS_CMD(adios_perform_reads(params->fp, 1)); + adios_selection_delete(piSel); + + /* Run a prefix sum over the numParticles[0] element in particlesInfo + * to retreive the offset of particles before gc.getGlobalRank() */ + uint64_t particleOffset = 0; + + uint64_t fullParticlesInfo[gc.getGlobalSize()]; + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + particlesInfo, + 1, + MPI_UINT64_T, + fullParticlesInfo, + 1, + MPI_UINT64_T, + gc.getCommunicator().getMPIComm())); + + for(size_t i = 0; i < gc.getGlobalSize(); ++i) + { + /* this comparison is potentially harmful, since the order of ranks + is not necessarily the same in subsequent MPI jobs. + But due to the wrong sorting by rank in `ADIOSCountParticles.hpp` + while calculating the `myParticleOffset` we have to immitate that. 
*/ + if(i < gc.getGlobalRank()) + particleOffset += fullParticlesInfo[i]; + if(i == gc.getGlobalRank()) + totalNumParticles = fullParticlesInfo[i]; + } + + log("ADIOS: Loading %1% particles from offset %2%") + % (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; + + AdiosFrameType hostFrame; + log("ADIOS: malloc mapped memory: %1%") % speciesName; + /*malloc mapped memory*/ + meta::ForEach> mallocMem; + mallocMem(hostFrame, totalNumParticles); + + log("ADIOS: get mapped memory device pointer: %1%") % speciesName; + /*load device pointer of mapped memory*/ + AdiosFrameType deviceFrame; + meta::ForEach> getDevicePtr; + getDevicePtr(deviceFrame, hostFrame); + + meta::ForEach> + loadAttributes; + loadAttributes(params, hostFrame, particlePath, particleOffset, totalNumParticles); + + if(totalNumParticles != 0) + { + pmacc::particles::operations::splitIntoListOfFrames( + *speciesTmp, + deviceFrame, + totalNumParticles, + restartChunkSize, + localDomain.offset, + totalCellIdx_, + *(params->cellDescription), + picLog::INPUT_OUTPUT()); + + /*free host memory*/ + meta::ForEach> freeMem; + freeMem(hostFrame); + } + log("ADIOS: ( end ) load species: %1%") % speciesName; + } + }; + + + } /* namespace adios */ } /* namespace picongpu */ diff --git a/include/picongpu/plugins/adios/restart/ReadAttribute.hpp b/include/picongpu/plugins/adios/restart/ReadAttribute.hpp index b74668467b..e7997c8b45 100644 --- a/include/picongpu/plugins/adios/restart/ReadAttribute.hpp +++ b/include/picongpu/plugins/adios/restart/ReadAttribute.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PIConGPU. 
* @@ -28,45 +28,42 @@ #include #include -namespace picongpu { -namespace adios { - - /** - * Read an attribute from an open ADIOS file, check that its type is correct and return it - * - * @param fp Open ADIOS file handle - * @param basePath Path where the attribute is located in the file (with or w/o trailing slash) - * @param attrName Name of the attribute. Used for status output and concatenated with basePath - * @retval Attribute value - */ - template - T_Attribute readAttribute(ADIOS_FILE* fp, const std::string& basePath, const std::string& attrName) +namespace picongpu +{ + namespace adios { - // Build full path - std::string attrPath = basePath; - if(!attrPath.empty() && attrPath[attrPath.size() - 1] != '/') - attrPath += '/'; - attrPath += attrName; - // Actually read the data - enum ADIOS_DATATYPES attrType; - int attrSize; - T_Attribute* attrValuePtr; - ADIOS_CMD( adios_get_attr(fp, - attrPath.c_str(), - &attrType, - &attrSize, - (void**) &attrValuePtr) ); - // Sanity checks - if(attrType != traits::PICToAdios().type) - throw std::runtime_error(std::string("Invalid type of ADIOS attribute ") + attrName); - if(attrSize != sizeof(T_Attribute)) - throw std::runtime_error(std::string("Invalid size of ADIOS attribute ") + attrName); + /** + * Read an attribute from an open ADIOS file, check that its type is correct and return it + * + * @param fp Open ADIOS file handle + * @param basePath Path where the attribute is located in the file (with or w/o trailing slash) + * @param attrName Name of the attribute. 
Used for status output and concatenated with basePath + * @retval Attribute value + */ + template + T_Attribute readAttribute(ADIOS_FILE* fp, const std::string& basePath, const std::string& attrName) + { + // Build full path + std::string attrPath = basePath; + if(!attrPath.empty() && attrPath[attrPath.size() - 1] != '/') + attrPath += '/'; + attrPath += attrName; + // Actually read the data + enum ADIOS_DATATYPES attrType; + int attrSize; + T_Attribute* attrValuePtr; + ADIOS_CMD(adios_get_attr(fp, attrPath.c_str(), &attrType, &attrSize, (void**) &attrValuePtr)); + // Sanity checks + if(attrType != traits::PICToAdios().type) + throw std::runtime_error(std::string("Invalid type of ADIOS attribute ") + attrName); + if(attrSize != sizeof(T_Attribute)) + throw std::runtime_error(std::string("Invalid size of ADIOS attribute ") + attrName); - T_Attribute attribute = *attrValuePtr; - __delete(attrValuePtr); - log ("ADIOS: value of %1% = %2%") % attrName % attribute; - return attribute; - } + T_Attribute attribute = *attrValuePtr; + __delete(attrValuePtr); + log("ADIOS: value of %1% = %2%") % attrName % attribute; + return attribute; + } -} // namespace adios -} // namespace picongpu + } // namespace adios +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/restart/RestartFieldLoader.hpp b/include/picongpu/plugins/adios/restart/RestartFieldLoader.hpp index 8cad0b32b9..3f57817b15 100644 --- a/include/picongpu/plugins/adios/restart/RestartFieldLoader.hpp +++ b/include/picongpu/plugins/adios/restart/RestartFieldLoader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera * Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. 
@@ -26,10 +26,12 @@ #include "picongpu/plugins/misc/ComponentNames.hpp" #include "picongpu/traits/IsFieldDomainBound.hpp" +#include #include #include #include #include +#include #include "picongpu/simulation/control/MovingWindow.hpp" #include @@ -43,187 +45,212 @@ namespace picongpu { - -namespace adios -{ - -/** - * Helper class for ADIOS plugin to load fields from parallel ADIOS BP files. - */ -class RestartFieldLoader -{ -public: - template - static void loadField( - Data& field, - const uint32_t numComponents, - std::string objectName, - ThreadParams *params, - const bool isDomainBound - ) + namespace adios { - log ("Begin loading field '%1%'") % objectName; - - const auto componentNames = plugins::misc::getComponentNames( numComponents ); - const DataSpace field_guard = field.getGridLayout().getGuard(); - - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - - using ValueType = typename Data::ValueType; - field.getHostBuffer().setValue(ValueType::create(0.0)); - - DataSpace domain_offset = localDomain.offset; - DataSpace local_domain_size = params->window.localDimensions.size; - bool useLinearIdxAsDestination = false; - - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. 
- */ - if( !isDomainBound ) - { - auto const field_layout = params->gridLayout; - auto const field_no_guard = field_layout.getDataSpaceWithoutGuarding(); - auto const elementCount = field_no_guard.productOfComponents(); - auto const & gridController = Environment::get().GridController(); - auto const rank = gridController.getGlobalRank(); - domain_offset = DataSpace::create( 0 ); - domain_offset[ 0 ] = rank * elementCount; - local_domain_size = DataSpace::create( 1 ); - local_domain_size[ 0 ] = elementCount; - useLinearIdxAsDestination = true; - } - - auto destBox = field.getHostBuffer().getDataBox(); - for (uint32_t n = 0; n < numComponents; ++n) + /** + * Helper class for ADIOS plugin to load fields from parallel ADIOS BP files. + */ + class RestartFieldLoader { - // Read the subdomain which belongs to our mpi position. - // The total grid size must match the grid size of the stored data. - log ("ADIOS: Read from domain: offset=%1% size=%2%") % - domain_offset % local_domain_size; - - std::stringstream datasetName; - datasetName << params->adiosBasePath << ADIOS_PATH_FIELDS << objectName; - if (numComponents > 1) - datasetName << "/" << componentNames[n]; - - log ("ADIOS: Read from field '%1%'") % - datasetName.str(); - - ADIOS_VARINFO* varInfo = adios_inq_var( params->fp, datasetName.str().c_str() ); - if( varInfo == nullptr ) + public: + template + static void loadField( + Data& field, + const uint32_t numComponents, + std::string objectName, + ThreadParams* params, + const bool isDomainBound) { - std::string errMsg( adios_errmsg() ); - if( errMsg.empty() ) errMsg = '\n'; - std::stringstream s; - s << "ADIOS: error at adios_inq_var '" - << "' (" << adios_errno << ") in " - << __FILE__ << ":" << __LINE__ << " " << errMsg; - throw std::runtime_error(s.str()); - } - uint64_t start[varInfo->ndim]; - uint64_t count[varInfo->ndim]; - for(int d = 0; d < varInfo->ndim; ++d) - { - /* \see adios_define_var: z,y,x in C-order */ - start[d] = domain_offset.revert()[d]; - 
count[d] = local_domain_size.revert()[d]; - } - - ADIOS_SELECTION* fSel = adios_selection_boundingbox( varInfo->ndim, start, count ); + log("Begin loading field '%1%'") % objectName; - /* specify what we want to read, but start reading at below at - * `adios_perform_reads` */ - log ("ADIOS: Allocate %1% elements") % - local_domain_size.productOfComponents(); + const auto componentNames = plugins::misc::getComponentNames(numComponents); + const DataSpace field_guard = field.getGridLayout().getGuard(); - /// \todo float_X should be some kind of gridBuffer's GetComponentsType::type - float_X* field_container = new float_X[local_domain_size.productOfComponents()]; - /* magic parameters (0, 1): `from_step` (not used in streams), `nsteps` to read (must be 1 for stream) */ - log ("ADIOS: Schedule read from field (%1%, %2%, %3%, %4%)") % - params->fp % fSel % datasetName.str() % (void*)field_container; + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); - ADIOS_CMD(adios_schedule_read( params->fp, fSel, datasetName.str().c_str(), 0, 1, (void*)field_container )); + using ValueType = typename Data::ValueType; + field.getHostBuffer().setValue(ValueType::create(0.0)); - /* start a blocking read of all scheduled variables */ - ADIOS_CMD(adios_perform_reads( params->fp, 1 )); + DataSpace domain_offset = localDomain.offset; + DataSpace local_domain_size = params->window.localDimensions.size; + bool useLinearIdxAsDestination = false; - int const elementCount = local_domain_size.productOfComponents(); - - #pragma omp parallel for - for (int linearId = 0; linearId < elementCount; ++linearId) - { - DataSpace destIdx; - if( useLinearIdxAsDestination ) + /* Patch for non-domain-bound fields + * This is an ugly fix to allow output of reduced 1d PML buffers + */ + if(!isDomainBound) { - destIdx[ 0 ] = linearId; + auto const 
field_layout = params->gridLayout; + auto const field_no_guard = field_layout.getDataSpaceWithoutGuarding(); + auto const elementCount = field_no_guard.productOfComponents(); + + /* Scan the PML buffer local size along all local domains + * This code is symmetric to one in Field::writeField() + */ + log("ADIOS: (begin) collect PML sizes for %1%") % objectName; + auto& gridController = Environment::get().GridController(); + auto const numRanks = uint64_t{gridController.getGlobalSize()}; + /* Use domain position-based rank, not MPI rank, to be independent + * of the MPI rank assignment scheme + */ + auto const rank = uint64_t{gridController.getScalarPosition()}; + std::vector localSizes(2 * numRanks, 0u); + uint64_t localSizeInfo[2] = {static_cast(elementCount), rank}; + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + localSizeInfo, + 2, + MPI_UINT64_T, + &(*localSizes.begin()), + 2, + MPI_UINT64_T, + gridController.getCommunicator().getMPIComm())); + uint64_t domainOffset = 0; + for(uint64_t r = 0; r < numRanks; ++r) + { + if(localSizes.at(2u * r + 1u) < rank) + domainOffset += localSizes.at(2u * r); + } + log("ADIOS: (end) collect PML sizes for %1%") % objectName; + + domain_offset = DataSpace::create(0); + domain_offset[0] = static_cast(domainOffset); + local_domain_size = DataSpace::create(1); + local_domain_size[0] = elementCount; + useLinearIdxAsDestination = true; } - else + + auto destBox = field.getHostBuffer().getDataBox(); + for(uint32_t n = 0; n < numComponents; ++n) { - /* calculate index inside the moving window domain which is located on the local grid*/ - destIdx = DataSpaceOperations::map(params->window.localDimensions.size, linearId); - /* jump over guard and local sliding window offset*/ - destIdx += field_guard + params->localWindowToDomainOffset; + // Read the subdomain which belongs to our mpi position. + // The total grid size must match the grid size of the stored data. 
+ log("ADIOS: Read from domain: offset=%1% size=%2%") % domain_offset + % local_domain_size; + + std::stringstream datasetName; + datasetName << params->adiosBasePath << ADIOS_PATH_FIELDS << objectName; + if(numComponents > 1) + datasetName << "/" << componentNames[n]; + + log("ADIOS: Read from field '%1%'") % datasetName.str(); + + ADIOS_VARINFO* varInfo = adios_inq_var(params->fp, datasetName.str().c_str()); + if(varInfo == nullptr) + { + std::string errMsg(adios_errmsg()); + if(errMsg.empty()) + errMsg = '\n'; + std::stringstream s; + s << "ADIOS: error at adios_inq_var '" + << "' (" << adios_errno << ") in " << __FILE__ << ":" << __LINE__ << " " << errMsg; + throw std::runtime_error(s.str()); + } + uint64_t start[varInfo->ndim]; + uint64_t count[varInfo->ndim]; + for(int d = 0; d < varInfo->ndim; ++d) + { + /* \see adios_define_var: z,y,x in C-order */ + start[d] = domain_offset.revert()[d]; + count[d] = local_domain_size.revert()[d]; + } + + ADIOS_SELECTION* fSel = adios_selection_boundingbox(varInfo->ndim, start, count); + + /* specify what we want to read, but start reading at below at + * `adios_perform_reads` */ + log("ADIOS: Allocate %1% elements") + % local_domain_size.productOfComponents(); + + /// \todo float_X should be some kind of gridBuffer's GetComponentsType::type + float_X* field_container = new float_X[local_domain_size.productOfComponents()]; + /* magic parameters (0, 1): `from_step` (not used in streams), `nsteps` to read (must be 1 for + * stream) */ + log("ADIOS: Schedule read from field (%1%, %2%, %3%, %4%)") % params->fp + % fSel % datasetName.str() % (void*) field_container; + + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); + ADIOS_CMD(adios_schedule_read( + params->fp, + fSel, + datasetName.str().c_str(), + 0, + 1, + (void*) field_container)); + + /* start a blocking read of all scheduled variables */ + ADIOS_CMD(adios_perform_reads(params->fp, 1)); + + int const 
elementCount = local_domain_size.productOfComponents(); + +#pragma omp parallel for + for(int linearId = 0; linearId < elementCount; ++linearId) + { + DataSpace destIdx; + if(useLinearIdxAsDestination) + { + destIdx[0] = linearId; + } + else + { + /* calculate index inside the moving window domain which is located on the local grid*/ + destIdx = DataSpaceOperations::map(params->window.localDimensions.size, linearId); + /* jump over guard and local sliding window offset*/ + destIdx += field_guard + params->localWindowToDomainOffset; + } + destBox(destIdx)[n] = field_container[linearId]; + } + + __deleteArray(field_container); + adios_selection_delete(fSel); + adios_free_varinfo(varInfo); } - destBox(destIdx)[n] = field_container[linearId]; - } - - __deleteArray(field_container); - adios_selection_delete(fSel); - adios_free_varinfo(varInfo); - } - field.hostToDevice(); + field.hostToDevice(); - __getTransactionEvent().waitForFinished(); + __getTransactionEvent().waitForFinished(); - log ("ADIOS: Read from domain: offset=%1% size=%2%") % - domain_offset % local_domain_size; - log ("ADIOS: Finished loading field '%1%'") % objectName; - } - -}; - -/** - * Helper class for ADIOSWriter (forEach operator) to load a field from ADIOS - * - * @tparam T_Field field class to load - */ -template< typename T_Field > -struct LoadFields -{ -public: - - HDINLINE void operator()(ThreadParams* params) - { + log("ADIOS: Read from domain: offset=%1% size=%2%") % domain_offset + % local_domain_size; + log("ADIOS: Finished loading field '%1%'") % objectName; + } + }; + + /** + * Helper class for ADIOSWriter (forEach operator) to load a field from ADIOS + * + * @tparam T_Field field class to load + */ + template + struct LoadFields + { + public: + HDINLINE void operator()(ThreadParams* params) + { #ifndef __CUDA_ARCH__ - DataConnector &dc = Environment<>::get().DataConnector(); - ThreadParams *tp = params; - - /* load field without copying data to host */ - auto field = dc.get< T_Field >( 
T_Field::getName(), true ); - tp->gridLayout = field->getGridLayout(); - - /* load from ADIOS */ - bool const isDomainBound = traits::IsFieldDomainBound< T_Field >::value; - RestartFieldLoader::loadField( - field->getGridBuffer(), - (uint32_t)T_Field::numComponents, - T_Field::getName(), - tp, - isDomainBound - ); - - dc.releaseData(T_Field::getName()); + DataConnector& dc = Environment<>::get().DataConnector(); + ThreadParams* tp = params; + + /* load field without copying data to host */ + auto field = dc.get(T_Field::getName(), true); + tp->gridLayout = field->getGridLayout(); + + /* load from ADIOS */ + bool const isDomainBound = traits::IsFieldDomainBound::value; + RestartFieldLoader::loadField( + field->getGridBuffer(), + (uint32_t) T_Field::numComponents, + T_Field::getName(), + tp, + isDomainBound); + + dc.releaseData(T_Field::getName()); #endif - } - -}; + } + }; -using namespace pmacc; + using namespace pmacc; -} /* namespace adios */ + } /* namespace adios */ } /* namespace picongpu */ diff --git a/include/picongpu/plugins/adios/writer/ParticleAttribute.hpp b/include/picongpu/plugins/adios/writer/ParticleAttribute.hpp index 1af0b8f427..3757851bab 100644 --- a/include/picongpu/plugins/adios/writer/ParticleAttribute.hpp +++ b/include/picongpu/plugins/adios/writer/ParticleAttribute.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -27,69 +27,60 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; - -/** write attribute of a particle to adios file - * - * @tparam T_Identifier identifier of a particle attribute - */ -template< typename T_Identifier> -struct ParticleAttribute -{ - - /** write attribute to adios file - * - * @param params wrapped params - * @param elements elements of this attribute - */ - template - HINLINE void operator()( - ThreadParams* params, - FrameType& frame, - const size_t elements) + namespace adios { + using namespace pmacc; + + /** write attribute of a particle to adios file + * + * @tparam T_Identifier identifier of a particle attribute + */ + template + struct ParticleAttribute + { + /** write attribute to adios file + * + * @param params wrapped params + * @param elements elements of this attribute + */ + template + HINLINE void operator()(ThreadParams* params, FrameType& frame, const size_t elements) + { + typedef T_Identifier Identifier; + typedef typename pmacc::traits::Resolve::type::type ValueType; + const uint32_t components = GetNComponents::value; + typedef typename GetComponentsType::type ComponentType; - typedef T_Identifier Identifier; - typedef typename pmacc::traits::Resolve::type::type ValueType; - const uint32_t components = GetNComponents::value; - typedef typename GetComponentsType::type ComponentType; - - log ("ADIOS: (begin) write species attribute: %1%") % Identifier::getName(); - - ComponentType* tmpBfr = nullptr; - - if (elements > 0) - tmpBfr = new ComponentType[elements]; + log("ADIOS: (begin) write species attribute: %1%") % Identifier::getName(); - for (uint32_t d = 0; d < components; d++) - { - ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); + ComponentType* tmpBfr = nullptr; - /* copy strided data from source to temporary buffer */ - #pragma omp parallel for - for (size_t i = 0; i < elements; ++i) - { - tmpBfr[i] = ((ComponentType*) dataPtr)[d + i * components]; - } + if(elements > 0) + tmpBfr 
= new ComponentType[elements]; - int64_t adiosAttributeVarId = *(params->adiosParticleAttrVarIds.begin()); - params->adiosParticleAttrVarIds.pop_front(); + for(uint32_t d = 0; d < components; d++) + { + ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); - ADIOS_CMD(adios_write_byid(params->adiosFileHandle, adiosAttributeVarId, tmpBfr)); - } +/* copy strided data from source to temporary buffer */ +#pragma omp parallel for + for(size_t i = 0; i < elements; ++i) + { + tmpBfr[i] = ((ComponentType*) dataPtr)[d + i * components]; + } - __deleteArray(tmpBfr); + int64_t adiosAttributeVarId = *(params->adiosParticleAttrVarIds.begin()); + params->adiosParticleAttrVarIds.pop_front(); - log ("ADIOS: ( end ) write species attribute: %1%") % - Identifier::getName(); - } + ADIOS_CMD(adios_write_byid(params->adiosFileHandle, adiosAttributeVarId, tmpBfr)); + } -}; + __deleteArray(tmpBfr); -} //namspace adios + log("ADIOS: ( end ) write species attribute: %1%") % Identifier::getName(); + } + }; -} //namespace picongpu + } // namespace adios +} // namespace picongpu diff --git a/include/picongpu/plugins/adios/writer/ParticleAttributeSize.hpp b/include/picongpu/plugins/adios/writer/ParticleAttributeSize.hpp index b62bb1f816..63a448300e 100644 --- a/include/picongpu/plugins/adios/writer/ParticleAttributeSize.hpp +++ b/include/picongpu/plugins/adios/writer/ParticleAttributeSize.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt, Axel Huebl +/* Copyright 2014-2021 Felix Schmitt, Axel Huebl * * This file is part of PIConGPU. 
* @@ -31,119 +31,133 @@ namespace picongpu { - -namespace adios -{ -using namespace pmacc; - - - -/** collect size of a particle attribute - * - * @tparam T_Identifier identifier of a particle attribute - */ -template< typename T_Identifier> -struct ParticleAttributeSize -{ - /** collect size of attribute - * - * @param params wrapped params - * @param elements number of particles for this attribute - */ - HINLINE void operator()( - ThreadParams* params, - const std::string speciesGroup, - const uint64_t elements, - const uint64_t globalElements, - const uint64_t globalOffset) + namespace adios { + using namespace pmacc; - typedef T_Identifier Identifier; - typedef typename pmacc::traits::Resolve::type::type ValueType; - const uint32_t components = GetNComponents::value; - typedef typename GetComponentsType::type ComponentType; - params->adiosGroupSize += elements * components * sizeof(ComponentType); - - /* define adios var for particle attribute */ - PICToAdios adiosType; - PICToAdios adiosFloatXType; - PICToAdios adiosDoubleType; - PICToAdios adiosUInt32Type; - - const auto componentNames = plugins::misc::getComponentNames( components ); - - OpenPMDName openPMDName; - const std::string recordPath( params->adiosBasePath + - std::string(ADIOS_PATH_PARTICLES) + speciesGroup + openPMDName() ); - - // get the SI scaling, dimensionality and weighting of the attribute - OpenPMDUnit openPMDUnit; - std::vector unit = openPMDUnit(); - OpenPMDUnitDimension openPMDUnitDimension; - std::vector unitDimension = openPMDUnitDimension(); - const bool macroWeightedBool = MacroWeighted::get(); - const uint32_t macroWeighted = (macroWeightedBool ? 
1 : 0); - const float_64 weightingPower = WeightingPower::get(); - - PMACC_ASSERT(unit.size() == components); // unitSI for each component - PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units - - for (uint32_t d = 0; d < components; d++) + /** collect size of a particle attribute + * + * @tparam T_Identifier identifier of a particle attribute + */ + template + struct ParticleAttributeSize { - std::stringstream datasetName; - datasetName << recordPath; - if (components > 1) - datasetName << "/" << componentNames[d]; - - const char* path = nullptr; - int64_t adiosParticleAttrId = defineAdiosVar( - params->adiosGroupHandle, - datasetName.str().c_str(), - path, - adiosType.type, - pmacc::math::UInt64(elements), - pmacc::math::UInt64(globalElements), - pmacc::math::UInt64(globalOffset), - true, - params->adiosCompression); - - params->adiosParticleAttrVarIds.push_back(adiosParticleAttrId); - - /* already add the unitSI and further attribute so `adios_group_size` - * calculates the reservation for the buffer correctly */ - - /* check if this attribute actually has a unit (unit.size() == 0 is no unit) */ - if (unit.size() >= (d + 1)) - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "unitSI", datasetName.str().c_str(), - adiosDoubleType.type, 1, &unit.at(d) )); - } - - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "unitDimension", recordPath.c_str(), - adiosDoubleType.type, 7, &(*unitDimension.begin()) )); - - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "macroWeighted", recordPath.c_str(), - adiosUInt32Type.type, 1, (void*)¯oWeighted )); - - ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "weightingPower", recordPath.c_str(), - adiosDoubleType.type, 1, (void*)&weightingPower )); - - /** \todo check if always correct at this point, depends on attribute - * and MW-solver/pusher implementation */ - const float_X timeOffset = 0.0; - 
ADIOS_CMD(adios_define_attribute_byvalue(params->adiosGroupHandle, - "timeOffset", recordPath.c_str(), - adiosFloatXType.type, 1, (void*)&timeOffset )); - - } - -}; - -} //namspace adios - -} //namespace picongpu - + /** collect size of attribute + * + * @param params wrapped params + * @param elements number of particles for this attribute + */ + HINLINE void operator()( + ThreadParams* params, + const std::string speciesGroup, + const uint64_t elements, + const uint64_t globalElements, + const uint64_t globalOffset) + { + typedef T_Identifier Identifier; + typedef typename pmacc::traits::Resolve::type::type ValueType; + const uint32_t components = GetNComponents::value; + typedef typename GetComponentsType::type ComponentType; + + params->adiosGroupSize += elements * components * sizeof(ComponentType); + + /* define adios var for particle attribute */ + PICToAdios adiosType; + PICToAdios adiosFloatXType; + PICToAdios adiosDoubleType; + PICToAdios adiosUInt32Type; + + const auto componentNames = plugins::misc::getComponentNames(components); + + OpenPMDName openPMDName; + const std::string recordPath( + params->adiosBasePath + std::string(ADIOS_PATH_PARTICLES) + speciesGroup + openPMDName()); + + // get the SI scaling, dimensionality and weighting of the attribute + OpenPMDUnit openPMDUnit; + std::vector unit = openPMDUnit(); + OpenPMDUnitDimension openPMDUnitDimension; + std::vector unitDimension = openPMDUnitDimension(); + const bool macroWeightedBool = MacroWeighted::get(); + const uint32_t macroWeighted = (macroWeightedBool ? 
1 : 0); + const float_64 weightingPower = WeightingPower::get(); + + PMACC_ASSERT(unit.size() == components); // unitSI for each component + PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units + + for(uint32_t d = 0; d < components; d++) + { + std::stringstream datasetName; + datasetName << recordPath; + if(components > 1) + datasetName << "/" << componentNames[d]; + + const char* path = nullptr; + int64_t adiosParticleAttrId = defineAdiosVar( + params->adiosGroupHandle, + datasetName.str().c_str(), + path, + adiosType.type, + pmacc::math::UInt64(elements), + pmacc::math::UInt64(globalElements), + pmacc::math::UInt64(globalOffset), + true, + params->adiosCompression); + + params->adiosParticleAttrVarIds.push_back(adiosParticleAttrId); + + /* already add the unitSI and further attribute so `adios_group_size` + * calculates the reservation for the buffer correctly */ + + /* check if this attribute actually has a unit (unit.size() == 0 is no unit) */ + if(unit.size() >= (d + 1)) + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "unitSI", + datasetName.str().c_str(), + adiosDoubleType.type, + 1, + &unit.at(d))); + } + + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "unitDimension", + recordPath.c_str(), + adiosDoubleType.type, + 7, + &(*unitDimension.begin()))); + + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "macroWeighted", + recordPath.c_str(), + adiosUInt32Type.type, + 1, + (void*) ¯oWeighted)); + + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "weightingPower", + recordPath.c_str(), + adiosDoubleType.type, + 1, + (void*) &weightingPower)); + + /** \todo check if always correct at this point, depends on attribute + * and MW-solver/pusher implementation */ + const float_X timeOffset = 0.0; + ADIOS_CMD(adios_define_attribute_byvalue( + params->adiosGroupHandle, + "timeOffset", + recordPath.c_str(), + adiosFloatXType.type, + 1, + (void*) 
&timeOffset)); + } + }; + + } // namespace adios + +} // namespace picongpu diff --git a/include/picongpu/plugins/common/particlePatches.cpp b/include/picongpu/plugins/common/particlePatches.cpp deleted file mode 100644 index 0248204ecd..0000000000 --- a/include/picongpu/plugins/common/particlePatches.cpp +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright 2016-2020 Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#include "picongpu/plugins/common/particlePatches.hpp" - - -namespace picongpu -{ -namespace openPMD -{ - - ParticlePatches::ParticlePatches( const size_t n ) - { - /* zero particles */ - numParticles = std::vector( n, 0u ); - numParticlesOffset = std::vector( n, 0u ); - - /* zero offsets */ - offsetX = std::vector( n, 0u ); - offsetY = std::vector( n, 0u ); - offsetZ = std::vector( n, 0u ); - - /* zero extents */ - extentX = std::vector( n, 0u ); - extentY = std::vector( n, 0u ); - extentZ = std::vector( n, 0u ); - } - - uint64_t* ParticlePatches::getOffsetComp( const uint32_t comp ) - { - if( comp == 0 ) - return &(*offsetX.begin()); - if( comp == 1 ) - return &(*offsetY.begin()); - if( comp == 2 ) - return &(*offsetZ.begin()); - - return nullptr; - } - - uint64_t* ParticlePatches::getExtentComp( const uint32_t comp ) - { - if( comp == 0 ) - return &(*extentX.begin()); - if( comp == 1 ) - return &(*extentY.begin()); - if( comp == 2 ) - return &(*extentZ.begin()); - - return nullptr; - } - - size_t ParticlePatches::size() const - { - return numParticles.size(); - } - - void ParticlePatches::print() - { - std::cout << "id | numParticles numParticlesOffset " - << "offsetX offsetY offsetZ extentX extentY extentZ" - << std::endl; - for( size_t i = 0; i < this->size(); ++i ) - { - std::cout << i << " | " - << numParticles.at(i) << " " - << numParticlesOffset.at(i) << " " - << offsetX.at(i) << " " - << offsetY.at(i) << " " - << offsetZ.at(i) << " " - << extentX.at(i) << " " - << extentY.at(i) << " " - << extentZ.at(i) << std::endl; - } - } - -} // namespace openPMD -} // namespace picongpu diff --git a/include/picongpu/plugins/common/particlePatches.hpp b/include/picongpu/plugins/common/particlePatches.hpp deleted file mode 100644 index be3fb115fc..0000000000 --- a/include/picongpu/plugins/common/particlePatches.hpp +++ /dev/null @@ -1,100 +0,0 @@ -/* Copyright 2016-2020 Axel Huebl - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include -#include -#include -#include - -namespace picongpu -{ -namespace openPMD -{ - - /** Struct for a list of particle patches - * - * Object for all particle patches. - * @see https://github.com/openPMD/openPMD-standard/blob/1.0.0/STANDARD.md#sub-group-for-each-particle-species - */ - class ParticlePatches - { - private: - /** Disallow (empty) default contructor - */ - ParticlePatches (); - - public: - std::vector numParticles; - std::vector numParticlesOffset; - - std::vector offsetX; - std::vector offsetY; - std::vector offsetZ; - - std::vector extentX; - std::vector extentY; - std::vector extentZ; - - /** Fill-Constructor with n empty-sized patches - * - * @param n number of patches to store - */ - ParticlePatches( const size_t n ); - - /** Return the beginning of one of the components of the - * offset as pointer - * - * Be aware that the pointer is pointing to the beginning - * of a C-array of size `size()` and is only allocated as long - * as the `ParticlePatches` object is alive. 
- * - * @param comp component (0=x, 1=y, 2=z) of offset array - * for the list of patches - * @return uint64_t* pointing to the beginning of a c-array - * with length as given in size() - */ - uint64_t* getOffsetComp( const uint32_t comp ); - - /** Return the beginning of one of the components of the - * extent as pointer - * - * Be aware that the pointer is pointing to the beginning - * of a C-array of size `size()` and is only allocated as long - * as the `ParticlePatches` object is alive. - * - * @param comp component (0=x, 1=y, 2=z) of extent array - * for the list of patches - * @return uint64_t* pointing to the beginning of a c-array - * with length as given in size() - */ - uint64_t* getExtentComp( const uint32_t comp ); - - /** Returns the number of patches - */ - size_t size() const; - - /** Helper function printing to std::cout - */ - void print(); - }; - -} // namespace openPMD -} // namespace picongpu diff --git a/include/picongpu/plugins/common/stringHelpers.cpp b/include/picongpu/plugins/common/stringHelpers.cpp index 0041cc46cb..d4694597e9 100644 --- a/include/picongpu/plugins/common/stringHelpers.cpp +++ b/include/picongpu/plugins/common/stringHelpers.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -22,119 +22,87 @@ namespace picongpu { -namespace helper -{ - /** Return the current date as string - * - * \param format, \see http://www.cplusplus.com/reference/ctime/strftime/ - * \return std::string with formatted date - */ - std::string getDateString( std::string format ) - { - time_t rawtime; - struct tm* timeinfo; - const size_t maxLen = 30; - char buffer [maxLen]; - - time( &rawtime ); - timeinfo = localtime( &rawtime ); - - strftime( buffer, maxLen, format.c_str(), timeinfo ); - - std::stringstream dateString; - dateString << buffer; - - return dateString.str(); - } - - GetSplashArrayOfString::Result - GetSplashArrayOfString::operator()( - std::list listOfStrings, - char padding - ) + namespace helper { + /** Return the current date as string + * + * \param format, \see http://www.cplusplus.com/reference/ctime/strftime/ + * \return std::string with formatted date + */ + std::string getDateString(std::string format) + { + time_t rawtime; + struct tm* timeinfo; + const size_t maxLen = 30; + char buffer[maxLen]; + + time(&rawtime); + timeinfo = localtime(&rawtime); + + strftime(buffer, maxLen, format.c_str(), timeinfo); + + std::stringstream dateString; + dateString << buffer; + + return dateString.str(); + } + + GetSplashArrayOfString::Result GetSplashArrayOfString::operator()( + std::list listOfStrings, + char padding) + { Result result; // find length of longest string in list CompStrBySize compStrBySize; - std::string longestString = *std::max_element( - listOfStrings.begin(), - listOfStrings.end(), - compStrBySize - ); + std::string longestString = *std::max_element(listOfStrings.begin(), listOfStrings.end(), compStrBySize); result.maxLen = longestString.size(); // allocate & prepare buffer with padding // size per buffer must include terminator \0 ! 
const size_t bytesPerEntry = result.maxLen + 1; const size_t lenAllBuffers = listOfStrings.size() * bytesPerEntry; - result.buffers.assign( lenAllBuffers, padding ); + result.buffers.assign(lenAllBuffers, padding); // copy buffers std::list::iterator listIt = listOfStrings.begin(); - for( - size_t i = 0; - i < listOfStrings.size(); - ++i, ++listIt - ) + for(size_t i = 0; i < listOfStrings.size(); ++i, ++listIt) { // index points to each part of the buffer individually const size_t startIdx = i * bytesPerEntry; - std::vector::iterator startIt = - result.buffers.begin() + startIdx; + std::vector::iterator startIt = result.buffers.begin() + startIdx; // copy byte-wise onto padding - std::copy( - listIt->begin(), - listIt->end(), - startIt - ); - if( padding != '\0' ) - result.buffers.at( startIdx + result.maxLen ) = '\0'; + std::copy(listIt->begin(), listIt->end(), startIt); + if(padding != '\0') + result.buffers.at(startIdx + result.maxLen) = '\0'; } // return return result; - } + } - GetADIOSArrayOfString::Result - GetADIOSArrayOfString::operator()( - std::list listOfStrings - ) - { + GetADIOSArrayOfString::Result GetADIOSArrayOfString::operator()(std::list listOfStrings) + { Result result; // sum of all strings + their null terminators StrSize strSize; - const size_t sumLen = std::accumulate( - listOfStrings.begin(), - listOfStrings.end(), - 0u, - strSize - ); + const size_t sumLen = std::accumulate(listOfStrings.begin(), listOfStrings.end(), 0u, strSize); // allocate & prepare buffer, starts - result.buffers.assign( sumLen, '\0' ); - result.starts.assign( listOfStrings.size(), nullptr ); + result.buffers.assign(sumLen, '\0'); + result.starts.assign(listOfStrings.size(), nullptr); // concat all strings, \0 terminated size_t startIdx = 0; std::list::iterator listIt = listOfStrings.begin(); - for( - size_t i = 0; - i < listOfStrings.size(); - ++i, ++listIt - ) + for(size_t i = 0; i < listOfStrings.size(); ++i, ++listIt) { - std::vector::iterator startIt = - 
result.buffers.begin() + startIdx; + std::vector::iterator startIt = result.buffers.begin() + startIdx; // copy byte-wise onto padding - std::copy( - listIt->begin(), - listIt->end(), - startIt - ); + std::copy(listIt->begin(), listIt->end(), startIt); // start pointer result.starts.at(i) = &(*startIt); @@ -144,6 +112,6 @@ namespace helper // return return result; - } -} // namespace helper + } + } // namespace helper } // namespace picongpu diff --git a/include/picongpu/plugins/common/stringHelpers.hpp b/include/picongpu/plugins/common/stringHelpers.hpp index 8eb681b90b..b70a9e39c2 100644 --- a/include/picongpu/plugins/common/stringHelpers.hpp +++ b/include/picongpu/plugins/common/stringHelpers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -30,83 +30,80 @@ namespace picongpu { -namespace helper -{ - /** Return the current date as string - * - * \param format, \see http://www.cplusplus.com/reference/ctime/strftime/ - * \return std::string with formatted date - */ - std::string getDateString( std::string format ); - - /** Create array of c-strings suitable for libSplash - * - * Convert a std::list of strings to a format that is suitable to - * be written into libSplash (concated and padded array of constant - * c-strings). Strings will be padded to longest string. - * - * Independent of the padding you chose, the strings will be '\0' - * separated & terminated. \0 padding is default and recommended. 
- */ - class GetSplashArrayOfString + namespace helper { - private: - // compare two std::string by their size - struct CompStrBySize + /** Return the current date as string + * + * \param format, \see http://www.cplusplus.com/reference/ctime/strftime/ + * \return std::string with formatted date + */ + std::string getDateString(std::string format); + + /** Create array of c-strings suitable for libSplash + * + * Convert a std::list of strings to a format that is suitable to + * be written into libSplash (concated and padded array of constant + * c-strings). Strings will be padded to longest string. + * + * Independent of the padding you chose, the strings will be '\0' + * separated & terminated. \0 padding is default and recommended. + */ + class GetSplashArrayOfString { - bool operator()( std::string i, std::string j ) + private: + // compare two std::string by their size + struct CompStrBySize { - return i.size() < j.size(); - } - }; + bool operator()(std::string i, std::string j) + { + return i.size() < j.size(); + } + }; - public: - // resulting type containing all attributes for a libSplash write call - struct Result - { - size_t maxLen; // size of the longest string - std::vector buffers; // all of same length lenMax + public: + // resulting type containing all attributes for a libSplash write call + struct Result + { + size_t maxLen; // size of the longest string + std::vector buffers; // all of same length lenMax - Result() : maxLen(0) - {} - }; + Result() : maxLen(0) + { + } + }; - Result operator()( - std::list listOfStrings, - char padding = '\0' - ); - }; + Result operator()(std::list listOfStrings, char padding = '\0'); + }; - /** Create array of c-strings suitable for ADIOS - * - * Convert a std::list of strings to a format that is suitable to - * be written into ADIOS (`char *strings[]`). 
- */ - class GetADIOSArrayOfString - { - private: - // accumulate the size of a string + \0 to an initial value - struct StrSize + /** Create array of c-strings suitable for ADIOS + * + * Convert a std::list of strings to a format that is suitable to + * be written into ADIOS (`char *strings[]`). + */ + class GetADIOSArrayOfString { - size_t operator()( size_t init, std::string s ) + private: + // accumulate the size of a string + \0 to an initial value + struct StrSize { - return init + // previous length - s.size() + // this strings length - 1; // this strings null terminator - } - }; - public: - // resulting type containing all attributes for a ADIOS write call - struct Result - { - std::vector buffers; - std::vector starts; - }; + size_t operator()(size_t init, std::string s) + { + return init + // previous length + s.size() + // this strings length + 1; // this strings null terminator + } + }; - Result operator()( - std::list listOfStrings - ); - }; + public: + // resulting type containing all attributes for a ADIOS write call + struct Result + { + std::vector buffers; + std::vector starts; + }; + + Result operator()(std::list listOfStrings); + }; -} // namespace helper + } // namespace helper } // namespace picongpu diff --git a/include/picongpu/plugins/common/txtFileHandling.hpp b/include/picongpu/plugins/common/txtFileHandling.hpp index d9f745bdfc..1c2e80a2cb 100644 --- a/include/picongpu/plugins/common/txtFileHandling.hpp +++ b/include/picongpu/plugins/common/txtFileHandling.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl, Richard Pausch +/* Copyright 2015-2021 Axel Huebl, Richard Pausch * * This file is part of PIConGPU. 
* @@ -28,7 +28,7 @@ namespace picongpu { -using namespace boost::filesystem; + using namespace boost::filesystem; /** Restore a txt file from the checkpoint dir * @@ -43,41 +43,41 @@ using namespace boost::filesystem; * * \return operation was successful or not */ - HINLINE bool restoreTxtFile( std::ofstream& outFile, std::string filename, - uint32_t restartStep, const std::string restartDirectory ) + HINLINE bool restoreTxtFile( + std::ofstream& outFile, + std::string filename, + uint32_t restartStep, + const std::string restartDirectory) { /* get restart time step as string */ std::stringstream sStep; sStep << restartStep; /* set location of restart file and output file */ - path src( restartDirectory + std::string("/") + filename + - std::string(".") + sStep.str() ); - path dst( filename ); + path src(restartDirectory + std::string("/") + filename + std::string(".") + sStep.str()); + path dst(filename); /* check whether restart file exists */ - if( !boost::filesystem::exists( src ) ) + if(!boost::filesystem::exists(src)) { /* restart file does not exists */ - log ("Plugin restart file: %1% was not found. \ - --> Starting plugin from current time step.") % src; + log("Plugin restart file: %1% was not found. 
\ + --> Starting plugin from current time step.") + % src; return true; } else { /* restart file found - fix output file created at restart */ - if( outFile.is_open() ) + if(outFile.is_open()) outFile.close(); - copy_file( src, - dst, - copy_option::overwrite_if_exists ); + copy_file(src, dst, copy_option::overwrite_if_exists); - outFile.open( filename.c_str(), std::ofstream::out | std::ostream::app ); - if( !outFile ) + outFile.open(filename.c_str(), std::ofstream::out | std::ostream::app); + if(!outFile) { - std::cerr << "[Plugin] Can't open file '" << filename - << "', output disabled" << std::endl; + std::cerr << "[Plugin] Can't open file '" << filename << "', output disabled" << std::endl; return false; } return true; @@ -93,21 +93,21 @@ using namespace boost::filesystem; * \param currentStep the current time step * \param checkpointDirectory path to the checkpoint directory */ - HINLINE void checkpointTxtFile( std::ofstream& outFile, std::string filename, - uint32_t currentStep, const std::string checkpointDirectory ) + HINLINE void checkpointTxtFile( + std::ofstream& outFile, + std::string filename, + uint32_t currentStep, + const std::string checkpointDirectory) { outFile.flush(); std::stringstream sStep; sStep << currentStep; - path src( filename ); - path dst( checkpointDirectory + std::string("/") + filename + - std::string(".") + sStep.str() ); + path src(filename); + path dst(checkpointDirectory + std::string("/") + filename + std::string(".") + sStep.str()); - copy_file( src, - dst, - copy_option::overwrite_if_exists ); + copy_file(src, dst, copy_option::overwrite_if_exists); } } /* namespace picongpu */ diff --git a/include/picongpu/plugins/hdf5/HDF5Writer.def b/include/picongpu/plugins/hdf5/HDF5Writer.def deleted file mode 100644 index df5aba9a55..0000000000 --- a/include/picongpu/plugins/hdf5/HDF5Writer.def +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera - * - * This file is part of 
PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - - - -#pragma once - -#include "picongpu/simulation_types.hpp" -#include -#include "picongpu/simulation/control/MovingWindow.hpp" -#include - - -namespace picongpu -{ - -namespace hdf5 -{ -using namespace pmacc; - -using namespace splash; - - -namespace po = boost::program_options; - -struct ThreadParams -{ - /* set at least the pointers to nullptr by default */ - ThreadParams() : - dataCollector(nullptr), - cellDescription(nullptr) - {} - - /** current simulation step */ - uint32_t currentStep; - - /** current dump is a checkpoint */ - bool isCheckpoint; - - /** libSplash class */ - ParallelDomainCollector *dataCollector; - - /** libSplash file's base name */ - std::string h5Filename; - - /** description of the grid/field layout, including guards etc. */ - GridLayout gridLayout; - - /** cell description */ - MappingDesc *cellDescription; - - /** window describing the volume to be dumped */ - Window window; - - /** offset from local moving window to local domain */ - DataSpace localWindowToDomainOffset; -}; - -/** - * Writes simulation data to hdf5 files. - * Implements the ISimulationPlugin interface. 
- */ - -class HDF5Writer; - -} //namespace hdf5 -} //namespace picongpu - diff --git a/include/picongpu/plugins/hdf5/HDF5Writer.hpp b/include/picongpu/plugins/hdf5/HDF5Writer.hpp deleted file mode 100644 index 03da760e79..0000000000 --- a/include/picongpu/plugins/hdf5/HDF5Writer.hpp +++ /dev/null @@ -1,776 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, - * Alexander Grund - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - - -#pragma once - -#include -#include -#include -#include -#include - -#include "picongpu/simulation_defines.hpp" - -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/traits/SplashToPIC.hpp" -#include "picongpu/traits/PICToSplash.hpp" -#include "picongpu/plugins/misc/misc.hpp" -#include "picongpu/plugins/multi/Option.hpp" -#include "picongpu/plugins/misc/SpeciesFilter.hpp" -#include "picongpu/particles/traits/SpeciesEligibleForSolver.hpp" -#include "picongpu/particles/filter/filter.hpp" - -#include - -#include "picongpu/fields/FieldB.hpp" -#include "picongpu/fields/FieldE.hpp" -#include "picongpu/fields/FieldJ.hpp" -#include "picongpu/fields/FieldTmp.hpp" -#include "picongpu/fields/MaxwellSolver/YeePML/Field.hpp" -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "picongpu/simulation/control/MovingWindow.hpp" -#include - -#include "picongpu/plugins/output/IIOBackend.hpp" -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "picongpu/plugins/hdf5/WriteMeta.hpp" -#include "picongpu/plugins/hdf5/WriteFields.hpp" -#include "picongpu/plugins/hdf5/WriteSpecies.hpp" -#include "picongpu/plugins/hdf5/restart/LoadSpecies.hpp" -#include "picongpu/plugins/hdf5/restart/RestartFieldLoader.hpp" -#include "picongpu/plugins/hdf5/NDScalars.hpp" -#include "picongpu/plugins/misc/SpeciesFilter.hpp" - -#include - - -namespace picongpu -{ - -namespace hdf5 -{ - -using namespace pmacc; - -using namespace splash; - -/** Writes simulation data to hdf5 files using libSplash. - * - * Implements the IIOBackend interface. 
- */ -class HDF5Writer : - public IIOBackend -{ -public: - - struct Help : public plugins::multi::IHelp - { - /** creates a instance of ISlave - * - * @tparam T_Slave type of the interface implementation (must inherit from ISlave) - * @param help plugin defined help - * @param id index of the plugin, range: [0;help->getNumPlugins()) - */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) - { - return std::shared_ptr< ISlave >( - new HDF5Writer( - help, - id, - cellDescription - ) - ); - } - - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "enable HDF5 IO [for each n-th step]" - }; - plugins::multi::Option< std::string > source = { - "source", - "data sources: ", - "species_all, fields_all" - }; - - plugins::multi::Option< std::string > fileName = { - "file", - "HDF5 output filename (prefix)" - }; - - /** defines if the plugin must register itself to the PMacc plugin system - * - * true = the plugin is registering it self - * false = the plugin is not registering itself (plugin is controlled by another class) - */ - bool selfRegister = false; - - std::vector< std::string > allowedDataSources = { - "species_all", - "fields_all" - }; - - template - struct CreateSpeciesFilter - { - using type = plugins::misc::SpeciesFilter< - typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_<0> - >::type, - typename pmacc::math::CT::At< - T_TupleVector, - bmpl::int_<1> - >::type - >; - }; - - using AllParticlesTimesAllFilters = typename AllCombinations< - bmpl::vector< - FileOutputParticles, - particles::filter::AllParticleFilters - > - >::type; - - using AllSpeciesFilter = typename bmpl::transform< - AllParticlesTimesAllFilters, - CreateSpeciesFilter< bmpl::_1 > - >::type; - - using AllEligibleSpeciesSources = typename bmpl::copy_if< - AllSpeciesFilter, - plugins::misc::speciesFilter::IsEligible< bmpl::_1 > - >::type; - - using AllFieldSources = FileOutputFields; - - ///! 
method used by plugin controller to get --help description - void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - meta::ForEach< - AllEligibleSpeciesSources, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleDataSourceNames; - getEligibleDataSourceNames( allowedDataSources ); - - meta::ForEach< - AllFieldSources, - plugins::misc::AppendName< bmpl::_1 > - > appendFieldSourceNames; - appendFieldSourceNames( allowedDataSources ); - - // string list with all possible data sources - std::string concatenatedSourceNames = plugins::misc::concatenateToString( - allowedDataSources, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - source.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedSourceNames + "]" - ); - fileName.registerHelp( - desc, - masterPrefix + prefix - ); - selfRegister = true; - - } - - void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - } - - void validateOptions() - { - if( selfRegister ) - { - if( notifyPeriod.empty() || fileName.empty() ) - throw std::runtime_error( - name + - ": parameter period and file must be defined" - ); - - // check if user passed data source names are valid - for( auto const & dateSourceNames : source ) - { - auto vectorOfDataSourceNames = plugins::misc::splitString( - plugins::misc::removeSpaces( dateSourceNames ) - ); - - for( auto const & f : vectorOfDataSourceNames ) - { - if( - !plugins::misc::containsObject( - allowedDataSources, - f - ) - ) - { - throw std::runtime_error( name + ": unknown data source '" + f + "'" ); - } - } - } - } - } - - size_t getNumPlugins() const - { - if( selfRegister ) - return notifyPeriod.size(); - else - return 1; - } - - std::string getDescription() const - { - return description; - } - - std::string getOptionPrefix() const - { - return prefix; - } - - std::string 
getName() const - { - return name; - } - - std::string const name = "HDF5Writer"; - //! short description of the plugin - std::string const description = "dump simulation data with hdf5"; - //! prefix used for command line arguments - std::string const prefix = "hdf5"; - }; - - //! must be implemented by the user - static std::shared_ptr< plugins::multi::IHelp > getHelp() - { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); - } - - /** constructor - * - * @param help instance of the class Help - * @param id index of this plugin instance within help - * @param cellDescription PIConGPu cell description information for kernel index mapping - */ - HDF5Writer( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help >(help) ), - m_id( id ), - m_cellDescription( cellDescription ), - outputDirectory("h5") - { - mThreadParams.cellDescription = m_cellDescription; - - GridController &gc = Environment::get().GridController(); - - /* It is important that we never change the mpi_pos after this point - * because we get problems with the restart. - * Otherwise we do not know which gpu must load the ghost parts around - * the sliding window. 
- */ - mpi_pos = gc.getPosition(); - mpi_size = gc.getGpuNodes(); - - splashMpiPos.set(0, 0, 0); - splashMpiSize.set(1, 1, 1); - - for (uint32_t i = 0; i < simDim; ++i) - { - splashMpiPos[i] = mpi_pos[i]; - splashMpiSize[i] = mpi_size[i]; - } - - if( m_help->selfRegister ) - { - std::string notifyPeriod = m_help->notifyPeriod.get( id ); - /* only register for notify callback when .period is set on command line */ - if(!notifyPeriod.empty()) - { - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - - /** create notify directory */ - Environment::get().Filesystem().createDirectoryWithPermissions(outputDirectory); - } - } - } - - virtual ~HDF5Writer() - { - if (mThreadParams.dataCollector) - mThreadParams.dataCollector->finalize(); - - __delete(mThreadParams.dataCollector); - } - - void notify(uint32_t currentStep) - { - // notify is only allowed if the plugin is not controlled by the class Checkpoint - assert( m_help->selfRegister ); - - __getTransactionEvent().waitForFinished(); - - std::string filename = m_help->fileName.get( m_id ); - /* if file name is relative, prepend with common directory */ - if( boost::filesystem::path(filename).has_root_path() ) - mThreadParams.h5Filename = filename; - else - mThreadParams.h5Filename = outputDirectory + "/" + filename; - - /* window selection */ - mThreadParams.window = MovingWindow::getInstance().getWindow(currentStep); - mThreadParams.isCheckpoint = false; - dumpData(currentStep); - } - - virtual void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) - { - /* ISlave restart interface is not needed becase IIOBackend - * restart interface is used - */ - } - - virtual void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) - { - /* ISlave checkpoint interface is not needed becase IIOBackend - * checkpoint interface is used - */ - } - - void doRestart( - const uint32_t restartStep, - const std::string& restartDirectory, - const 
std::string& constRestartFilename, - const uint32_t restartChunkSize - ) - { - // restart is only allowed if the plugin is controlled by the class Checkpoint - assert(!m_help->selfRegister); - - // allow to modify the restart file name - std::string restartFilename{ constRestartFilename }; - - const uint32_t maxOpenFilesPerNode = 4; - GridController &gc = Environment::get().GridController(); - mThreadParams.dataCollector = new ParallelDomainCollector( - gc.getCommunicator().getMPIComm(), - gc.getCommunicator().getMPIInfo(), - splashMpiSize, - maxOpenFilesPerNode); - - mThreadParams.currentStep = restartStep; - - /* set attributes for datacollector files */ - DataCollector::FileCreationAttr attr; - attr.fileAccType = DataCollector::FAT_READ; - attr.mpiPosition.set(splashMpiPos); - attr.mpiSize.set(splashMpiSize); - - /* if restartFilename is relative, prepend with restartDirectory */ - if (!boost::filesystem::path(restartFilename).has_root_path()) - { - restartFilename = restartDirectory + std::string("/") + restartFilename; - } - - /* open datacollector */ - try - { - log ("HDF5 open DataCollector with file: %1%") % restartFilename; - mThreadParams.dataCollector->open(restartFilename.c_str(), attr); - } - catch (const DCException& e) - { - std::cerr << e.what() << std::endl; - throw std::runtime_error("HDF5 failed to open DataCollector"); - } - - /* load number of slides to initialize MovingWindow */ - uint32_t slides = 0; - mThreadParams.dataCollector->readAttributeInfo(restartStep, nullptr, "sim_slides").read(&slides, sizeof(slides)); - - /* apply slides to set gpus to last/written configuration */ - log ("HDF5 setting slide count for moving window to %1%") % slides; - MovingWindow::getInstance().setSlideCounter(slides, restartStep); - - /* re-distribute the local offsets in y-direction - * this will work for restarts with moving window still enabled - * and restarts that disable the moving window - * \warning enabling the moving window from a checkpoint that - * 
had no moving window will not work - */ - gc.setStateAfterSlides(slides); - - /* set window for restart, complete global domain */ - mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(restartStep); - for (uint32_t i = 0; i < simDim; ++i) - { - mThreadParams.localWindowToDomainOffset[i] = 0; - } - - ThreadParams *params = &mThreadParams; - - /* load all fields */ - meta::ForEach > forEachLoadFields; - forEachLoadFields(params); - - /* load all particles */ - meta::ForEach > forEachLoadSpecies; - forEachLoadSpecies(params, restartChunkSize); - - IdProvider::State idProvState; - ReadNDScalars()(mThreadParams, - "picongpu/idProvider/startId", &idProvState.startId, - "maxNumProc", &idProvState.maxNumProc); - ReadNDScalars()(mThreadParams, - "picongpu/idProvider/nextId", &idProvState.nextId); - log ("Setting next free id on current rank: %1%") % idProvState.nextId; - IdProvider::setState(idProvState); - - /* close datacollector */ - log ("HDF5 close DataCollector with file: %1%") % restartFilename; - mThreadParams.dataCollector->close(); - - if (mThreadParams.dataCollector) - mThreadParams.dataCollector->finalize(); - - __delete(mThreadParams.dataCollector); - } - - void dumpCheckpoint( - const uint32_t currentStep, - const std::string& checkpointDirectory, - const std::string& checkpointFilename - ) - { - // checkpointing is only allowed if the plugin is controlled by the class Checkpoint - assert(!m_help->selfRegister); - - __getTransactionEvent().waitForFinished(); - /* if file name is relative, prepend with common directory */ - if( boost::filesystem::path(checkpointFilename).has_root_path() ) - mThreadParams.h5Filename = checkpointFilename; - else - mThreadParams.h5Filename = checkpointDirectory + "/" + checkpointFilename; - - mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(currentStep); - mThreadParams.isCheckpoint = true; - - dumpData(currentStep); - } - -private: - - void closeH5File() - { - if (mThreadParams.dataCollector 
!= nullptr) - { - log ("HDF5 close DataCollector"); - mThreadParams.dataCollector->close(); - } - } - - void openH5File(const std::string h5Filename) - { - const uint32_t maxOpenFilesPerNode = 4; - if (mThreadParams.dataCollector == nullptr) - { - GridController &gc = Environment::get().GridController(); - mThreadParams.dataCollector = new ParallelDomainCollector( - gc.getCommunicator().getMPIComm(), - gc.getCommunicator().getMPIInfo(), - splashMpiSize, - maxOpenFilesPerNode); - } - // set attributes for datacollector files - DataCollector::FileCreationAttr attr; - attr.enableCompression = false; - attr.fileAccType = DataCollector::FAT_CREATE; - attr.mpiPosition.set(splashMpiPos); - attr.mpiSize.set(splashMpiSize); - - // open datacollector - try - { - log ("HDF5 open DataCollector with file: %1%") % h5Filename; - mThreadParams.dataCollector->open(h5Filename.c_str(), attr); - } - catch (const DCException& e) - { - std::cerr << e.what() << std::endl; - throw std::runtime_error("HDF5 failed to open DataCollector"); - } - } - - /** dump data - * - * @param currentStep current simulation step - * @param isCheckpoint checkpoint notification - */ - void dumpData(uint32_t currentStep) - { - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - mThreadParams.cellDescription = m_cellDescription; - mThreadParams.currentStep = currentStep; - - for (uint32_t i = 0; i < simDim; ++i) - { - mThreadParams.localWindowToDomainOffset[i] = 0; - if (mThreadParams.window.globalDimensions.offset[i] > localDomain.offset[i]) - { - mThreadParams.localWindowToDomainOffset[i] = - mThreadParams.window.globalDimensions.offset[i] - - localDomain.offset[i]; - } - } - - openH5File(mThreadParams.h5Filename); - - writeHDF5((void*) &mThreadParams); - - closeH5File(); - } - - template< typename T_ParticleFilter> - struct CallWriteSpecies - { - - template - void operator()( - const std::vector< std::string > & vectorOfDataSourceNames, - ThreadParams* params, - const 
Space domainOffset - ) - { - bool const containsDataSource = plugins::misc::containsObject( - vectorOfDataSourceNames, - T_ParticleFilter::getName() - ); - - if( containsDataSource ) - { - WriteSpecies< - T_ParticleFilter - > writeSpecies; - writeSpecies(params, domainOffset); - } - - } - }; - - template< typename T_Field > - struct CallWriteFields - { - - void operator()( - const std::vector< std::string > & vectorOfDataSourceNames, - ThreadParams* params - ) - { - bool const containsDataSource = plugins::misc::containsObject( - vectorOfDataSourceNames, - T_Field::getName() - ); - - if( containsDataSource ) - { - WriteFields< - T_Field - > writeFields; - writeFields(params); - } - - } - }; - - void writeHDF5(void *p_args) - { - ThreadParams *threadParams = (ThreadParams*) (p_args); - - const SubGrid& subGrid = Environment::get().SubGrid(); - DataSpace domainOffset( - subGrid.getGlobalDomain().offset + - subGrid.getLocalDomain().offset - ); - - std::vector< std::string > vectorOfDataSourceNames; - if( m_help->selfRegister ) - { - std::string dateSourceNames = m_help->source.get( m_id ); - - vectorOfDataSourceNames = plugins::misc::splitString( - plugins::misc::removeSpaces( dateSourceNames ) - ); - } - - /* write all fields */ - log ("HDF5: (begin) writing fields."); - if (threadParams->isCheckpoint) - { - meta::ForEach > forEachWriteFields; - forEachWriteFields(threadParams); - } - else - { - bool dumpFields = plugins::misc::containsObject( - vectorOfDataSourceNames, - "fields_all" - ); - if( dumpFields ) - { - meta::ForEach< - FileOutputFields, - WriteFields< bmpl::_1 > - > forEachWriteFields; - forEachWriteFields(threadParams); - } - - meta::ForEach< - typename Help::AllFieldSources, - CallWriteFields< - bmpl::_1 - > - >{}( - vectorOfDataSourceNames, - threadParams - ); - } - log ("HDF5: ( end ) writing fields."); - - /* write all particle species */ - log ("HDF5: (begin) writing particle species."); - if (threadParams->isCheckpoint) - { - meta::ForEach< - 
FileCheckpointParticles, - WriteSpecies< - plugins::misc::UnfilteredSpecies< bmpl::_1 > - > - > writeSpecies; - writeSpecies(threadParams, domainOffset); - } - else - { - bool dumpAllParticles = plugins::misc::containsObject( - vectorOfDataSourceNames, - "species_all" - ); - - if( dumpAllParticles ) - { - meta::ForEach< - FileOutputParticles, - WriteSpecies< - plugins::misc::UnfilteredSpecies< bmpl::_1 > - > - > writeSpecies; - writeSpecies(threadParams, domainOffset); - } - - meta::ForEach< - typename Help::AllEligibleSpeciesSources, - CallWriteSpecies< - bmpl::_1 - > - >{}( - vectorOfDataSourceNames, - threadParams, - domainOffset - ); - - } - log ("HDF5: ( end ) writing particle species."); - - auto idProviderState = IdProvider::getState(); - log("HDF5: Writing IdProvider state (StartId: %1%, NextId: %2%, maxNumProc: %3%)") - % idProviderState.startId % idProviderState.nextId % idProviderState.maxNumProc; - WriteNDScalars()(*threadParams, - "picongpu/idProvider/startId", idProviderState.startId, - "maxNumProc", idProviderState.maxNumProc); - WriteNDScalars()(*threadParams, - "picongpu/idProvider/nextId", idProviderState.nextId); - - // write global meta attributes - WriteMeta writeMetaAttributes; - writeMetaAttributes(threadParams); - } - - ThreadParams mThreadParams; - - std::shared_ptr< Help > m_help; - size_t m_id; - - MappingDesc *m_cellDescription; - - std::string outputDirectory; - - DataSpace mpi_pos; - DataSpace mpi_size; - - Dimensions splashMpiPos; - Dimensions splashMpiSize; -}; - -} //namespace hdf5 -} //namespace picongpu diff --git a/include/picongpu/plugins/hdf5/NDScalars.hpp b/include/picongpu/plugins/hdf5/NDScalars.hpp deleted file mode 100644 index 163855999a..0000000000 --- a/include/picongpu/plugins/hdf5/NDScalars.hpp +++ /dev/null @@ -1,136 +0,0 @@ -/* Copyright 2016-2020 Alexander Grund - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/traits/PICToSplash.hpp" -#include - -namespace picongpu { -namespace hdf5 { - -/** Functor for writing ND scalar fields with N=simDim - * In the current implementation each process (of the ND grid of processes) writes 1 scalar value - * Optionally the processes can also write an attribute for this dataset by using a non-empty attrName - * - * @tparam T_Scalar Type of the scalar value to write - * @tparam T_Attribute Type of the attribute (can be omitted if attribute is not written, defaults to uint64_t) - */ -template -struct WriteNDScalars -{ - void operator()(ThreadParams& params, - const std::string& name, T_Scalar value, - const std::string& attrName = "", T_Attribute attribute = T_Attribute()) - { - log("HDF5: write %1%D scalars: %2%") % simDim % name; - - // Size over all processes - Dimensions globalSize(1, 1, 1); - // Offset for this process - Dimensions localOffset(0, 0, 0); - // Offset for all processes - Dimensions globalOffset(0, 0, 0); - - for (uint32_t d = 0; d < simDim; ++d) - { - globalSize[d] = Environment::get().GridController().getGpuNodes()[d]; - localOffset[d] = Environment::get().GridController().getPosition()[d]; - } - - Dimensions localSize(1, 1, 1); - - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - 
__getTransactionEvent().waitForFinished(); - - typename traits::PICToSplash::type splashType; - params.dataCollector->writeDomain(params.currentStep, /* id == time step */ - globalSize, /* total size of dataset over all processes */ - localOffset, /* write offset for this process */ - splashType, /* data type */ - simDim, /* NDims spatial dimensionality of the field */ - splash::Selection(localSize), /* data size of this process */ - name.c_str(), /* data set name */ - splash::Domain( - globalOffset, /* offset of the global domain */ - globalSize /* size of the global domain */ - ), - DomainCollector::GridType, - &value); - - if(!attrName.empty()) - { - /*simulation attribute for data*/ - typename traits::PICToSplash::type attType; - - log("HDF5: write attribute %1% for scalars: %2%") % attrName % name; - params.dataCollector->writeAttribute(params.currentStep, - attType, name.c_str(), - attrName.c_str(), &attribute); - } - } -}; - -/** Functor for reading ND scalar fields with N=simDim - * In the current implementation each process (of the ND grid of processes) reads 1 scalar value - * Optionally the processes can also read an attribute for this dataset by using a non-empty attrName - * - * @tparam T_Scalar Type of the scalar value to read - * @tparam T_Attribute Type of the attribute (can be omitted if attribute is not read, defaults to uint64_t) - */ -template -struct ReadNDScalars -{ - void operator()(ThreadParams& params, - const std::string& name, T_Scalar* value, - const std::string& attrName = "", T_Attribute* attribute = nullptr) - { - log("HDF5: read %1%D scalars: %2%") % simDim % name; - - Dimensions domain_offset(0, 0, 0); - for (uint32_t d = 0; d < simDim; ++d) - domain_offset[d] = Environment::get().GridController().getPosition()[d]; - - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); - - DomainCollector::DomDataClass data_class; - DataContainer *dataContainer = - 
params.dataCollector->readDomain(params.currentStep, - name.c_str(), - Domain(domain_offset, Dimensions(1, 1, 1)), - &data_class); - - typename traits::PICToSplash::type splashType; - *value = *static_cast(dataContainer->getIndex(0)->getData()); - __delete(dataContainer); - - if(!attrName.empty()) - { - log("HDF5: read attribute %1% for scalars: %2%") % attrName % name; - params.dataCollector->readAttributeInfo(params.currentStep, name.c_str(), attrName.c_str()).read(attribute, sizeof(T_Attribute)); - log("HDF5: attribute %1% = %2%") % attrName % *attribute; - } - } -}; - -} // namespace hdf5 -} // namespace picongpu diff --git a/include/picongpu/plugins/hdf5/WriteFields.hpp b/include/picongpu/plugins/hdf5/WriteFields.hpp deleted file mode 100644 index 8763211369..0000000000 --- a/include/picongpu/plugins/hdf5/WriteFields.hpp +++ /dev/null @@ -1,234 +0,0 @@ -/* Copyright 2014-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, - * Benjamin Worpitz, Sergei Bastrakov - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include -#include "picongpu/simulation_defines.hpp" -#include "picongpu/fields/CellType.hpp" -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/plugins/hdf5/writer/Field.hpp" -#include "picongpu/traits/IsFieldDomainBound.hpp" - -#include - -#include - - -namespace picongpu -{ - -namespace hdf5 -{ - -using namespace pmacc; -using namespace splash; - -/** - * Helper class to create a unit vector of type float_64 - */ -class CreateUnit -{ -public: - template - static std::vector createUnit(UnitType unit, uint32_t numComponents) - { - std::vector tmp(numComponents); - for (uint32_t i = 0; i < numComponents; ++i) - tmp[i] = unit[i]; - return tmp; - } -}; - - -/** - * Write calculated fields to HDF5 file. - * - * @tparam T_Field field class - */ -template< typename T_Field > -class WriteFields -{ -private: - - using ValueType = typename T_Field::ValueType; - - static std::vector getUnit() - { - using UnitType = typename T_Field::UnitValueType ; - UnitType unit = T_Field::getUnit(); - return CreateUnit::createUnit(unit, T_Field::numComponents); - } - -public: - - HDINLINE void operator()(ThreadParams* params) - { -#ifndef __CUDA_ARCH__ - DataConnector &dc = Environment<>::get().DataConnector(); - - auto field = dc.get< T_Field >( T_Field::getName() ); - params->gridLayout = field->getGridLayout(); - - // convert in a std::vector of std::vector format for writeField API - const traits::FieldPosition fieldPos; - - std::vector > inCellPosition; - for( uint32_t n = 0; n < T_Field::numComponents; ++n ) - { - std::vector inCellPositonComponent; - for( uint32_t d = 0; d < simDim; ++d ) - inCellPositonComponent.push_back( fieldPos()[n][d] ); - inCellPosition.push_back( inCellPositonComponent ); - } - - /** \todo check if always correct at this point, depends on solver - * implementation */ - const float_X timeOffset = 0.0; - - const bool isDomainBound = traits::IsFieldDomainBound< T_Field >::value; - Field::writeField( - params, - 
T_Field::getName(), - getUnit(), - T_Field::getUnitDimension(), - inCellPosition, - timeOffset, - field->getHostDataBox(), - ValueType(), - isDomainBound - ); - - dc.releaseData( T_Field::getName() ); -#endif - } - -}; - -/** Calculate FieldTmp with given solver and particle species - * and write them to hdf5. - * - * FieldTmp is calculated on device and than dumped to HDF5. - * - * @tparam Solver solver class for species - * @tparam Species species/particles class - */ -template< typename Solver, typename Species > -class WriteFields > -{ -public: - /* - * This is only a wrapper function to allow disable nvcc warnings. - * Warning: calling a __host__ function from __host__ __device__ - * function. - * Use of PMACC_NO_NVCC_HDWARNING is not possible if we call a virtual - * method inside of the method were we disable the warnings. - * Therefore we create this method and call a new method were we can - * call virtual functions. - */ - PMACC_NO_NVCC_HDWARNING - HDINLINE void operator()(ThreadParams* tparam) - { - this->operator_impl(tparam); - } - -private: - typedef typename FieldTmp::ValueType ValueType; - - /** Create a name for the hdf5 identifier. 
- */ - static std::string getName() - { - return FieldTmpOperation::getName(); - } - - /** Get the unit for the result from the solver*/ - static std::vector getUnit() - { - typedef typename FieldTmp::UnitValueType UnitType; - UnitType unit = FieldTmp::getUnit(); - const uint32_t components = GetNComponents::value; - return CreateUnit::createUnit(unit, components); - } - - HINLINE void operator_impl(ThreadParams* params) - { - DataConnector &dc = Environment<>::get().DataConnector(); - - /*## update field ##*/ - - /*load FieldTmp without copy data to host*/ - PMACC_CASSERT_MSG( - _please_allocate_at_least_one_FieldTmp_in_memory_param, - fieldTmpNumSlots > 0 - ); - auto fieldTmp = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true ); - /*load particle without copy particle data to host*/ - auto speciesTmp = dc.get< Species >( Species::FrameType::getName(), true ); - - fieldTmp->getGridBuffer().getDeviceBuffer().setValue(ValueType::create(0.0)); - /*run algorithm*/ - fieldTmp->template computeValue< CORE + BORDER, Solver >(*speciesTmp, params->currentStep); - - EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(fieldTmpEvent); - /* copy data to host that we can write same to disk*/ - fieldTmp->getGridBuffer().deviceToHost(); - dc.releaseData( Species::FrameType::getName() ); - /*## finish update field ##*/ - - /*wrap in a one-component vector for writeField API*/ - const traits::FieldPosition - fieldPos; - - std::vector > inCellPosition; - std::vector inCellPositonComponent; - for( uint32_t d = 0; d < simDim; ++d ) - inCellPositonComponent.push_back( fieldPos()[0][d] ); - inCellPosition.push_back( inCellPositonComponent ); - - /** \todo check if always correct at this point, depends on solver - * implementation */ - const float_X timeOffset = 0.0; - - params->gridLayout = fieldTmp->getGridLayout(); - const bool isDomainBound = traits::IsFieldDomainBound< FieldTmp >::value; - /*write data to HDF5 file*/ - 
Field::writeField( - params, - getName(), - getUnit(), - FieldTmp::getUnitDimension(), - inCellPosition, - timeOffset, - fieldTmp->getHostDataBox(), - ValueType(), - isDomainBound - ); - - dc.releaseData( FieldTmp::getUniqueId( 0 ) ); - - } - -}; - -} //namspace hdf5 - -} //namespace picongpu diff --git a/include/picongpu/plugins/hdf5/WriteMeta.hpp b/include/picongpu/plugins/hdf5/WriteMeta.hpp deleted file mode 100644 index 3168e950d6..0000000000 --- a/include/picongpu/plugins/hdf5/WriteMeta.hpp +++ /dev/null @@ -1,319 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - - - -#pragma once - -#include "picongpu/simulation_defines.hpp" - -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/plugins/common/stringHelpers.hpp" -#include - -#include "picongpu/fields/absorber/Absorber.hpp" -#include "picongpu/fields/currentInterpolation/CurrentInterpolation.hpp" - -#include "picongpu/traits/SIBaseUnits.hpp" -#include "picongpu/traits/SplashToPIC.hpp" -#include "picongpu/traits/PICToSplash.hpp" - -#include -#include -#include - -namespace picongpu -{ - -namespace hdf5 -{ -using namespace pmacc; - -namespace writeMeta -{ - /** write openPMD species meta data - * - * @tparam numSpecies count of defined species - */ - template< uint32_t numSpecies = bmpl::size::type::value > - struct OfAllSpecies - { - /** write meta data for species - * - * @param dc hdf5 data connector - * @param meshesPath path to mesh entry - * @param currentStep current simulation time step - */ - void operator()( - ParallelDomainCollector* dc, - const std::string& meshesPath, - const uint32_t currentStep - ) const - { - // assume all boundaries are like the first species for openPMD 1.0.0 - GetStringProperties::type> particleBoundaryProp; - std::list listParticleBoundary; - std::list listParticleBoundaryParam; - for( uint32_t i = NumberOfExchanges::value - 1; i > 0; --i ) - { - if( FRONT % i == 0 ) - { - listParticleBoundary.push_back( - particleBoundaryProp[ExchangeTypeNames()[i]]["name"].value - ); - listParticleBoundaryParam.push_back( - particleBoundaryProp[ExchangeTypeNames()[i]]["param"].value - ); - } - } - helper::GetSplashArrayOfString getSplashArrayOfString; - auto arrParticleBoundary = getSplashArrayOfString( listParticleBoundary ); - ColTypeString ctParticleBoundary( arrParticleBoundary.maxLen ); - auto arrParticleBoundaryParam = getSplashArrayOfString( listParticleBoundaryParam ); - ColTypeString ctParticleBoundaryParam( arrParticleBoundaryParam.maxLen ); - - dc->writeAttribute( currentStep, ctParticleBoundary, meshesPath.c_str(), - 
"particleBoundary", - 1u, Dimensions( listParticleBoundary.size(), 0, 0 ), - &( arrParticleBoundary.buffers.at( 0 ) ) - ); - dc->writeAttribute( currentStep, ctParticleBoundaryParam, meshesPath.c_str(), - "particleBoundaryParameters", - 1u, Dimensions( listParticleBoundaryParam.size(), 0, 0 ), - &( arrParticleBoundaryParam.buffers.at( 0 ) ) - ); - } - }; - - /** specialization if no species are defined */ - template< > - struct OfAllSpecies< 0 > - { - /** write meta data for species - * - * @param dc hdf5 data connector - * @param meshesPath path to mesh entry - * @param currentStep current simulation time step - */ - void operator()( - ParallelDomainCollector* /* dc */, - const std::string& /* meshesPath */, - const uint32_t /* currentStep */ - ) const - { - } - }; - -} // namespace writeMeta - - struct WriteMeta - { - typedef PICToSplash::type SplashFloatXType; - - void operator()(ThreadParams *threadParams) - { - ColTypeUInt32 ctUInt32; - ColTypeUInt64 ctUInt64; - ColTypeDouble ctDouble; - SplashFloatXType splashFloatXType; - - ParallelDomainCollector *dc = threadParams->dataCollector; - uint32_t currentStep = threadParams->currentStep; - - /* openPMD attributes */ - /* required */ - const std::string openPMDversion( "1.0.0" ); - ColTypeString ctOpenPMDversion( openPMDversion.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctOpenPMDversion, "openPMD", - openPMDversion.c_str() ); - - const uint32_t openPMDextension = 1; // ED-PIC ID - dc->writeGlobalAttribute( threadParams->currentStep, - ctUInt32, "openPMDextension", - &openPMDextension ); - - const std::string basePath( "/data/%T/" ); - ColTypeString ctBasePath( basePath.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctBasePath, "basePath", - basePath.c_str() ); - - const std::string meshesPath( "fields/" ); - ColTypeString ctMeshesPath( meshesPath.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctMeshesPath, "meshesPath", - meshesPath.c_str() ); - 
- const std::string particlesPath( "particles/" ); - ColTypeString ctParticlesPath( particlesPath.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctParticlesPath, "particlesPath", - particlesPath.c_str() ); - - const std::string iterationEncoding( "fileBased" ); - ColTypeString ctIterationEncoding( iterationEncoding.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctIterationEncoding, "iterationEncoding", - iterationEncoding.c_str() ); - - const std::string iterationFormat( - Environment< simDim >::get().Filesystem().basename( threadParams->h5Filename ) + - std::string("_%T.h5") - ); - ColTypeString ctIterationFormat( iterationFormat.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctIterationFormat, "iterationFormat", - iterationFormat.c_str() ); - - /* recommended */ - const std::string author = Environment<>::get().SimulationDescription().getAuthor(); - if( author.length() > 0 ) - { - ColTypeString ctAuthor( author.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctAuthor, "author", - author.c_str() ); - } - const std::string software( "PIConGPU" ); - ColTypeString ctSoftware( software.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctSoftware, "software", - software.c_str() ); - - std::stringstream softwareVersion; - softwareVersion << PICONGPU_VERSION_MAJOR << "." - << PICONGPU_VERSION_MINOR << "." - << PICONGPU_VERSION_PATCH; - if( ! 
std::string(PICONGPU_VERSION_LABEL).empty() ) - softwareVersion << "-" << PICONGPU_VERSION_LABEL; - ColTypeString ctSoftwareVersion( softwareVersion.str().length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctSoftwareVersion, "softwareVersion", - softwareVersion.str().c_str() ); - - const std::string date = helper::getDateString( "%F %T %z" ); - ColTypeString ctDate( date.length() ); - dc->writeGlobalAttribute( threadParams->currentStep, - ctDate, "date", - date.c_str() ); - /* ED-PIC */ - GetStringProperties fieldSolverProps; - const std::string fieldSolver( fieldSolverProps["name"].value ); - ColTypeString ctFieldSolver( fieldSolver.length() ); - dc->writeAttribute(currentStep, ctFieldSolver, meshesPath.c_str(), - "fieldSolver", fieldSolver.c_str()); - - /* order as in axisLabels: - * 3D: z-lower, z-upper, y-lower, y-upper, x-lower, x-upper - * 2D: y-lower, y-upper, x-lower, x-upper - */ - GetStringProperties fieldBoundaryProp; - std::list listFieldBoundary; - std::list listFieldBoundaryParam; - for( uint32_t i = NumberOfExchanges::value - 1; i > 0; --i ) - { - if( FRONT % i == 0 ) - { - listFieldBoundary.push_back( - fieldBoundaryProp[ExchangeTypeNames()[i]]["name"].value - ); - listFieldBoundaryParam.push_back( - fieldBoundaryProp[ExchangeTypeNames()[i]]["param"].value - ); - } - } - helper::GetSplashArrayOfString getSplashArrayOfString; - auto arrFieldBoundary = getSplashArrayOfString( listFieldBoundary ); - ColTypeString ctFieldBoundaries( arrFieldBoundary.maxLen ); - auto arrFieldBoundaryParam = getSplashArrayOfString( listFieldBoundaryParam ); - ColTypeString ctFieldBoundariesParam( arrFieldBoundaryParam.maxLen ); - - dc->writeAttribute( currentStep, ctFieldBoundaries, meshesPath.c_str(), - "fieldBoundary", - 1u, Dimensions( listFieldBoundary.size(), 0, 0 ), - &( arrFieldBoundary.buffers.at( 0 ) ) - ); - dc->writeAttribute( currentStep, ctFieldBoundariesParam, meshesPath.c_str(), - "fieldBoundaryParameters", - 1u, Dimensions( 
listFieldBoundaryParam.size(), 0, 0 ), - &( arrFieldBoundaryParam.buffers.at( 0 ) ) - ); - - writeMeta::OfAllSpecies<>()( dc, meshesPath, currentStep ); - - GetStringProperties currentSmoothingProp; - const std::string currentSmoothing( currentSmoothingProp["name"].value ); - ColTypeString ctCurrentSmoothing( currentSmoothing.length() ); - dc->writeAttribute( currentStep, ctCurrentSmoothing, meshesPath.c_str(), - "currentSmoothing", currentSmoothing.c_str() ); - - if( currentSmoothingProp.find( "param" ) != currentSmoothingProp.end() ) - { - const std::string currentSmoothingParam( currentSmoothingProp["param"].value ); - ColTypeString ctCurrentSmoothingParam( currentSmoothingParam.length() ); - dc->writeAttribute( currentStep, ctCurrentSmoothingParam, meshesPath.c_str(), - "currentSmoothingParameters", currentSmoothingParam.c_str() ); - } - - const std::string chargeCorrection( "none" ); - ColTypeString ctChargeCorrection( chargeCorrection.length() ); - dc->writeAttribute( currentStep, ctChargeCorrection, meshesPath.c_str(), - "chargeCorrection", chargeCorrection.c_str() ); - - /* write number of slides */ - const uint32_t slides = MovingWindow::getInstance().getSlideCounter( - threadParams->currentStep - ); - - dc->writeAttribute( threadParams->currentStep, - ctUInt32, nullptr, "sim_slides", &slides ); - - - /* openPMD: required time attributes */ - dc->writeAttribute( currentStep, splashFloatXType, nullptr, "dt", &DELTA_T ); - const float_X time = float_X( threadParams->currentStep ) * DELTA_T; - dc->writeAttribute( currentStep, splashFloatXType, nullptr, "time", &time ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "timeUnitSI", &UNIT_TIME ); - - /* write normed grid parameters */ - dc->writeAttribute( currentStep, splashFloatXType, nullptr, "cell_width", &CELL_WIDTH ); - dc->writeAttribute( currentStep, splashFloatXType, nullptr, "cell_height", &CELL_HEIGHT ); - if( simDim == DIM3 ) - { - dc->writeAttribute( currentStep, splashFloatXType, nullptr, 
"cell_depth", &CELL_DEPTH ); - } - - /* write base units */ - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_energy", &UNIT_ENERGY ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_length", &UNIT_LENGTH ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_speed", &UNIT_SPEED ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_time", &UNIT_TIME ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_mass", &UNIT_MASS ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_charge", &UNIT_CHARGE ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_efield", &UNIT_EFIELD ); - dc->writeAttribute( currentStep, ctDouble, nullptr, "unit_bfield", &UNIT_BFIELD ); - - /* write physical constants */ - dc->writeAttribute( currentStep, splashFloatXType, nullptr, "mue0", &MUE0 ); - dc->writeAttribute( currentStep, splashFloatXType, nullptr, "eps0", &EPS0 ); - } - }; -} // namespace hdf5 -} // namespace picongpu diff --git a/include/picongpu/plugins/hdf5/WriteSpecies.hpp b/include/picongpu/plugins/hdf5/WriteSpecies.hpp deleted file mode 100644 index d0e34adcd4..0000000000 --- a/include/picongpu/plugins/hdf5/WriteSpecies.hpp +++ /dev/null @@ -1,603 +0,0 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/traits/SIBaseUnits.hpp" -#include "picongpu/traits/PICToOpenPMD.hpp" -#include "picongpu/plugins/ISimulationPlugin.hpp" -#include "picongpu/plugins/misc/ComponentNames.hpp" -#include "picongpu/plugins/output/WriteSpeciesCommon.hpp" -#include "picongpu/plugins/kernel/CopySpecies.kernel" -#include "picongpu/particles/traits/GetSpeciesFlagName.hpp" -#include "picongpu/plugins/hdf5/writer/ParticleAttribute.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -namespace picongpu -{ - -namespace hdf5 -{ -using namespace pmacc; - -// = ColTypeUInt64_5Array -TYPE_ARRAY(UInt64_5, H5T_INTEL_U64, uint64_t, 5); - -using namespace splash; - -namespace detail -{ - template< typename T_FrameType > - struct GetChargeOrZero - { - static constexpr bool hasChargeRatio = pmacc::traits::HasFlag< - T_FrameType, - chargeRatio<> - >::type::value; - - template< typename T_Defer = float_X > - typename std::enable_if< - hasChargeRatio, - T_Defer - >::type - operator()() const - { - return frame::getCharge< T_FrameType >(); - } - - template< typename T_Defer = float_X > - typename std::enable_if< - !hasChargeRatio, - T_Defer - >::type - operator()() const - { - return float_X( 0. ); - } - }; - - template< typename T_FrameType > - struct GetMassOrZero - { - static constexpr bool hasMassRatio = pmacc::traits::HasFlag< - T_FrameType, - massRatio<> - >::type::value; - - template< typename T_Defer = float_X > - typename std::enable_if< - hasMassRatio, - T_Defer - >::type - operator()() const - { - return frame::getMass< T_FrameType >(); - } - - template< typename T_Defer = float_X > - typename std::enable_if< - !hasMassRatio, - T_Defer - >::type - operator()() const - { - return float_X( 0. 
); - } - }; -} - -/** Write copy particle to host memory and dump to HDF5 file - * - * @tparam T_SpeciesFilter type and filter of species - * - */ -template< typename T_SpeciesFilter > -struct WriteSpecies -{ -public: - - typedef typename T_SpeciesFilter::Species ThisSpecies; - typedef typename ThisSpecies::FrameType FrameType; - typedef typename FrameType::ParticleDescription ParticleDescription; - typedef typename FrameType::ValueTypeSeq ParticleAttributeList; - - - /* delete multiMask and localCellIdx in hdf5 particle*/ - typedef bmpl::vector TypesToDelete; - typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; - - /* add totalCellIdx for hdf5 particle*/ - typedef typename MakeSeq< - ParticleCleanedAttributeList, - totalCellIdx - >::type ParticleNewAttributeList; - - typedef - typename ReplaceValueTypeSeq::type - NewParticleDescription; - - typedef Frame Hdf5FrameType; - - /** - * @param domainOffset offset to the local domain: globalDomain.offset + localDomain.offset - */ - template - HINLINE void operator()(ThreadParams* params, - const Space domainOffset) - { - log ("HDF5: (begin) write species: %1%") % T_SpeciesFilter::getName(); - DataConnector &dc = Environment<>::get().DataConnector(); - /* load particle without copy particle data to host */ - auto speciesTmp = dc.get< ThisSpecies >( ThisSpecies::FrameType::getName(), true ); - - /* count number of particles for this species on the device */ - uint64_t numParticles = 0; - - log ("HDF5: (begin) count particles: %1%") % T_SpeciesFilter::getName(); - - // enforce that the filter interface is fulfilled - particles::filter::IUnary< typename T_SpeciesFilter::Filter > particleFilter{ params->currentStep }; - /* at this point we cast to uint64_t, before we assume that per device - * less then 1e9 (int range) particles will be counted - */ - numParticles = uint64_t( pmacc::CountParticles::countOnDevice< CORE + BORDER >( - *speciesTmp, - *(params->cellDescription), - params->localWindowToDomainOffset, 
- params->window.localDimensions.size, - particleFilter - )); - - - log ("HDF5: ( end ) count particles: %1% = %2%") % T_SpeciesFilter::getName() % numParticles; - Hdf5FrameType hostFrame; - log ("HDF5: (begin) malloc mapped memory: %1%") % T_SpeciesFilter::getName(); - /*malloc mapped memory*/ - meta::ForEach > mallocMem; - mallocMem(hostFrame, numParticles); - log ("HDF5: ( end ) malloc mapped memory: %1%") % T_SpeciesFilter::getName(); - - if (numParticles != 0) - { - - log ("HDF5: (begin) get mapped memory device pointer: %1%") % T_SpeciesFilter::getName(); - /*load device pointer of mapped memory*/ - Hdf5FrameType deviceFrame; - meta::ForEach > getDevicePtr; - getDevicePtr(deviceFrame, hostFrame); - log ("HDF5: ( end ) get mapped memory device pointer: %1%") % T_SpeciesFilter::getName(); - - log ("HDF5: (begin) copy particle to host: %1%") % T_SpeciesFilter::getName(); - typedef bmpl::vector< typename GetPositionFilter::type > usedFilters; - typedef typename FilterFactory::FilterType MyParticleFilter; - MyParticleFilter filter; - /* activate filter pipeline if moving window is activated */ - filter.setStatus(MovingWindow::getInstance().isEnabled()); - filter.setWindowPosition(params->localWindowToDomainOffset, - params->window.localDimensions.size); - - /* int: assume < 2e9 particles per device */ - GridBuffer counterBuffer(DataSpace(1)); - AreaMapping < CORE + BORDER, MappingDesc > mapper(*(params->cellDescription)); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - /* this sanity check costs a little bit of time but hdf5 writing is slower */ - PMACC_KERNEL( CopySpecies< numWorkers >{} )( - mapper.getGridDim(), - numWorkers - )( - counterBuffer.getDeviceBuffer().getPointer(), - deviceFrame, speciesTmp->getDeviceParticlesBox(), - filter, - domainOffset, - totalCellIdx_, - mapper, - particleFilter - ); - counterBuffer.deviceToHost(); - log ("HDF5: ( end ) copy particle to 
host: %1%") % T_SpeciesFilter::getName(); - __getTransactionEvent().waitForFinished(); - log ("HDF5: all events are finished: %1%") % T_SpeciesFilter::getName(); - - PMACC_ASSERT((uint64_t) counterBuffer.getHostBuffer().getDataBox()[0] == numParticles); - } - - /* We rather do an allgather at this point then letting libSplash - * do an allgather during write to find out the global number of - * particles. - */ - log ("HDF5: (begin) collect particle sizes for %1%") % T_SpeciesFilter::getName(); - - ColTypeUInt64 ctUInt64; - ColTypeDouble ctDouble; - GridController& gc = Environment::get().GridController(); - - const uint64_t numRanks( gc.getGlobalSize() ); - const uint64_t myRank( gc.getGlobalRank() ); - - /* For collective write calls we need the information: - * - how many particles will be written globally - * - what is my particle offset within this global data set - * - * interleaved in array: - * numParticles for mpi rank, mpi rank - * - * the mpi rank is an arbitrary quantity and might change after a - * restart, but we only use it to order our patches and offsets - */ - std::vector particleCounts( 2 * numRanks, 0u ); - uint64_t myParticlePatch[ 2 ]; - myParticlePatch[ 0 ] = numParticles; - myParticlePatch[ 1 ] = myRank; - - /* we do the scan over MPI ranks since it does not matter how the - * global rank or scalar position (which are not idential) are - * ordered as long as the particle attributes are also written in - * the same order (which is by global rank) */ - uint64_t numParticlesOffset = 0; - uint64_t numParticlesGlobal = 0; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather( - myParticlePatch, 2, MPI_UINT64_T, - &(*particleCounts.begin()), 2, MPI_UINT64_T, - gc.getCommunicator().getMPIComm() - )); - - for( uint64_t r = 0; r < numRanks; ++r ) - { - numParticlesGlobal += particleCounts.at(2 * r); - if( particleCounts.at(2 * r + 1) < myParticlePatch[ 
1 ] ) - numParticlesOffset += particleCounts.at(2 * r); - } - log ("HDF5: (end) collect particle sizes for %1%") % T_SpeciesFilter::getName(); - - /* dump non-constant particle records to hdf5 file */ - log ("HDF5: (begin) write particle records for %1%") % T_SpeciesFilter::getName(); - - const std::string speciesPath( std::string("particles/") + T_SpeciesFilter::getName() ); - - meta::ForEach > writeToHdf5; - writeToHdf5( - params, - hostFrame, - speciesPath, - numParticles, - numParticlesOffset, - numParticlesGlobal - ); - - /* write constant particle records to hdf5 file - * ions with variable charge due to a boundElectrons attribute do not write charge - */ - using hasBoundElectrons = typename pmacc::traits::HasIdentifier< - FrameType, - boundElectrons - >::type; - detail::GetChargeOrZero< FrameType > const getChargeOrZero; - if( ! hasBoundElectrons::value && getChargeOrZero.hasChargeRatio ) - { - const float_64 charge( getChargeOrZero() ); - std::vector chargeUnitDimension( NUnitDimension, 0.0 ); - chargeUnitDimension.at(SIBaseUnits::time) = 1.0; - chargeUnitDimension.at(SIBaseUnits::electricCurrent) = 1.0; - - writeConstantRecord( - params, - speciesPath + std::string("/charge"), - numParticlesGlobal, - charge, - UNIT_CHARGE, - chargeUnitDimension - ); - } - - detail::GetMassOrZero< FrameType > const getMassOrZero; - if( getMassOrZero.hasMassRatio ) - { - const float_64 mass( getMassOrZero() ); - std::vector massUnitDimension( NUnitDimension, 0.0 ); - massUnitDimension.at(SIBaseUnits::mass) = 1.0; - - writeConstantRecord( - params, - speciesPath + std::string("/mass"), - numParticlesGlobal, - mass, - UNIT_MASS, - massUnitDimension - ); - } - - /* openPMD ED-PIC: write additional attributes */ - const float_64 particleShape( GetShape::type::support - 1 ); - params->dataCollector->writeAttribute( params->currentStep, - ctDouble, - speciesPath.c_str(), - "particleShape", - &particleShape ); - - traits::GetSpeciesFlagName > currentDepositionName; - const 
std::string currentDeposition( currentDepositionName() ); - ColTypeString ctCurrentDeposition( currentDeposition.length() ); - params->dataCollector->writeAttribute( params->currentStep, - ctCurrentDeposition, - speciesPath.c_str(), - "currentDeposition", - currentDeposition.c_str() ); - - traits::GetSpeciesFlagName > particlePushName; - const std::string particlePush( particlePushName() ); - ColTypeString ctParticlePush( particlePush.length() ); - params->dataCollector->writeAttribute( params->currentStep, - ctParticlePush, - speciesPath.c_str(), - "particlePush", - particlePush.c_str() ); - - traits::GetSpeciesFlagName > particleInterpolationName; - const std::string particleInterpolation( particleInterpolationName() ); - ColTypeString ctParticleInterpolation( particleInterpolation.length() ); - params->dataCollector->writeAttribute( params->currentStep, - ctParticleInterpolation, - speciesPath.c_str(), - "particleInterpolation", - particleInterpolation.c_str() ); - - const std::string particleSmoothing("none"); - ColTypeString ctParticleSmoothing(particleSmoothing.length()); - params->dataCollector->writeAttribute( params->currentStep, - ctParticleSmoothing, - speciesPath.c_str(), - "particleSmoothing", - particleSmoothing.c_str() ); - - log ("HDF5: (end) write particle records for %1%") % T_SpeciesFilter::getName(); - - /* write species particle patch meta information */ - log ("HDF5: (begin) writing particlePatches for %1%") % T_SpeciesFilter::getName(); - - std::string particlePatchesPath( speciesPath + std::string("/particlePatches") ); - - /* offset and size of our particle patches - * - numPatches: we write as many patches as MPI ranks - * - myPatchOffset: we write in the order of the MPI ranks - * - myPatchEntries: every MPI rank writes exactly one patch - */ - const Dimensions numPatches( numRanks, 1, 1 ); - const Dimensions myPatchOffset( myRank, 0, 0 ); - const Dimensions myPatchEntries( 1, 1, 1 ); - - /* numParticles: number of particles in this patch 
*/ - params->dataCollector->write( - params->currentStep, - numPatches, - myPatchOffset, - ctUInt64, 1, - myPatchEntries, - (particlePatchesPath + std::string("/numParticles")).c_str(), - &numParticles); - - /* numParticlesOffset: number of particles before this patch */ - params->dataCollector->write( - params->currentStep, - numPatches, - myPatchOffset, - ctUInt64, 1, - myPatchEntries, - (particlePatchesPath + std::string("/numParticlesOffset")).c_str(), - &numParticlesOffset); - - /* offset: absolute position where this particle patch begins including - * global domain offsets (slides), etc. - * extent: size of this particle patch, upper bound is excluded - */ - const pmacc::Selection& globalDomain = Environment::get().SubGrid().getGlobalDomain(); - const auto componentNames = plugins::misc::getComponentNames( simDim ); - for (uint32_t d = 0; d < simDim; ++d) - { - const uint64_t patchOffset = - globalDomain.offset[d] + - params->window.globalDimensions.offset[d] + - params->window.localDimensions.offset[d]; - const uint64_t patchExtent = - params->window.localDimensions.size[d]; - - params->dataCollector->write( - params->currentStep, - numPatches, - myPatchOffset, - ctUInt64, 1, - myPatchEntries, - (particlePatchesPath + std::string("/offset/") + - componentNames[d]).c_str(), - &patchOffset); - params->dataCollector->write( - params->currentStep, - numPatches, - myPatchOffset, - ctUInt64, 1, - myPatchEntries, - (particlePatchesPath + std::string("/extent/") + - componentNames[d]).c_str(), - &patchExtent); - - /* offsets and extent of the patch are positions (lengths) - * and need to be scaled like the cell idx of a particle - */ - OpenPMDUnit openPMDUnitCellIdx; - std::vector unitCellIdx = openPMDUnitCellIdx(); - - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, - (particlePatchesPath + std::string("/offset/") + - componentNames[d]).c_str(), - "unitSI", - &(unitCellIdx.at(d))); - params->dataCollector->writeAttribute( - 
params->currentStep, - ctDouble, - (particlePatchesPath + std::string("/extent/") + - componentNames[d]).c_str(), - "unitSI", - &(unitCellIdx.at(d))); - } - - OpenPMDUnitDimension openPMDUnitDimension; - std::vector unitDimensionCellIdx = openPMDUnitDimension(); - - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, - (particlePatchesPath + std::string("/offset")).c_str(), - "unitDimension", - 1u, Dimensions(7,0,0), - &(*unitDimensionCellIdx.begin())); - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, - (particlePatchesPath + std::string("/extent")).c_str(), - "unitDimension", - 1u, Dimensions(7,0,0), - &(*unitDimensionCellIdx.begin())); - - - log ("HDF5: ( end ) writing particlePatches for %1%") % T_SpeciesFilter::getName(); - - /*free host memory*/ - meta::ForEach > freeMem; - freeMem(hostFrame); - log ("HDF5: ( end ) writing species: %1%") % T_SpeciesFilter::getName(); - } - -private: - - /** Writes a constant particle record (weighted for a real particle) - * - * @param params thread parameters - * @param recordPath path to the record - * @param numParticlesGlobal global number of particles in the species - * @param value of the record - * @param unitSI conversion factor to SI - * @param unitDimension power in terms of SI base units for this record - */ - static void writeConstantRecord( - ThreadParams* params, - const std::string recordPath, - const uint64_t numParticlesGlobal, - const float_64 value, - const float_64 unitSI, - const std::vector& unitDimension - ) - { - typedef typename PICToSplash::type SplashFloatXType; - - ColTypeUInt32 ctUInt32; - ColTypeUInt64 ctUInt64; - ColTypeDouble ctDouble; - SplashFloatXType splashFloatXType; - - /* openPMD base standard - * write constant record - */ - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, recordPath.c_str(), - "value", &value); - - params->dataCollector->writeAttribute( - params->currentStep, - ctUInt64, recordPath.c_str(), - 
"shape", - 1u, Dimensions(1,0,0), - &numParticlesGlobal); - - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, recordPath.c_str(), - "unitSI", &unitSI); - - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, recordPath.c_str(), - "unitDimension", - 1u, Dimensions(7,0,0), - &(*unitDimension.begin())); - - /** \todo check if always correct at this point, depends on attribute - * and MW-solver/pusher implementation */ - const float_X timeOffset( 0.0 ); // same type as "time" in basePath - params->dataCollector->writeAttribute( - params->currentStep, - splashFloatXType, recordPath.c_str(), - "timeOffset", &timeOffset); - - /* ED-PIC extension: - * - this is a record describing a *real* particle (0: false) - * - it needs to be scaled linearly (w^1.0) to get the *macro* - * particle record - */ - const uint32_t macroWeighted( 0 ); - params->dataCollector->writeAttribute( - params->currentStep, - ctUInt32, recordPath.c_str(), - "macroWeighted", - ¯oWeighted); - - const float_64 weightingPower( 1.0 ); - params->dataCollector->writeAttribute( - params->currentStep, - ctDouble, recordPath.c_str(), - "weightingPower", - &weightingPower); - } -}; - - -} //namspace hdf5 - -} //namespace picongpu diff --git a/include/picongpu/plugins/hdf5/openPMD/patchReader.cpp b/include/picongpu/plugins/hdf5/openPMD/patchReader.cpp deleted file mode 100644 index d16ab342c0..0000000000 --- a/include/picongpu/plugins/hdf5/openPMD/patchReader.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* Copyright 2016-2020 Axel Huebl - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#if( ENABLE_HDF5 == 1 ) - -# include "picongpu/plugins/hdf5/openPMD/patchReader.hpp" -#include "picongpu/plugins/misc/ComponentNames.hpp" - - -namespace picongpu -{ -namespace hdf5 -{ -namespace openPMD -{ - void PatchReader::checkSpatialTypeSize( - splash::DataCollector* const dc, - const uint32_t availableRanks, - const int32_t id, - const std::string particlePatchPathComponent - ) const - { - // will later read into 1D buffer from first position on - splash::Dimensions dstBuffer(availableRanks, 1, 1); - splash::Dimensions dstOffset(0, 0, 0); - // sizeRead will be set - splash::Dimensions sizeRead(0, 0, 0); - - splash::CollectionType* colType = dc->readMeta( - id, - particlePatchPathComponent.c_str(), - dstBuffer, - dstOffset, - sizeRead ); - - // check if the 1D list of patches has the right length - assert( sizeRead[0] == availableRanks ); - - // currently only support uint64_t types to spare type conversation - assert( typeid(*colType) == typeid(splash::ColTypeUInt64) ); - - // free collections - delete( colType ); - colType = nullptr; - } - - void PatchReader::readPatchAttribute( - splash::DataCollector* const dc, - const uint32_t availableRanks, - const int32_t id, - const std::string particlePatchPathComponent, - uint64_t* const dest - ) const - { - // will later read into 1D buffer from first position on - splash::Dimensions dstBuffer(availableRanks, 1, 1); - splash::Dimensions dstOffset(0, 0, 0); - // sizeRead will be set - splash::Dimensions sizeRead(0, 0, 0); - - // check if types, number of patches and names are supported - checkSpatialTypeSize( dc, availableRanks, id, 
particlePatchPathComponent.c_str() ); - - // read actual offset and extent data of particle patch component - dc->read( id, - particlePatchPathComponent.c_str(), - sizeRead, - (void*)dest ); - } - - picongpu::openPMD::ParticlePatches PatchReader::operator()( - splash::DataCollector* const dc, - const uint32_t availableRanks, - const uint32_t dimensionality, - const int32_t id, - const std::string particlePatchPath - ) const - { - // allocate memory for patches - picongpu::openPMD::ParticlePatches particlePatches( availableRanks ); - const auto componentNames = plugins::misc::getComponentNames( dimensionality ); - for( uint32_t d = 0; d < dimensionality; ++d ) - { - readPatchAttribute( - dc, availableRanks, id, - particlePatchPath + std::string("offset/") + componentNames[d], - particlePatches.getOffsetComp( d ) - ); - readPatchAttribute( - dc, availableRanks, id, - particlePatchPath + std::string("extent/") + componentNames[d], - particlePatches.getExtentComp( d ) - ); - } - - // read number of particles and their starting point (offset), too - readPatchAttribute( - dc, availableRanks, id, - particlePatchPath + std::string("numParticles"), - &(*particlePatches.numParticles.begin()) - ); - readPatchAttribute( - dc, availableRanks, id, - particlePatchPath + std::string("numParticlesOffset"), - &(*particlePatches.numParticlesOffset.begin()) - ); - - // return struct of array with particle patches - return particlePatches; - } - -} // namespace openPMD -} // namespace hdf5 -} // namespace picongpu - -#endif diff --git a/include/picongpu/plugins/hdf5/openPMD/patchReader.hpp b/include/picongpu/plugins/hdf5/openPMD/patchReader.hpp deleted file mode 100644 index 04e8510047..0000000000 --- a/include/picongpu/plugins/hdf5/openPMD/patchReader.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/* Copyright 2016-2020 Axel Huebl - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/plugins/common/particlePatches.hpp" - -#if( ENABLE_HDF5 == 1 ) -# include -#endif - -#include -#include -#include -#include -#include - -namespace picongpu -{ -namespace hdf5 -{ -namespace openPMD -{ - class PatchReader; - -#if( ENABLE_HDF5 == 1 ) - /** Functor to populate and validate the list of particle patches - */ - class PatchReader - { - private: - /** Determine the variable type for `offset` and `extent` - * - * In particle patches, the `offset` and `extent` can be of - * user-defined types. This function allows to determine which - * one was used and how many patches exist. 
- * - * @note currently we force the type to be `uint64_t`, - * we can implement type conversions later on - * @note currently we force the number of patches - * to stay constant during restarts - * - * @param dc parallel libSplash DataCollector - * @param availableRanks MPI ranks in the restarted simulation - * that are currently waiting to find patches - * @param id iteration in file - * @param particlePatchPathComponent string such as - * "particles/e/particlePatches/numParticles" or - * "particles/e/particlePatches/offset/x" - */ - void checkSpatialTypeSize( - splash::DataCollector* const dc, - const uint32_t availableRanks, - const int32_t id, - const std::string particlePatchPathComponent - ) const; - - /** Read a specific record component of the particle patch - * - * Read for example: numParticles or offset/x - * - * @param[in] dc pointer to an open splash::DataCollector - * @param[in] availableRanks MPI ranks in the restarted simulation - * that are currently waiting to find patches - * @param[in] id time step to read - * @param[in] particlePatchPathComponent string such as - * "particles/e/particlePatches/numParticles" or - * "particles/e/particlePatches/offset/x" - * @param[out] dest beginning of c-array of length size() - * to write the patch record component to - */ - void readPatchAttribute( - splash::DataCollector* const dc, - const uint32_t availableRanks, - const int32_t id, - const std::string particlePatchPathComponent, - uint64_t* const dest - ) const; - - public: - /** Build up the global list of patches - * - * @param dc parallel libSplash DataCollector - * @param availableRanks MPI ranks in the restarted simulation - * that are currently waiting to find patches - * @param dimensionality the PIConGPU simDim - * @param id iteration in file - * @param particlePatchPath in-file path to a specific particle patch dir - * - * @return picongpu::openPMD::ParticlePatches struct of arrays with patches - */ - picongpu::openPMD::ParticlePatches 
operator()( - splash::DataCollector* const dc, - const uint32_t availableRanks, - const uint32_t dimensionality, - const int32_t id, - const std::string particlePatchPath - ) const; - }; -#endif - -} // namespace openPMD -} // namespace hdf5 -} // namespace picongpu diff --git a/include/picongpu/plugins/hdf5/restart/LoadParticleAttributesFromHDF5.hpp b/include/picongpu/plugins/hdf5/restart/LoadParticleAttributesFromHDF5.hpp deleted file mode 100644 index 3ce6268b5f..0000000000 --- a/include/picongpu/plugins/hdf5/restart/LoadParticleAttributesFromHDF5.hpp +++ /dev/null @@ -1,127 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - - - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/plugins/misc/ComponentNames.hpp" -#include "picongpu/traits/PICToSplash.hpp" -#include "picongpu/traits/PICToOpenPMD.hpp" -#include -#include -#include -#include - - -namespace picongpu -{ - -namespace hdf5 -{ -using namespace pmacc; - -using namespace splash; - -/** Load attribute of a species from HDF5 checkpoint file - * - * @tparam T_Identifier identifier of species attribute - */ -template< typename T_Identifier> -struct LoadParticleAttributesFromHDF5 -{ - - /** read attributes from hdf5 file - * - * @param params thread params with domainwriter, ... - * @param frame frame with all particles - * @param subGroup path to the group in the hdf5 file - * @param particlesOffset read offset in the attribute array - * @param elements number of elements which should be read the attribute array - */ - template - HINLINE void operator()( - ThreadParams* params, - FrameType& frame, - const std::string subGroup, - const uint64_t particlesOffset, - const uint64_t elements) - { - - typedef T_Identifier Identifier; - typedef typename pmacc::traits::Resolve::type::type ValueType; - const uint32_t components = GetNComponents::value; - typedef typename GetComponentsType::type ComponentType; - typedef typename PICToSplash::type SplashType; - - log ("HDF5: ( begin ) load species attribute: %1%") % Identifier::getName(); - - const auto componentNames = plugins::misc::getComponentNames( components ); - - ComponentType* tmpArray = nullptr; - if( elements > 0 ) - tmpArray = new ComponentType[elements]; - - ParallelDomainCollector* dataCollector = params->dataCollector; - - // avoid deadlock between not finished pmacc tasks and mpi calls in splash/HDF5 - __getTransactionEvent().waitForFinished(); - - for (uint32_t d = 0; d < components; d++) - { - OpenPMDName openPMDName; - std::stringstream datasetName; - datasetName << subGroup << "/" << 
openPMDName(); - if (components > 1) - datasetName << "/" << componentNames[d]; - - ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); - Dimensions sizeRead(0, 0, 0); - // read one component from file to temporary array - dataCollector->read(params->currentStep, - Dimensions(elements, 1, 1), - Dimensions(particlesOffset, 0, 0), - datasetName.str().c_str(), - sizeRead, - tmpArray - ); - PMACC_ASSERT(sizeRead[0] == elements); - - /* copy component from temporary array to array of structs */ - #pragma omp parallel for - for (size_t i = 0; i < elements; ++i) - { - ComponentType& ref = ((ComponentType*) dataPtr)[i * components + d]; - ref = tmpArray[i]; - } - } - __deleteArray(tmpArray); - - log ("HDF5: ( end ) load species attribute: %1%") % - Identifier::getName(); - } - -}; - -} //namspace hdf5 - -} //namespace picongpu - diff --git a/include/picongpu/plugins/hdf5/restart/LoadSpecies.hpp b/include/picongpu/plugins/hdf5/restart/LoadSpecies.hpp deleted file mode 100644 index 7dd94f2984..0000000000 --- a/include/picongpu/plugins/hdf5/restart/LoadSpecies.hpp +++ /dev/null @@ -1,216 +0,0 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" - -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/plugins/ISimulationPlugin.hpp" - -#include "picongpu/plugins/output/WriteSpeciesCommon.hpp" -#include "picongpu/plugins/hdf5/restart/LoadParticleAttributesFromHDF5.hpp" - -#include "picongpu/plugins/common/particlePatches.hpp" -#include "picongpu/plugins/hdf5/openPMD/patchReader.hpp" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace picongpu -{ - -namespace hdf5 -{ -using namespace pmacc; - -using namespace splash; - -/** Load species from HDF5 checkpoint file - * - * @tparam T_Species type of species - * - */ -template< typename T_Species > -struct LoadSpecies -{ -public: - - typedef T_Species ThisSpecies; - typedef typename ThisSpecies::FrameType FrameType; - typedef typename FrameType::ParticleDescription ParticleDescription; - typedef typename FrameType::ValueTypeSeq ParticleAttributeList; - - - /* delete multiMask and localCellIdx in hdf5 particle*/ - typedef bmpl::vector2 TypesToDelete; - typedef typename RemoveFromSeq::type ParticleCleanedAttributeList; - - /* add totalCellIdx for hdf5 particle*/ - typedef typename MakeSeq< - ParticleCleanedAttributeList, - totalCellIdx - >::type ParticleNewAttributeList; - - typedef - typename ReplaceValueTypeSeq::type - NewParticleDescription; - - typedef Frame Hdf5FrameType; - - /** Load species from HDF5 checkpoint file - * - * @param params thread params with domainwriter, ... 
- * @param restartChunkSize number of particles processed in one kernel call - */ - HINLINE void operator()(ThreadParams* params, const uint32_t restartChunkSize) - { - std::string const speciesName = FrameType::getName(); - log ("HDF5: (begin) load species: %1%") % speciesName; - DataConnector &dc = Environment<>::get().DataConnector(); - GridController &gc = Environment::get().GridController(); - - const std::string speciesSubGroup( - std::string("particles/") + speciesName + std::string("/") - ); - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - const pmacc::Selection& globalDomain = Environment::get().SubGrid().getGlobalDomain(); - - // load particle without copying particle data to host - auto speciesTmp = dc.get< ThisSpecies >( FrameType::getName(), true ); - - // count total number of particles on the device - uint64_cu totalNumParticles = 0; - uint64_t particleOffset = 0; - - // load particle patches offsets to find own patch - const std::string particlePatchesPath( - speciesSubGroup + std::string("particlePatches/") - ); - - // avoid deadlock between not finished pmacc tasks and mpi calls in splash/HDF5 - __getTransactionEvent().waitForFinished(); - - // read particle patches - openPMD::PatchReader patchReader; - - picongpu::openPMD::ParticlePatches particlePatches( - patchReader( - params->dataCollector, - gc.getGlobalSize(), - simDim, - params->currentStep, - particlePatchesPath - ) - ); - - /** search my entry (using my cell offset and my local grid size) - * - * \note if you want to restart with a changed GPU configuration, either - * post-process the particle-patches in the file or implement to find - * all contributing patches and then filter the particles inside those - * by position - * - * \see plugins/hdf5/WriteSpecies.hpp `WriteSpecies::operator()` - * as its counterpart - */ - const DataSpace patchOffset = - globalDomain.offset + - params->window.globalDimensions.offset + - 
params->window.localDimensions.offset; - const DataSpace patchExtent = - params->window.localDimensions.size; - - for( size_t i = 0; i < gc.getGlobalSize(); ++i ) - { - bool exactlyMyPatch = true; - - for( uint32_t d = 0; d < simDim; ++d ) - { - if( particlePatches.getOffsetComp( d )[ i ] != (uint64_t)patchOffset[ d ] ) - exactlyMyPatch = false; - if( particlePatches.getExtentComp( d )[ i ] != (uint64_t)patchExtent[ d ] ) - exactlyMyPatch = false; - } - - if( exactlyMyPatch ) - { - totalNumParticles = particlePatches.numParticles[ i ]; - particleOffset = particlePatches.numParticlesOffset[ i ]; - break; - } - } - - log ("Loading %1% particles from offset %2%") % - (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; - - Hdf5FrameType hostFrame; - log ("HDF5: malloc mapped memory: %1%") % speciesName; - /*malloc mapped memory*/ - meta::ForEach > mallocMem; - mallocMem(hostFrame, totalNumParticles); - - log ("HDF5: get mapped memory device pointer: %1%") % speciesName; - /*load device pointer of mapped memory*/ - Hdf5FrameType deviceFrame; - meta::ForEach > getDevicePtr; - getDevicePtr(deviceFrame, hostFrame); - - meta::ForEach > loadAttributes; - loadAttributes(params, hostFrame, speciesSubGroup, particleOffset, totalNumParticles); - - if (totalNumParticles != 0) - { - pmacc::particles::operations::splitIntoListOfFrames( - *speciesTmp, - deviceFrame, - totalNumParticles, - restartChunkSize, - globalDomain.offset + localDomain.offset, - totalCellIdx_, - *(params->cellDescription), - picLog::INPUT_OUTPUT() - ); - - /*free host memory*/ - meta::ForEach > freeMem; - freeMem(hostFrame); - log ("HDF5: ( end ) load species: %1%") % speciesName; - } - } -}; - - -} //namspace hdf5 - -} //namespace picongpu diff --git a/include/picongpu/plugins/hdf5/restart/RestartFieldLoader.hpp b/include/picongpu/plugins/hdf5/restart/RestartFieldLoader.hpp deleted file mode 100644 index 2552ea41f3..0000000000 --- 
a/include/picongpu/plugins/hdf5/restart/RestartFieldLoader.hpp +++ /dev/null @@ -1,229 +0,0 @@ -/* Copyright 2014-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" - -#include -#include "picongpu/fields/FieldE.hpp" -#include "picongpu/fields/FieldB.hpp" -#include "picongpu/plugins/misc/ComponentNames.hpp" -#include "picongpu/simulation/control/MovingWindow.hpp" -#include "picongpu/traits/IsFieldDomainBound.hpp" - -#include -#include -#include - -#include - -#include -#include - - -namespace picongpu -{ - -namespace hdf5 -{ - -/** - * Helper class for HDF5Writer plugin to load fields from parallel libSplash files. 
- */ -class RestartFieldLoader -{ -public: - template - static void loadField( - Data& field, - const uint32_t numComponents, - std::string objectName, - ThreadParams *params, - const bool isDomainBound - ) - { - log ("Begin loading field '%1%'") % objectName; - const DataSpace field_guard = field.getGridLayout().getGuard(); - - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(params->currentStep); - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - - using ValueType = typename Data::ValueType; - field.getHostBuffer().setValue(ValueType::create(0.0)); - - const auto componentNames = plugins::misc::getComponentNames( numComponents ); - - /* globalSlideOffset due to gpu slides between origin at time step 0 - * and origin at current time step - * ATTENTION: splash offset are globalSlideOffset + picongpu offsets - */ - DataSpace globalSlideOffset; - globalSlideOffset.y() = numSlides * localDomain.size.y(); - - Dimensions domain_offset(0, 0, 0); - for (uint32_t d = 0; d < simDim; ++d) - domain_offset[d] = localDomain.offset[d] + globalSlideOffset[d]; - - if (Environment::get().GridController().getPosition().y() == 0) - domain_offset[1] += params->window.globalDimensions.offset.y(); - - Dimensions local_domain_size; - for (uint32_t d = 0; d < simDim; ++d) - local_domain_size[d] = params->window.localDimensions.size[d]; - int elementCount = params->window.localDimensions.size.productOfComponents(); - bool useLinearIdxAsDestination = false; - - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. 
- */ - if( !isDomainBound ) - { - auto const field_layout = params->gridLayout; - auto const field_no_guard = field_layout.getDataSpaceWithoutGuarding(); - elementCount = field_no_guard.productOfComponents(); - // Number of elements on each local domain - local_domain_size = Dimensions( - elementCount, - 1, - 1 - ); - auto const & gridController = Environment::get().GridController(); - auto const rank = gridController.getGlobalRank(); - domain_offset = Dimensions( - rank * elementCount, - 0, - 0 - ); - useLinearIdxAsDestination = true; - } - - // avoid deadlock between not finished pmacc tasks and mpi calls in splash/HDF5 - __getTransactionEvent().waitForFinished(); - - auto destBox = field.getHostBuffer().getDataBox(); - for (uint32_t i = 0; i < numComponents; ++i) - { - // Read the subdomain which belongs to our mpi position. - // The total grid size must match the grid size of the stored data. - log ("Read from domain: offset=%1% size=%2%") % - domain_offset.toString() % local_domain_size.toString(); - DomainCollector::DomDataClass data_class; - DataContainer *field_container = - params->dataCollector->readDomain(params->currentStep, - (std::string("fields/") + objectName + - std::string("/") + componentNames[i]).c_str(), - Domain(domain_offset, local_domain_size), - &data_class); - - for (int linearId = 0; linearId < elementCount; ++linearId) - { - DataSpace destIdx; - if( useLinearIdxAsDestination ) - { - destIdx[ 0 ] = linearId; - } - else - { - /* calculate index inside the moving window domain which is located on the local grid*/ - destIdx = DataSpaceOperations::map(params->window.localDimensions.size, linearId); - /* jump over guard and local sliding window offset*/ - destIdx += field_guard + params->localWindowToDomainOffset; - } - destBox(destIdx)[i] = ((float_X*) (field_container->getIndex(0)->getData()))[linearId]; - } - - delete field_container; - } - - field.hostToDevice(); - - __getTransactionEvent().waitForFinished(); - - log ("Read from domain: 
offset=%1% size=%2%") % - domain_offset.toString() % local_domain_size.toString(); - log ("Finished loading field '%1%'") % objectName; - } - - template - static void cloneField(Data& fieldDest, Data& fieldSrc, std::string objectName) - { - log ("Begin cloning field '%1%'") % objectName; - DataSpace field_grid = fieldDest.getGridLayout().getDataSpace(); - - size_t elements = field_grid.productOfComponents(); - float3_X *ptrDest = fieldDest.getHostBuffer().getDataBox().getPointer(); - float3_X *ptrSrc = fieldSrc.getHostBuffer().getDataBox().getPointer(); - - for (size_t k = 0; k < elements; ++k) - { - ptrDest[k] = ptrSrc[k]; - } - - fieldDest.hostToDevice(); - - __getTransactionEvent().waitForFinished(); - - log ("Finished cloning field '%1%'") % objectName; - } -}; - -/** - * Hepler class for HDF5Writer (forEach operator) to load a field from HDF5 - * - * @tparam T_Field field class to load - */ -template< typename T_Field > -struct LoadFields -{ -public: - - HDINLINE void operator()(ThreadParams* params) - { -#ifndef __CUDA_ARCH__ - DataConnector &dc = Environment<>::get().DataConnector(); - ThreadParams *tp = params; - - /* load field without copying data to host */ - std::shared_ptr< T_Field > field = dc.get< T_Field >( T_Field::getName(), true ); - tp->gridLayout = field->getGridLayout(); - - /* load from HDF5 */ - bool const isDomainBound = traits::IsFieldDomainBound< T_Field >::value; - RestartFieldLoader::loadField( - field->getGridBuffer(), - static_cast< uint32_t >( T_Field::numComponents ), - T_Field::getName(), - tp, - isDomainBound - ); - - dc.releaseData( T_Field::getName() ); -#endif - } - -}; - -using namespace pmacc; -using namespace splash; - -} //namespace hdf5 -} //namespace picongpu diff --git a/include/picongpu/plugins/hdf5/writer/Field.hpp b/include/picongpu/plugins/hdf5/writer/Field.hpp deleted file mode 100644 index bdf778d759..0000000000 --- a/include/picongpu/plugins/hdf5/writer/Field.hpp +++ /dev/null @@ -1,281 +0,0 @@ -/* Copyright 
2014-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, - * Sergei Bastrakov - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/plugins/misc/ComponentNames.hpp" -#include "picongpu/traits/PICToSplash.hpp" -#include -#include -#include - -#include - -namespace picongpu -{ - -namespace hdf5 -{ - -using namespace pmacc; -using namespace splash; - -struct Field -{ - - /* \param inCellPosition std::vector > with the outer - * vector for each component and the inner vector for - * the simDim position offset within the cell [0.0; 1.0) - */ - template< - typename T_ValueType, - typename T_DataBoxType - > - static void writeField( - ThreadParams *params, - const std::string name, - std::vector unit, - std::vector unitDimension, - std::vector > inCellPosition, - float_X timeOffset, - T_DataBoxType dataBox, - const T_ValueType&, - const bool isDomainBound - ) - { - typedef T_DataBoxType NativeDataBoxType; - typedef T_ValueType ValueType; - typedef typename GetComponentsType::type ComponentType; - typedef typename PICToSplash::type SplashType; - typedef typename PICToSplash::type SplashFloatXType; - - const uint32_t nComponents = GetNComponents::value; - - SplashType splashType; - ColTypeDouble ctDouble; - SplashFloatXType 
splashFloatXType; - - log ("HDF5 write field: %1% %2%") % - name % nComponents; - - /* parameter checking */ - PMACC_ASSERT( unit.size() == nComponents ); - PMACC_ASSERT( inCellPosition.size() == nComponents ); - for( uint32_t n = 0; n < nComponents; ++n ) - PMACC_ASSERT( inCellPosition.at(n).size() == simDim ); - PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units - - /* component names */ - const std::string recordName = std::string("fields/") + name; - - const auto componentNames = plugins::misc::getComponentNames( nComponents ); - - /*data to describe source buffer*/ - GridLayout field_layout = params->gridLayout; - DataSpace field_no_guard = params->window.localDimensions.size; - DataSpace field_guard = field_layout.getGuard() + params->localWindowToDomainOffset; - /* globalSlideOffset due to gpu slides between origin at time step 0 - * and origin at current time step - * ATTENTION: splash offset are globalSlideOffset + picongpu offsets - */ - DataSpace globalSlideOffset; - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(params->currentStep); - globalSlideOffset.y() += numSlides * localDomain.size.y(); - - Dimensions splashGlobalDomainOffset(0, 0, 0); - Dimensions splashGlobalOffsetFile(0, 0, 0); - Dimensions splashGlobalDomainSize(1, 1, 1); - - for (uint32_t d = 0; d < simDim; ++d) - { - splashGlobalOffsetFile[d] = localDomain.offset[d]; - splashGlobalDomainOffset[d] = params->window.globalDimensions.offset[d] + globalSlideOffset[d]; - splashGlobalDomainSize[d] = params->window.globalDimensions.size[d]; - } - - splashGlobalOffsetFile[1] = std::max(0, localDomain.offset[1] - - params->window.globalDimensions.offset[1]); - - /* Patch for non-domain-bound fields - * This is an ugly fix to allow output of reduced 1d PML buffers, - * that are the same size on each domain. - * This code is to be replaced with the openPMD output plugin soon. 
- */ - if( !isDomainBound ) - { - field_no_guard = field_layout.getDataSpaceWithoutGuarding(); - auto const localSize = field_no_guard.productOfComponents(); - auto const & gridController = Environment::get().GridController(); - auto const numRanks = gridController.getGlobalSize(); - auto const rank = gridController.getGlobalRank(); - // Number of elements on all domains combined - splashGlobalDomainSize = Dimensions( - localSize * numRanks, - 1, - 1 - ); - // Offset for this rank - splashGlobalOffsetFile = Dimensions( - localSize * rank, - 0, - 0 - ); - // We are not affected by moving window, so all have offset to 0 - splashGlobalDomainOffset = Dimensions( - 0, - 0, - 0 - ); - } - - size_t tmpArraySize = field_no_guard.productOfComponents(); - ComponentType* tmpArray = new ComponentType[tmpArraySize]; - - typedef DataBoxDim1Access D1Box; - D1Box d1Access(dataBox.shift(field_guard), field_no_guard); - - for (uint32_t n = 0; n < nComponents; n++) - { - /* copy data to temp array - * tmpArray has the size of the data without any offsets - */ - for (size_t i = 0; i < tmpArraySize; ++i) - { - tmpArray[i] = d1Access[i][n]; - } - - std::stringstream datasetName; - datasetName << recordName; - if (nComponents > 1) - datasetName << "/" << componentNames.at(n); - - Dimensions sizeSrcData(1, 1, 1); - - for (uint32_t d = 0; d < simDim; ++d) - { - sizeSrcData[d] = field_no_guard[d]; - } - - // avoid deadlock between not finished pmacc tasks and mpi calls in splash/HDF5 - __getTransactionEvent().waitForFinished(); - params->dataCollector->writeDomain(params->currentStep, /* id == time step */ - splashGlobalDomainSize, /* total size of dataset over all processes */ - splashGlobalOffsetFile, /* write offset for this process */ - splashType, /* data type */ - simDim, /* NDims spatial dimensionality of the field */ - splash::Selection(sizeSrcData), /* data size of this process */ - datasetName.str().c_str(), /* data set name */ - splash::Domain( - splashGlobalDomainOffset, /* 
offset of the global domain */ - splashGlobalDomainSize /* size of the global domain */ - ), - DomainCollector::GridType, - tmpArray); - - /* attributes */ - params->dataCollector->writeAttribute(params->currentStep, - splashFloatXType, datasetName.str().c_str(), - "position", - 1u, Dimensions(simDim,0,0), - &(*inCellPosition.at(n).begin())); - - params->dataCollector->writeAttribute(params->currentStep, - ctDouble, datasetName.str().c_str(), - "unitSI", &(unit.at(n))); - } - __deleteArray(tmpArray); - - - params->dataCollector->writeAttribute(params->currentStep, - ctDouble, recordName.c_str(), - "unitDimension", - 1u, Dimensions(7,0,0), - &(*unitDimension.begin())); - - params->dataCollector->writeAttribute(params->currentStep, - splashFloatXType, recordName.c_str(), - "timeOffset", &timeOffset); - - const std::string geometry("cartesian"); - ColTypeString ctGeometry(geometry.length()); - params->dataCollector->writeAttribute(params->currentStep, - ctGeometry, recordName.c_str(), - "geometry", geometry.c_str()); - - const std::string dataOrder("C"); - ColTypeString ctDataOrder(dataOrder.length()); - params->dataCollector->writeAttribute(params->currentStep, - ctDataOrder, recordName.c_str(), - "dataOrder", dataOrder.c_str()); - - char axisLabels[simDim][2]; - ColTypeString ctAxisLabels(1); - for( uint32_t d = 0; d < simDim; ++d ) - { - axisLabels[simDim-1-d][0] = char('x' + d); // 3D: F[z][y][x], 2D: F[y][x] - axisLabels[simDim-1-d][1] = '\0'; // terminator is important! 
- } - params->dataCollector->writeAttribute(params->currentStep, - ctAxisLabels, recordName.c_str(), - "axisLabels", - 1u, Dimensions(simDim,0,0), - axisLabels); - - // cellSize is {x, y, z} but fields are F[z][y][x] - std::vector gridSpacing(simDim, 0.0); - for( uint32_t d = 0; d < simDim; ++d ) - gridSpacing.at(simDim-1-d) = cellSize[d]; - params->dataCollector->writeAttribute(params->currentStep, - splashFloatXType, recordName.c_str(), - "gridSpacing", - 1u, Dimensions(simDim,0,0), - &(*gridSpacing.begin())); - - // splashGlobalDomainOffset is {x, y, z} but fields are F[z][y][x] - std::vector gridGlobalOffset(simDim, 0.0); - for( uint32_t d = 0; d < simDim; ++d ) - gridGlobalOffset.at(simDim-1-d) = - float_64(cellSize[d]) * - float_64(splashGlobalDomainOffset[d]); - params->dataCollector->writeAttribute(params->currentStep, - ctDouble, recordName.c_str(), - "gridGlobalOffset", - 1u, Dimensions(simDim,0,0), - &(*gridGlobalOffset.begin())); - - params->dataCollector->writeAttribute(params->currentStep, - ctDouble, recordName.c_str(), - "gridUnitSI", &UNIT_LENGTH); - - const std::string fieldSmoothing("none"); - ColTypeString ctFieldSmoothing(fieldSmoothing.length()); - params->dataCollector->writeAttribute(params->currentStep, - ctFieldSmoothing, recordName.c_str(), - "fieldSmoothing", fieldSmoothing.c_str()); - } - -}; - -} //namspace hdf5 - -} //namespace picongpu diff --git a/include/picongpu/plugins/hdf5/writer/ParticleAttribute.hpp b/include/picongpu/plugins/hdf5/writer/ParticleAttribute.hpp deleted file mode 100644 index 4ed258a99b..0000000000 --- a/include/picongpu/plugins/hdf5/writer/ParticleAttribute.hpp +++ /dev/null @@ -1,219 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - - - -#pragma once - - -#include "picongpu/simulation_defines.hpp" -#include "picongpu/plugins/hdf5/HDF5Writer.def" -#include "picongpu/plugins/misc/ComponentNames.hpp" -#include "picongpu/traits/PICToSplash.hpp" -#include "picongpu/traits/PICToOpenPMD.hpp" -#include -#include -#include -#include - -namespace picongpu -{ - -namespace hdf5 -{ -using namespace pmacc; - -using namespace splash; - - -/** write attribute of a particle to hdf5 file - * - * @tparam T_Identifier identifier of a particle record - */ -template< typename T_Identifier> -struct ParticleAttribute -{ - /** write attribute to hdf5 file - * - * @param params wrapped thread params such as domainwriter, ... 
- * @param frame frame with all particles - * @param speciesPath path for the current species (of FrameType) - * @param elements number of particles in this patch - * @param elementsOffset number of particles in this patch - * @param numParticlesGlobal number of particles globally - */ - template - HINLINE void operator()( - ThreadParams* params, - FrameType& frame, - const std::string speciesPath, - const uint64_t elements, - const uint64_t elementsOffset, - const uint64_t numParticlesGlobal - ) - { - - typedef T_Identifier Identifier; - typedef typename pmacc::traits::Resolve::type::type ValueType; - const uint32_t components = GetNComponents::value; - typedef typename GetComponentsType::type ComponentType; - typedef typename PICToSplash::type SplashType; - typedef typename PICToSplash::type SplashFloatXType; - - const ThreadParams *threadParams = params; - - log ("HDF5: (begin) write species attribute: %1%") % Identifier::getName(); - - SplashType splashType; - ColTypeDouble ctDouble; - ColTypeUInt32 ctUInt32; - SplashFloatXType splashFloatXType; - - OpenPMDName openPMDName; - const std::string recordPath( speciesPath + std::string("/") + openPMDName() ); - - const auto componentNames = plugins::misc::getComponentNames( components ); - - // get the SI scaling, dimensionality and weighting of the attribute - OpenPMDUnit openPMDUnit; - std::vector unit = openPMDUnit(); - OpenPMDUnitDimension openPMDUnitDimension; - std::vector unitDimension = openPMDUnitDimension(); - const bool macroWeightedBool = MacroWeighted::get(); - const uint32_t macroWeighted = (macroWeightedBool ? 
1 : 0); - const float_64 weightingPower = WeightingPower::get(); - - PMACC_ASSERT(unit.size() == components); // unitSI for each component - PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units - - /* globalSlideOffset due to gpu slides between origin at time step 0 - * and origin at current time step - * ATTENTION: splash offset are globalSlideOffset + picongpu offsets - */ - DataSpace globalSlideOffset; - const pmacc::Selection& localDomain = Environment::get().SubGrid().getLocalDomain(); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(threadParams->currentStep); - globalSlideOffset.y() += numSlides * localDomain.size.y(); - - Dimensions splashDomainOffset(0, 0, 0); - Dimensions splashGlobalDomainOffset(0, 0, 0); - - Dimensions splashDomainSize(1, 1, 1); - Dimensions splashGlobalDomainSize(1, 1, 1); - - for (uint32_t d = 0; d < simDim; ++d) - { - splashDomainOffset[d] = threadParams->window.localDimensions.offset[d] + globalSlideOffset[d]; - splashGlobalDomainOffset[d] = threadParams->window.globalDimensions.offset[d] + globalSlideOffset[d]; - splashGlobalDomainSize[d] = threadParams->window.globalDimensions.size[d]; - splashDomainSize[d] = threadParams->window.localDimensions.size[d]; - } - - typedef typename GetComponentsType::type ComponentValueType; - - ComponentValueType* tmpArray = new ComponentValueType[elements]; - - for (uint32_t d = 0; d < components; d++) - { - std::stringstream datasetName; - datasetName << recordPath; - if (components > 1) - datasetName << "/" << componentNames[d]; - - ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer(); - #pragma omp parallel for - for( uint64_t i = 0; i < elements; ++i ) - { - tmpArray[i] = ((ComponentValueType*)dataPtr)[i * components + d]; - } - - // avoid deadlock between not finished pmacc tasks and mpi calls in splash/HDF5 - __getTransactionEvent().waitForFinished(); - threadParams->dataCollector->writeDomain( - threadParams->currentStep, - /* Dimensions 
for global collective buffer */ - Dimensions(numParticlesGlobal, 1, 1), - /* 3D-offset in the globalSize-buffer this process writes to */ - Dimensions(elementsOffset, 1, 1), - /* Type information for data */ - splashType, - /* Number of dimensions (1-3) of the buffer */ - 1u, - /* Selection: size in src buffer */ - splash::Selection( - Dimensions(elements, 1, 1) - ), - /* Name of the dataset */ - datasetName.str().c_str(), - /* Global domain information */ - splash::Domain( - splashGlobalDomainOffset, - splashGlobalDomainSize - ), - /* Domain type annotation */ - DomainCollector::PolyType, - /* Buffer with data */ - tmpArray - ); - - threadParams->dataCollector->writeAttribute( - threadParams->currentStep, - ctDouble, datasetName.str().c_str(), - "unitSI", &(unit.at(d))); - - } - __deleteArray(tmpArray); - - - threadParams->dataCollector->writeAttribute( - params->currentStep, - ctDouble, recordPath.c_str(), - "unitDimension", - 1u, Dimensions(7,0,0), - &(*unitDimension.begin())); - - threadParams->dataCollector->writeAttribute( - params->currentStep, - ctUInt32, recordPath.c_str(), - "macroWeighted", - ¯oWeighted); - - threadParams->dataCollector->writeAttribute( - params->currentStep, - ctDouble, recordPath.c_str(), - "weightingPower", - &weightingPower); - - /** \todo check if always correct at this point, depends on attribute - * and MW-solver/pusher implementation */ - const float_X timeOffset = 0.0; - threadParams->dataCollector->writeAttribute(params->currentStep, - splashFloatXType, recordPath.c_str(), - "timeOffset", &timeOffset); - - log ("HDF5: ( end ) write species attribute: %1%") % - Identifier::getName(); - } - -}; - -} //namspace hdf5 - -} //namespace picongpu - diff --git a/include/picongpu/plugins/kernel/CopySpecies.kernel b/include/picongpu/plugins/kernel/CopySpecies.kernel index a3c2db7814..0693909167 100644 --- a/include/picongpu/plugins/kernel/CopySpecies.kernel +++ b/include/picongpu/plugins/kernel/CopySpecies.kernel @@ -1,4 +1,4 @@ -/* 
Copyright 2013-2020 Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once @@ -33,14 +32,11 @@ namespace picongpu { - /** copy particle from the device to the host frame * * @tparam T_numWorkers number of workers */ - template< - uint32_t T_numWorkers - > + template struct CopySpecies { /** copy particle of a species to a host frame @@ -50,7 +46,7 @@ namespace picongpu * @tparam T_Filter type of filer with particle selection rules * @tparam T_Space type of coordinate description * @tparam T_Identifier type of identifier for the particle cellIdx - * @tparam T_Mapping type of the mapper to map cuda idx to supercells + * @tparam T_Mapping type of the mapper to map cupla idx to supercells * @tparam T_Acc alpaka accelerator type * * @param acc alpaka accelerator type @@ -64,7 +60,7 @@ namespace picongpu * @param domainCellIdxIdentifier the identifier for the particle cellIdx * that is calculated with respect to * domainOffset - * @param mapper map cuda idx to supercells + * @param mapper map cupla idx to supercells */ template< typename T_DestFrame, @@ -74,20 +70,17 @@ namespace picongpu typename T_Identifier, typename T_Mapping, typename T_Acc, - typename T_ParticleFilter - > - DINLINE void - operator()( - T_Acc const & acc, - int * counter, + typename T_ParticleFilter> + DINLINE void operator()( + T_Acc const& acc, + int* counter, T_DestFrame destFrame, T_SrcBox srcBox, T_Filter filter, T_Space const domainOffset, T_Identifier const domainCellIdxIdentifier, T_Mapping const mapper, - T_ParticleFilter parFilter - ) const + T_ParticleFilter parFilter) const { using namespace pmacc::particles::operations; using namespace mappings::threads; @@ -96,160 +89,90 @@ namespace picongpu using SrcFrameType = typename T_SrcBox::FrameType; using SrcFramePtr = typename T_SrcBox::FramePtr; - constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume< typename 
SrcFrameType::SuperCellSize >::type::value; + constexpr uint32_t numParticlesPerFrame + = pmacc::math::CT::volume::type::value; constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - PMACC_SMEM( acc, srcFramePtr, SrcFramePtr ); - PMACC_SMEM( acc, localCounter, int ); - PMACC_SMEM( acc, globalOffset, int ); + PMACC_SMEM(acc, srcFramePtr, SrcFramePtr); + PMACC_SMEM(acc, localCounter, int); + PMACC_SMEM(acc, globalOffset, int); - using ParticlesDomCfg = IdxConfig< - numParticlesPerFrame, - numWorkers - >; + using ParticlesDomCfg = IdxConfig; // loop over all particles in a frame - ForEachIdx< ParticlesDomCfg > forEachParticle( workerIdx ); + ForEachIdx forEachParticle(workerIdx); - memory::CtxArray< - int, - ParticlesDomCfg - > - storageOffsetCtx{}; + memory::CtxArray storageOffsetCtx{}; - DataSpace< simDim > const supcerCellIdx = mapper.getSuperCellIndex( DataSpace< simDim > ( blockIdx ) ); + DataSpace const supcerCellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); /* offset (in cells) of the supercell relative to the origin of the * local domain (without any guards) */ - DataSpace< simDim > const localSuperCellCellOffset( - ( supcerCellIdx - mapper.getGuardingSuperCells() ) * - mapper.getSuperCellSize() - ); + DataSpace const localSuperCellCellOffset( + (supcerCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize()); // each virtual worker needs only one filter - filter.setSuperCellPosition( localSuperCellCellOffset ); - auto accParFilter = parFilter( - acc, - supcerCellIdx - mapper.getGuardingSuperCells( ), - WorkerCfg< numWorkers >{ workerIdx } - ); - - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - localCounter = 0; - srcFramePtr = srcBox.getFirstFrame( supcerCellIdx ); - } - ); + filter.setSuperCellPosition(localSuperCellCellOffset); + auto 
accParFilter + = parFilter(acc, supcerCellIdx - mapper.getGuardingSuperCells(), WorkerCfg{workerIdx}); - __syncthreads(); + ForEachIdx> onlyMaster{workerIdx}; - // move over all Frames in the supercell - while( srcFramePtr.isValid() ) - { - forEachParticle( - [&]( - uint32_t const localIdx, - uint32_t const idx - ) - { - auto parSrc = ( srcFramePtr[ localIdx ] ); - storageOffsetCtx[ idx ] = -1; - // count particle in frame - if( parSrc[ multiMask_ ] == 1 && - filter( - *srcFramePtr, - localIdx - ) - ) - if( - accParFilter( - acc, - parSrc - ) - ) - storageOffsetCtx[ idx ] = nvidia::atomicAllInc( - acc, - &localCounter, - ::alpaka::hierarchy::Threads{} - ); - } - ); - __syncthreads(); - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - // reserve host memory for particle - globalOffset = atomicAdd( - counter, - localCounter, - ::alpaka::hierarchy::Blocks{} - ); - } - ); + onlyMaster([&](uint32_t const, uint32_t const) { + localCounter = 0; + srcFramePtr = srcBox.getFirstFrame(supcerCellIdx); + }); - __syncthreads(); + cupla::__syncthreads(acc); - forEachParticle( - [&]( - uint32_t const localIdx, - uint32_t const idx - ) + // move over all Frames in the supercell + while(srcFramePtr.isValid()) + { + forEachParticle([&](uint32_t const localIdx, uint32_t const idx) { + auto parSrc = (srcFramePtr[localIdx]); + storageOffsetCtx[idx] = -1; + // count particle in frame + if(parSrc[multiMask_] == 1 && filter(*srcFramePtr, localIdx)) + if(accParFilter(acc, parSrc)) + storageOffsetCtx[idx] + = nvidia::atomicAllInc(acc, &localCounter, ::alpaka::hierarchy::Threads{}); + }); + cupla::__syncthreads(acc); + + onlyMaster([&](uint32_t const, uint32_t const) { + // reserve host memory for particle + globalOffset = cupla::atomicAdd(acc, counter, localCounter, ::alpaka::hierarchy::Blocks{}); + }); + + cupla::__syncthreads(acc); + + forEachParticle([&](uint32_t const localIdx, uint32_t const idx) { + if(storageOffsetCtx[idx] != -1) { - if( storageOffsetCtx[ idx ] != -1 
) - { - auto parDest = destFrame[ globalOffset + storageOffsetCtx[ idx ] ]; - auto parDestNoDomainIdx = deselect< T_Identifier >( parDest ); - auto parSrc = ( srcFramePtr[ localIdx ] ); - assign( - parDestNoDomainIdx, - parSrc - ); - // calculate cell index for user-defined domain - DataSpace< simDim > const localCell( - DataSpaceOperations< simDim >::template map< - SuperCellSize - >( parSrc[ localCellIdx_ ] ) - ); - parDest[domainCellIdxIdentifier] = - domainOffset + localSuperCellCellOffset + localCell; - } + auto parDest = destFrame[globalOffset + storageOffsetCtx[idx]]; + auto parDestNoDomainIdx = deselect(parDest); + auto parSrc = (srcFramePtr[localIdx]); + assign(parDestNoDomainIdx, parSrc); + // calculate cell index for user-defined domain + DataSpace const localCell( + DataSpaceOperations::template map(parSrc[localCellIdx_])); + parDest[domainCellIdxIdentifier] = domainOffset + localSuperCellCellOffset + localCell; } - ); + }); - __syncthreads(); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - // get next frame in supercell - srcFramePtr = srcBox.getNextFrame( srcFramePtr ); - localCounter = 0; - } - ); - __syncthreads(); + onlyMaster([&](uint32_t const, uint32_t const) { + // get next frame in supercell + srcFramePtr = srcBox.getNextFrame(srcFramePtr); + localCounter = 0; + }); + cupla::__syncthreads(acc); } } }; -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/plugins/makroParticleCounter/PerSuperCell.hpp b/include/picongpu/plugins/makroParticleCounter/PerSuperCell.hpp index cd55016db1..1945d6e2c8 100644 --- a/include/picongpu/plugins/makroParticleCounter/PerSuperCell.hpp +++ b/include/picongpu/plugins/makroParticleCounter/PerSuperCell.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch +/* Copyright 2014-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -39,308 +39,290 @@ namespace picongpu { -using namespace pmacc; -using namespace splash; + using namespace pmacc; + using namespace splash; -struct CountMakroParticle -{ - template< - typename ParBox, - typename CounterBox, - typename Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - ParBox parBox, - CounterBox counterBox, - Mapping mapper - ) const + struct CountMakroParticle { + template + DINLINE void operator()(T_Acc const& acc, ParBox parBox, CounterBox counterBox, Mapping mapper) const + { + typedef MappingDesc::SuperCellSize SuperCellSize; + typedef typename ParBox::FrameType FrameType; + typedef typename ParBox::FramePtr FramePtr; - typedef MappingDesc::SuperCellSize SuperCellSize; - typedef typename ParBox::FrameType FrameType; - typedef typename ParBox::FramePtr FramePtr; - - const DataSpace block(mapper.getSuperCellIndex(DataSpace (blockIdx))); - /* counterBox has no guarding supercells*/ - const DataSpace counterCell = block - mapper.getGuardingSuperCells(); + const DataSpace block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + /* counterBox has no guarding supercells*/ + const DataSpace counterCell = block - mapper.getGuardingSuperCells(); - const DataSpace threadIndex(threadIdx); - const int linearThreadIdx = DataSpaceOperations::template map (threadIndex); + const DataSpace threadIndex(cupla::threadIdx(acc)); + const int linearThreadIdx = DataSpaceOperations::template map(threadIndex); - PMACC_SMEM( acc, counterValue, uint64_cu ); - PMACC_SMEM( acc, frame, FramePtr ); + PMACC_SMEM(acc, counterValue, uint64_cu); + PMACC_SMEM(acc, frame, FramePtr); - if (linearThreadIdx == 0) - { - counterValue = 0; - frame = parBox.getLastFrame(block); - if (!frame.isValid()) + if(linearThreadIdx == 0) { - counterBox(counterCell) = counterValue; + counterValue = 0; + frame = parBox.getLastFrame(block); + if(!frame.isValid()) + { + counterBox(counterCell) = counterValue; + } } - } - __syncthreads(); - if 
(!frame.isValid()) - return; //end kernel if we have no frames + cupla::__syncthreads(acc); + if(!frame.isValid()) + return; // end kernel if we have no frames - bool isParticle = frame[linearThreadIdx][multiMask_]; + bool isParticle = frame[linearThreadIdx][multiMask_]; - while (frame.isValid()) - { - if (isParticle) + while(frame.isValid()) { - atomicAdd(&counterValue, static_cast (1LU), ::alpaka::hierarchy::Blocks{}); + if(isParticle) + { + cupla::atomicAdd(acc, &counterValue, static_cast(1LU), ::alpaka::hierarchy::Blocks{}); + } + cupla::__syncthreads(acc); + if(linearThreadIdx == 0) + { + frame = parBox.getPreviousFrame(frame); + } + isParticle = true; + cupla::__syncthreads(acc); } - __syncthreads(); - if (linearThreadIdx == 0) - { - frame = parBox.getPreviousFrame(frame); - } - isParticle = true; - __syncthreads(); - } - - if (linearThreadIdx == 0) - counterBox(counterCell) = counterValue; - } -}; -/** Count makro particle of a species and write down the result to a global HDF5 file. 
- * - * - count the total number of makro particle per supercell - * - store one number (size_t) per supercell in a mesh - * - Output: - create a folder with the name of the plugin - * - per time step one file with the name "result_[currentStep].h5" is created - * - HDF5 Format: - default lib splash output for meshes - * - the attribute name in the HDF5 file is "makroParticleCount" - * - */ -template -class PerSuperCell : public ILightweightPlugin -{ -private: - - - typedef MappingDesc::SuperCellSize SuperCellSize; - typedef GridBuffer GridBufferType; - - MappingDesc *cellDescription; - std::string notifyPeriod; - - std::string pluginName; - std::string pluginPrefix; - std::string foldername; - mpi::MPIReduce reduce; - - GridBufferType* localResult; - ParallelDomainCollector *dataCollector; - // set attributes for datacollector files - DataCollector::FileCreationAttr h5_attr; - -public: - - PerSuperCell() : - pluginName("PerSuperCell: create hdf5 with macro particle count per superCell"), - pluginPrefix(ParticlesType::FrameType::getName() + std::string("_macroParticlesPerSuperCell")), - foldername(pluginPrefix), - cellDescription(nullptr), - localResult(nullptr), - dataCollector(nullptr) - { - Environment<>::get().PluginConnector().registerPlugin(this); - } - - virtual ~PerSuperCell() - { - - } - - void notify(uint32_t currentStep) - { - countMakroParticles < CORE + BORDER > (currentStep); - } - - void pluginRegisterHelp(po::options_description& desc) + if(linearThreadIdx == 0) + counterBox(counterCell) = counterValue; + } + }; + /** Count makro particle of a species and write down the result to a global HDF5 file. 
+ * + * - count the total number of makro particle per supercell + * - store one number (size_t) per supercell in a mesh + * - Output: - create a folder with the name of the plugin + * - per time step one file with the name "result_[currentStep].h5" is created + * - HDF5 Format: - default lib splash output for meshes + * - the attribute name in the HDF5 file is "makroParticleCount" + * + */ + template + class PerSuperCell : public ILightweightPlugin { - desc.add_options() - ((pluginPrefix + ".period").c_str(), - po::value (¬ifyPeriod), "enable plugin [for each n-th step]"); - } + private: + typedef MappingDesc::SuperCellSize SuperCellSize; + typedef GridBuffer GridBufferType; + + MappingDesc* cellDescription; + std::string notifyPeriod; + + std::string pluginName; + std::string pluginPrefix; + std::string foldername; + mpi::MPIReduce reduce; + + GridBufferType* localResult; + + ParallelDomainCollector* dataCollector; + // set attributes for datacollector files + DataCollector::FileCreationAttr h5_attr; + + public: + PerSuperCell() + : pluginName("PerSuperCell: create hdf5 with macro particle count per superCell") + , pluginPrefix(ParticlesType::FrameType::getName() + std::string("_macroParticlesPerSuperCell")) + , foldername(pluginPrefix) + , cellDescription(nullptr) + , localResult(nullptr) + , dataCollector(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - std::string pluginGetName() const - { - return pluginName; - } + virtual ~PerSuperCell() + { + } - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } + void notify(uint32_t currentStep) + { + countMakroParticles(currentStep); + } -private: + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]"); + } - void pluginLoad() - { - if(!notifyPeriod.empty()) + std::string pluginGetName() const { - 
Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - const SubGrid& subGrid = Environment::get().SubGrid(); - /* local count of supercells without any guards*/ - DataSpace localSuperCells(subGrid.getLocalDomain().size / SuperCellSize::toRT()); - localResult = new GridBufferType(localSuperCells); + return pluginName; + } - /* create folder for hdf5 files*/ - Environment::get().Filesystem().createDirectoryWithPermissions(foldername); + void setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; } - } - void pluginUnload() - { - __delete(localResult); + private: + void pluginLoad() + { + if(!notifyPeriod.empty()) + { + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + const SubGrid& subGrid = Environment::get().SubGrid(); + /* local count of supercells without any guards*/ + DataSpace localSuperCells(subGrid.getLocalDomain().size / SuperCellSize::toRT()); + localResult = new GridBufferType(localSuperCells); + + /* create folder for hdf5 files*/ + Environment::get().Filesystem().createDirectoryWithPermissions(foldername); + } + } - if (dataCollector) - dataCollector->finalize(); + void pluginUnload() + { + __delete(localResult); - __delete(dataCollector); - } + if(dataCollector) + dataCollector->finalize(); - template< uint32_t AREA> - void countMakroParticles(uint32_t currentStep) - { - openH5File(); + __delete(dataCollector); + } - DataConnector &dc = Environment<>::get().DataConnector(); + template + void countMakroParticles(uint32_t currentStep) + { + openH5File(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); + DataConnector& dc = Environment<>::get().DataConnector(); - /*############ count particles #######################################*/ - typedef MappingDesc::SuperCellSize SuperCellSize; - AreaMapping mapper(*cellDescription); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); - 
PMACC_KERNEL(CountMakroParticle{}) - (mapper.getGridDim(), SuperCellSize::toRT()) - (particles->getDeviceParticlesBox(), - localResult->getDeviceBuffer().getDataBox(), mapper); + /*############ count particles #######################################*/ + typedef MappingDesc::SuperCellSize SuperCellSize; + AreaMapping mapper(*cellDescription); - dc.releaseData( ParticlesType::FrameType::getName() ); + PMACC_KERNEL(CountMakroParticle{}) + (mapper.getGridDim(), SuperCellSize::toRT())( + particles->getDeviceParticlesBox(), + localResult->getDeviceBuffer().getDataBox(), + mapper); - localResult->deviceToHost(); + dc.releaseData(ParticlesType::FrameType::getName()); + localResult->deviceToHost(); - /*############ dump data #############################################*/ - const SubGrid& subGrid = Environment::get().SubGrid(); + /*############ dump data #############################################*/ + const SubGrid& subGrid = Environment::get().SubGrid(); - DataSpace localSize(subGrid.getLocalDomain().size / SuperCellSize::toRT()); - DataSpace globalOffset(subGrid.getLocalDomain().offset / SuperCellSize::toRT()); - DataSpace globalSize(subGrid.getGlobalDomain().size / SuperCellSize::toRT()); + DataSpace localSize(subGrid.getLocalDomain().size / SuperCellSize::toRT()); + DataSpace globalOffset(subGrid.getLocalDomain().offset / SuperCellSize::toRT()); + DataSpace globalSize(subGrid.getGlobalDomain().size / SuperCellSize::toRT()); + Dimensions splashGlobalDomainOffset(0, 0, 0); + Dimensions splashGlobalOffset(0, 0, 0); + Dimensions splashGlobalDomainSize(1, 1, 1); + Dimensions splashGlobalSize(1, 1, 1); + Dimensions localBufferSize(1, 1, 1); - Dimensions splashGlobalDomainOffset(0, 0, 0); - Dimensions splashGlobalOffset(0, 0, 0); - Dimensions splashGlobalDomainSize(1, 1, 1); - Dimensions splashGlobalSize(1, 1, 1); - Dimensions localBufferSize(1, 1, 1); + for(uint32_t d = 0; d < simDim; ++d) + { + splashGlobalOffset[d] = globalOffset[d]; + splashGlobalSize[d] = globalSize[d]; 
+ splashGlobalDomainSize[d] = globalSize[d]; + localBufferSize[d] = localSize[d]; + } - for (uint32_t d = 0; d < simDim; ++d) - { - splashGlobalOffset[d] = globalOffset[d]; - splashGlobalSize[d] = globalSize[d]; - splashGlobalDomainSize[d] = globalSize[d]; - localBufferSize[d] = localSize[d]; + size_t* ptr = localResult->getHostBuffer().getPointer(); + + // avoid deadlock between not finished pmacc tasks and mpi calls in adios + __getTransactionEvent().waitForFinished(); + + dataCollector->writeDomain( + currentStep, /* id == time step */ + splashGlobalSize, /* total size of dataset over all processes */ + splashGlobalOffset, /* write offset for this process */ + ColTypeUInt64(), /* data type */ + simDim, /* NDims of the field data (scalar, vector, ...) */ + splash::Selection(localBufferSize), + "makroParticlePerSupercell", /* data set name */ + splash::Domain( + splashGlobalDomainOffset, /* offset of the global domain */ + splashGlobalDomainSize /* size of the global domain */ + ), + DomainCollector::GridType, + ptr); + + closeH5File(); } - size_t* ptr = localResult->getHostBuffer().getPointer(); - - // avoid deadlock between not finished pmacc tasks and mpi calls in adios - __getTransactionEvent().waitForFinished(); - - dataCollector->writeDomain(currentStep, /* id == time step */ - splashGlobalSize, /* total size of dataset over all processes */ - splashGlobalOffset, /* write offset for this process */ - ColTypeUInt64(), /* data type */ - simDim, /* NDims of the field data (scalar, vector, ...) 
*/ - splash::Selection(localBufferSize), - "makroParticlePerSupercell", /* data set name */ - splash::Domain( - splashGlobalDomainOffset, /* offset of the global domain */ - splashGlobalDomainSize /* size of the global domain */ - ), - DomainCollector::GridType, - ptr); - - closeH5File(); - } - - void closeH5File() - { - if (dataCollector != nullptr) + void closeH5File() { - std::string filename = (foldername + std::string("/makroParticlePerSupercell")); - log ("HDF5 close DataCollector with file: %1%") % filename; - dataCollector->close(); + if(dataCollector != nullptr) + { + std::string filename = (foldername + std::string("/makroParticlePerSupercell")); + log("HDF5 close DataCollector with file: %1%") % filename; + dataCollector->close(); + } } - } - void openH5File() - { - - if (dataCollector == nullptr) + void openH5File() { - DataSpace mpi_pos; - DataSpace mpi_size; - - Dimensions splashMpiPos; - Dimensions splashMpiSize; - - GridController &gc = Environment::get().GridController(); - - mpi_pos = gc.getPosition(); - mpi_size = gc.getGpuNodes(); - - splashMpiPos.set(0, 0, 0); - splashMpiSize.set(1, 1, 1); - - for (uint32_t i = 0; i < simDim; ++i) + if(dataCollector == nullptr) { - splashMpiPos[i] = mpi_pos[i]; - splashMpiSize[i] = mpi_size[i]; + DataSpace mpi_pos; + DataSpace mpi_size; + + Dimensions splashMpiPos; + Dimensions splashMpiSize; + + GridController& gc = Environment::get().GridController(); + + mpi_pos = gc.getPosition(); + mpi_size = gc.getGpuNodes(); + + splashMpiPos.set(0, 0, 0); + splashMpiSize.set(1, 1, 1); + + for(uint32_t i = 0; i < simDim; ++i) + { + splashMpiPos[i] = mpi_pos[i]; + splashMpiSize[i] = mpi_size[i]; + } + + + const uint32_t maxOpenFilesPerNode = 1; + dataCollector = new ParallelDomainCollector( + gc.getCommunicator().getMPIComm(), + gc.getCommunicator().getMPIInfo(), + splashMpiSize, + maxOpenFilesPerNode); + // set attributes for datacollector files + DataCollector::FileCreationAttr h5_attr; + h5_attr.enableCompression = 
false; + h5_attr.fileAccType = DataCollector::FAT_CREATE; + h5_attr.mpiPosition.set(splashMpiPos); + h5_attr.mpiSize.set(splashMpiSize); } - const uint32_t maxOpenFilesPerNode = 1; - dataCollector = new ParallelDomainCollector( - gc.getCommunicator().getMPIComm(), - gc.getCommunicator().getMPIInfo(), - splashMpiSize, - maxOpenFilesPerNode); - // set attributes for datacollector files - DataCollector::FileCreationAttr h5_attr; - h5_attr.enableCompression = false; - h5_attr.fileAccType = DataCollector::FAT_CREATE; - h5_attr.mpiPosition.set(splashMpiPos); - h5_attr.mpiSize.set(splashMpiSize); - } - - - // open datacollector - try - { - std::string filename = (foldername + std::string("/makroParticlePerSupercell")); - log ("HDF5 open DataCollector with file: %1%") % - filename; - dataCollector->open(filename.c_str(), h5_attr); - } - catch (const DCException& e) - { - std::cerr << e.what() << std::endl; - throw std::runtime_error("Failed to open datacollector"); + // open datacollector + try + { + std::string filename = (foldername + std::string("/makroParticlePerSupercell")); + log("HDF5 open DataCollector with file: %1%") % filename; + dataCollector->open(filename.c_str(), h5_attr); + } + catch(const DCException& e) + { + std::cerr << e.what() << std::endl; + throw std::runtime_error("Failed to open datacollector"); + } } - } - -}; + }; -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/plugins/misc/AppendName.hpp b/include/picongpu/plugins/misc/AppendName.hpp index 15d59c55ca..bda45a6855 100644 --- a/include/picongpu/plugins/misc/AppendName.hpp +++ b/include/picongpu/plugins/misc/AppendName.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,22 +25,22 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - /** append the name of an filter to a vector - * - * @tparam T_Filter filter class (required interface: `getName( )`) - */ - template< typename T_Filter > - struct AppendName + namespace plugins { - void operator( )( std::vector< std::string > & vector ) const + namespace misc { - vector.emplace_back( T_Filter::getName() ); - } - }; -} // namespace misc -} // namespace plugins + /** append the name of an filter to a vector + * + * @tparam T_Filter filter class (required interface: `getName( )`) + */ + template + struct AppendName + { + void operator()(std::vector& vector) const + { + vector.emplace_back(T_Filter::getName()); + } + }; + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/ComponentNames.cpp b/include/picongpu/plugins/misc/ComponentNames.cpp index fd5f44eac9..ba33ff41da 100644 --- a/include/picongpu/plugins/misc/ComponentNames.cpp +++ b/include/picongpu/plugins/misc/ComponentNames.cpp @@ -1,4 +1,4 @@ -/* Copyright 2020 Sergei Bastrakov +/* Copyright 2020-2021 Sergei Bastrakov * * This file is part of PIConGPU. 
* @@ -26,39 +26,33 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - - std::vector< std::string > getComponentNames( - uint32_t const numComponents - ) + namespace plugins { - /* For low number of components, fall back to the previously used - * "xyzw" naming scheme for backward compatibility - */ - if( numComponents <= 4 ) - { - std::array< std::string, 4 > names = { "x" , "y", "z", "w" }; - return std::vector< std::string >{ - names.begin(), - names.begin() + numComponents - }; - } - // Special case for 6 PML components - else if( numComponents == 6 ) - return { "xy" , "xz", "yx", "yz", "zx", "zy" }; - else + namespace misc { - // Otherwise use different generic names - auto result = std::vector< std::string >( numComponents ); - for( auto i = 0u; i < result.size(); i++ ) - result[ i ] = "component" + std::to_string( i ); - return result; - } - } + std::vector getComponentNames(uint32_t const numComponents) + { + /* For low number of components, fall back to the previously used + * "xyzw" naming scheme for backward compatibility + */ + if(numComponents <= 4) + { + std::array names = {"x", "y", "z", "w"}; + return std::vector{names.begin(), names.begin() + numComponents}; + } + // Special case for 6 PML components + else if(numComponents == 6) + return {"xy", "xz", "yx", "yz", "zx", "zy"}; + else + { + // Otherwise use different generic names + auto result = std::vector(numComponents); + for(auto i = 0u; i < result.size(); i++) + result[i] = "component" + std::to_string(i); + return result; + } + } -} // namespace misc -} // namespace plugins + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/ComponentNames.hpp b/include/picongpu/plugins/misc/ComponentNames.hpp index 1a439bf70c..ca33f7a8e8 100644 --- a/include/picongpu/plugins/misc/ComponentNames.hpp +++ b/include/picongpu/plugins/misc/ComponentNames.hpp @@ -1,4 +1,4 @@ -/* Copyright 2020 Sergei Bastrakov +/* Copyright 2020-2021 Sergei 
Bastrakov * * This file is part of PIConGPU. * @@ -25,22 +25,19 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - - /** Get text names of vector components - * - * For 1-4 and 6 components use predefined names, - * for other amounts use generic different names - * - * @param numComponents number of components - */ - std::vector< std::string > getComponentNames( - uint32_t numComponents - ); + namespace plugins + { + namespace misc + { + /** Get text names of vector components + * + * For 1-4 and 6 components use predefined names, + * for other amounts use generic different names + * + * @param numComponents number of components + */ + std::vector getComponentNames(uint32_t numComponents); -} // namespace misc -} // namespace plugins + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/ExecuteIfNameIsEqual.hpp b/include/picongpu/plugins/misc/ExecuteIfNameIsEqual.hpp index 4f981da0f9..018aa5e670 100644 --- a/include/picongpu/plugins/misc/ExecuteIfNameIsEqual.hpp +++ b/include/picongpu/plugins/misc/ExecuteIfNameIsEqual.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -24,36 +24,29 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - /** execute an unary functor if the name is equal - * - * @tparam T_Filter filter class (required interface: `getName( )` and default constructor) - */ - template< typename T_Filter > - struct ExecuteIfNameIsEqual + namespace plugins { - /** evaluate if functor must executed - * - * @param filterName name of the filter which should started - * @param unaryFunctor any unary functor - */ - template< - typename T_Kernel, - typename ... 
T_Args - > - void operator( )( - std::string filterName, - uint32_t const currentStep, - T_Kernel const unaryFunctor - ) const + namespace misc { - if( filterName == T_Filter::getName( ) ) - unaryFunctor( particles::filter::IUnary< T_Filter >{ currentStep } ); - } - }; -} // namespace misc -} // namespace plugins + /** execute an unary functor if the name is equal + * + * @tparam T_Filter filter class (required interface: `getName( )` and default constructor) + */ + template + struct ExecuteIfNameIsEqual + { + /** evaluate if functor must executed + * + * @param filterName name of the filter which should started + * @param unaryFunctor any unary functor + */ + template + void operator()(std::string filterName, uint32_t const currentStep, T_Kernel const unaryFunctor) const + { + if(filterName == T_Filter::getName()) + unaryFunctor(particles::filter::IUnary{currentStep}); + } + }; + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/SpeciesFilter.hpp b/include/picongpu/plugins/misc/SpeciesFilter.hpp index 0fae441ef0..1107c7d10b 100644 --- a/include/picongpu/plugins/misc/SpeciesFilter.hpp +++ b/include/picongpu/plugins/misc/SpeciesFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -26,73 +26,68 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - - /** combines a particle species with a filter - * - * @tparam T_Species picongpu::Particle, type of the species - * @tparam T_Filter pmacc::filter::Interface, type of the filter - */ - template< - typename T_Species, - typename T_Filter = particles::filter::All - > - struct SpeciesFilter + namespace plugins { - using Filter = T_Filter; - using Species = T_Species; - - /** name of the filtered species - * - * @return _` - */ - static std::string getName() + namespace misc { - return Species::FrameType::getName() + "_" + Filter::getName(); - } - }; + /** combines a particle species with a filter + * + * @tparam T_Species picongpu::Particle, type of the species + * @tparam T_Filter pmacc::filter::Interface, type of the filter + */ + template + struct SpeciesFilter + { + using Filter = T_Filter; + using Species = T_Species; - /** species without a filter - * - * This class fulfills the interface of SpeciesFilter for a species - * but keeps the species name without adding the filter suffix. - */ - template< typename T_Species > - struct UnfilteredSpecies - { - using Filter = particles::filter::All; - using Species = T_Species; + /** name of the filtered species + * + * @return _` + */ + static std::string getName() + { + return Species::FrameType::getName() + "_" + Filter::getName(); + } + }; - /** get name of the filtered species - * - * @return - */ - static std::string getName() - { - return Species::FrameType::getName(); - } - }; + /** species without a filter + * + * This class fulfills the interface of SpeciesFilter for a species + * but keeps the species name without adding the filter suffix. 
+ */ + template + struct UnfilteredSpecies + { + using Filter = particles::filter::All; + using Species = T_Species; -namespace speciesFilter -{ - /** evaluate if the filter and species combination is valid - * - * @tparam T_SpeciesFilter SpeciesFilter, type of the filter and species - * @return ::type boost::mpl::bool_<>, if the species is eligible for the filter - */ - template< typename T_SpeciesFilter > - struct IsEligible - { - using type = typename particles::traits::SpeciesEligibleForSolver< - typename T_SpeciesFilter::Species, - typename T_SpeciesFilter::Filter - >::type; - }; -} // namespace speciesFilter + /** get name of the filtered species + * + * @return + */ + static std::string getName() + { + return Species::FrameType::getName(); + } + }; + + namespace speciesFilter + { + /** evaluate if the filter and species combination is valid + * + * @tparam T_SpeciesFilter SpeciesFilter, type of the filter and species + * @return ::type boost::mpl::bool_<>, if the species is eligible for the filter + */ + template + struct IsEligible + { + using type = typename particles::traits::SpeciesEligibleForSolver< + typename T_SpeciesFilter::Species, + typename T_SpeciesFilter::Filter>::type; + }; + } // namespace speciesFilter -} //namespace misc -} //namespace plugins -} //namespace picongpu + } // namespace misc + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/misc/concatenateToString.hpp b/include/picongpu/plugins/misc/concatenateToString.hpp index 162d53f1e9..4f18adc911 100644 --- a/include/picongpu/plugins/misc/concatenateToString.hpp +++ b/include/picongpu/plugins/misc/concatenateToString.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,36 +25,28 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - /** concatenate all values of an string container - * - * @tparam T_Container type of the container - * - * @param vector source container (required interface: `begin(), end()`) - * @param separator separator between two elements - */ - template< typename T_Container > - std::string concatenateToString( - T_Container & container, - std::string const & separator = "," - ) + namespace plugins { - return std::accumulate( - container.begin(), - container.end(), - std::string(), - [ & ]( - std::string & result, - std::string & inString - ) + namespace misc + { + /** concatenate all values of an string container + * + * @tparam T_Container type of the container + * + * @param vector source container (required interface: `begin(), end()`) + * @param separator separator between two elements + */ + template + std::string concatenateToString(T_Container& container, std::string const& separator = ",") { - return result.empty() ? inString : result + separator + inString; + return std::accumulate( + container.begin(), + container.end(), + std::string(), + [&](std::string& result, std::string& inString) { + return result.empty() ? inString : result + separator + inString; + }); } - ); - } -} // namespace misc -} // namespace plugins + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/containsObject.hpp b/include/picongpu/plugins/misc/containsObject.hpp index aa109be2cf..96c51632a3 100644 --- a/include/picongpu/plugins/misc/containsObject.hpp +++ b/include/picongpu/plugins/misc/containsObject.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -24,32 +24,25 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - /** search for an element within a STL container - * - * @tparam T_Container standard container, type of the container - * - * @param container object to query - * @param value object to search - * @return true if container contains the element, else false - */ - template< typename T_Container > - bool containsObject( - T_Container const & container, - typename T_Container::value_type const & value - ) + namespace plugins { - auto it = std::find( - container.begin(), - container.end(), - value - ); + namespace misc + { + /** search for an element within a STL container + * + * @tparam T_Container standard container, type of the container + * + * @param container object to query + * @param value object to search + * @return true if container contains the element, else false + */ + template + bool containsObject(T_Container const& container, typename T_Container::value_type const& value) + { + auto it = std::find(container.begin(), container.end(), value); - return it != container.end(); - } -} // namespace misc -} // namespace plugins + return it != container.end(); + } + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/misc.hpp b/include/picongpu/plugins/misc/misc.hpp index 063abadf00..aa74c9e77e 100644 --- a/include/picongpu/plugins/misc/misc.hpp +++ b/include/picongpu/plugins/misc/misc.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/plugins/misc/removeSpaces.cpp b/include/picongpu/plugins/misc/removeSpaces.cpp index 46765b744e..f7342548e8 100644 --- a/include/picongpu/plugins/misc/removeSpaces.cpp +++ b/include/picongpu/plugins/misc/removeSpaces.cpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,23 +25,16 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - std::string removeSpaces( std::string value ) + namespace plugins { - value.erase( - std::remove( - value.begin(), - value.end(), - ' ' - ), - value.end() - ); + namespace misc + { + std::string removeSpaces(std::string value) + { + value.erase(std::remove(value.begin(), value.end(), ' '), value.end()); - return value; - } -} // namespace misc -} // namespace plugins + return value; + } + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/removeSpaces.hpp b/include/picongpu/plugins/misc/removeSpaces.hpp index c897c886eb..989e24e686 100644 --- a/include/picongpu/plugins/misc/removeSpaces.hpp +++ b/include/picongpu/plugins/misc/removeSpaces.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -24,16 +24,16 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - /** removes all spaces within a string - * - * @param value input string - * @return string without any spaces - */ - std::string removeSpaces( std::string value ); -} // namespace misc -} // namespace plugins + namespace plugins + { + namespace misc + { + /** removes all spaces within a string + * + * @param value input string + * @return string without any spaces + */ + std::string removeSpaces(std::string value); + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/splitString.cpp b/include/picongpu/plugins/misc/splitString.cpp index dd827f3f77..d6aaa459b5 100644 --- a/include/picongpu/plugins/misc/splitString.cpp +++ b/include/picongpu/plugins/misc/splitString.cpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -26,30 +26,19 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - std::vector< std::string > splitString( - std::string const & input, - std::string const & regex - ) + namespace plugins { - std::regex re( regex ); - // passing -1 as the submatch index parameter performs splitting - std::sregex_token_iterator first{ - input.begin(), - input.end(), - re, - -1 - }; - std::sregex_token_iterator last; + namespace misc + { + std::vector splitString(std::string const& input, std::string const& regex) + { + std::regex re(regex); + // passing -1 as the submatch index parameter performs splitting + std::sregex_token_iterator first{input.begin(), input.end(), re, -1}; + std::sregex_token_iterator last; - return { - first, - last - }; - } -} // namespace misc -} // namespace plugins + return {first, last}; + } + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/misc/splitString.hpp b/include/picongpu/plugins/misc/splitString.hpp index 09fd8d268c..d5590a5f33 100644 --- a/include/picongpu/plugins/misc/splitString.hpp +++ b/include/picongpu/plugins/misc/splitString.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -25,24 +25,21 @@ namespace picongpu { -namespace plugins -{ -namespace misc -{ - /** split a string in a vector of strings - * - * Based on Stack Overflow post: - * source: https://stackoverflow.com/a/28142357 - * author: Marcin - * date: Jan 25 '15 - * - * @param input string to split - * @param regex separator between two elements - */ - std::vector< std::string > splitString( - std::string const & input, - std::string const & regex = "," - ); -} // namespace misc -} // namespace plugins + namespace plugins + { + namespace misc + { + /** split a string in a vector of strings + * + * Based on Stack Overflow post: + * source: https://stackoverflow.com/a/28142357 + * author: Marcin + * date: Jan 25 '15 + * + * @param input string to split + * @param regex separator between two elements + */ + std::vector splitString(std::string const& input, std::string const& regex = ","); + } // namespace misc + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/multi/IHelp.hpp b/include/picongpu/plugins/multi/IHelp.hpp index 67c66779f1..bf5c2775ae 100644 --- a/include/picongpu/plugins/multi/IHelp.hpp +++ b/include/picongpu/plugins/multi/IHelp.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -25,54 +25,53 @@ namespace picongpu { -namespace plugins -{ -namespace multi -{ - - //! Interface to expose a help of a plugin - struct IHelp + namespace plugins { - //! creates a ISlave instance - virtual std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) = 0; + namespace multi + { + //! Interface to expose a help of a plugin + struct IHelp + { + //! 
creates a ISlave instance + virtual std::shared_ptr create( + std::shared_ptr& help, + size_t const id, + MappingDesc* cellDescription) + = 0; - /** register help options - * - * The options are used if the plugin is a ISlave and is handling - * there own notification period. - */ - virtual void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) = 0; + /** register help options + * + * The options are used if the plugin is a ISlave and is handling + * there own notification period. + */ + virtual void registerHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + = 0; - /** register independent help options - * - * This options can be used even if the plugin is not handling there - * own notification period. - */ - virtual void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) = 0; + /** register independent help options + * + * This options can be used even if the plugin is not handling there + * own notification period. + */ + virtual void expandHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + = 0; - //! validate if the command line interface options are well formated - virtual void validateOptions() = 0; + //! validate if the command line interface options are well formated + virtual void validateOptions() = 0; - //! number of plugin which must be created - virtual size_t getNumPlugins() const = 0; + //! number of plugin which must be created + virtual size_t getNumPlugins() const = 0; - //! short description of the plugin functionality - virtual std::string getDescription() const = 0; + //! short description of the plugin functionality + virtual std::string getDescription() const = 0; - //! name of the plugin - virtual std::string getName() const = 0; - }; + //! 
name of the plugin + virtual std::string getName() const = 0; + }; -} // namespace multi -} // namespace plugins + } // namespace multi + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/multi/ISlave.hpp b/include/picongpu/plugins/multi/ISlave.hpp index 5eedeba45a..2c040a6de8 100644 --- a/include/picongpu/plugins/multi/ISlave.hpp +++ b/include/picongpu/plugins/multi/ISlave.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,37 +27,31 @@ namespace picongpu { -namespace plugins -{ -namespace multi -{ - struct IHelp; - - /** Interface for a slave plugin - * - * A plugin which fulfil l this interface can be used as slave plugin for - * multi::Master. - * - * A slave must register itself to the PluginConnector to receive the notify calls. - */ - struct ISlave : public pmacc::INotify + namespace plugins { - //! must be implemented by the user - static std::shared_ptr< IHelp > getHelp(); - - //! restart the plugin from a checkpoint - virtual void restart( - uint32_t restartStep, - std::string const & restartDirectory - ) = 0; - - //! create a check point forthe plugin - virtual void checkpoint( - uint32_t currentStep, - std::string const & checkpointDirectory - ) = 0; - }; - -} // namespace multi -} // namespace plugins + namespace multi + { + struct IHelp; + + /** Interface for a slave plugin + * + * A plugin which fulfil l this interface can be used as slave plugin for + * multi::Master. + * + * A slave must register itself to the PluginConnector to receive the notify calls. + */ + struct ISlave : public pmacc::INotify + { + //! must be implemented by the user + static std::shared_ptr getHelp(); + + //! restart the plugin from a checkpoint + virtual void restart(uint32_t restartStep, std::string const& restartDirectory) = 0; + + //! 
create a check point forthe plugin + virtual void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) = 0; + }; + + } // namespace multi + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/multi/Master.hpp b/include/picongpu/plugins/multi/Master.hpp index 42706b19e0..8bad1f7afc 100644 --- a/include/picongpu/plugins/multi/Master.hpp +++ b/include/picongpu/plugins/multi/Master.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -32,140 +32,109 @@ namespace picongpu { -namespace plugins -{ -namespace multi -{ - /** Master class to create multi plugins - * - * Create and handle a plugin as multi plugin. Parameter of a multi plugin - * can be used multiple times on the command line. - * - * @tparam T_Slave type of the plugin (must inherit from ISlave) - */ - template< typename T_Slave > - class Master : public ISimulationPlugin + namespace plugins { - public: - - using Slave = T_Slave; - using SlaveList = std::list< std::shared_ptr< ISlave > >; - SlaveList slaveList; - - std::shared_ptr< IHelp > slaveHelp; - - MappingDesc* m_cellDescription = nullptr; - - Master( ) : slaveHelp( Slave::getHelp() ) - { - Environment<>::get( ).PluginConnector( ).registerPlugin(this); - } - - virtual ~Master( ) - { - - } - - std::string pluginGetName( ) const - { - // the PMacc plugin system needs a short description instead of the plugin name - return slaveHelp->getName( ) + ": " + slaveHelp->getDescription( ); - } - - void pluginRegisterHelp( boost::program_options::options_description& desc ) - { - slaveHelp->registerHelp( desc ); - } - - void setMappingDescription( MappingDesc* cellDescription ) + namespace multi { - m_cellDescription = cellDescription; - } - - /** restart a checkpoint - * - * Trigger the method restart() for all slave instances. 
- */ - void restart( - uint32_t restartStep, - std::string const restartDirectory - ) - { - for( auto & slave : slaveList ) - slave->restart( - restartStep, - restartDirectory - ); - } - - /** create a checkpoint - * - * Trigger the method checkpoint() for all slave instances. - */ - void checkpoint( - uint32_t currentStep, - std::string const checkpointDirectory - ) - { - for( auto & slave : slaveList ) - slave->checkpoint( - currentStep, - checkpointDirectory - ); - } - - private: - - void pluginLoad( ) - { - size_t const numSlaves = slaveHelp->getNumPlugins( ); - if( numSlaves > 0u ) - slaveHelp->validateOptions( ); - for( size_t i = 0; i < numSlaves; ++i ) + /** Master class to create multi plugins + * + * Create and handle a plugin as multi plugin. Parameter of a multi plugin + * can be used multiple times on the command line. + * + * @tparam T_Slave type of the plugin (must inherit from ISlave) + */ + template + class Master : public ISimulationPlugin { - slaveList.emplace_back( - slaveHelp->create( - slaveHelp, - i, - m_cellDescription - ) - ); - } - } - - void pluginUnload( ) - { - slaveList.clear( ); - } - - void notify(uint32_t currentStep) - { - // nothing to do here - } - - }; - -} // namespace multi -} // namespace plugins - -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_Slave - > - struct SpeciesEligibleForSolver< - T_Species, - plugins::multi::Master< T_Slave > - > + public: + using Slave = T_Slave; + using SlaveList = std::list>; + SlaveList slaveList; + + std::shared_ptr slaveHelp; + + MappingDesc* m_cellDescription = nullptr; + + Master() : slaveHelp(Slave::getHelp()) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } + + virtual ~Master() + { + } + + std::string pluginGetName() const + { + // the PMacc plugin system needs a short description instead of the plugin name + return slaveHelp->getName() + ": " + slaveHelp->getDescription(); + } + + void 
pluginRegisterHelp(boost::program_options::options_description& desc) + { + slaveHelp->registerHelp(desc); + } + + void setMappingDescription(MappingDesc* cellDescription) + { + m_cellDescription = cellDescription; + } + + /** restart a checkpoint + * + * Trigger the method restart() for all slave instances. + */ + void restart(uint32_t restartStep, std::string const restartDirectory) + { + for(auto& slave : slaveList) + slave->restart(restartStep, restartDirectory); + } + + /** create a checkpoint + * + * Trigger the method checkpoint() for all slave instances. + */ + void checkpoint(uint32_t currentStep, std::string const checkpointDirectory) + { + for(auto& slave : slaveList) + slave->checkpoint(currentStep, checkpointDirectory); + } + + private: + void pluginLoad() + { + size_t const numSlaves = slaveHelp->getNumPlugins(); + if(numSlaves > 0u) + slaveHelp->validateOptions(); + for(size_t i = 0; i < numSlaves; ++i) + { + slaveList.emplace_back(slaveHelp->create(slaveHelp, i, m_cellDescription)); + } + } + + void pluginUnload() + { + slaveList.clear(); + } + + void notify(uint32_t currentStep) + { + // nothing to do here + } + }; + + } // namespace multi + } // namespace plugins + + namespace particles { - using type = typename SpeciesEligibleForSolver< - T_Species, - T_Slave - >::type; - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using type = typename SpeciesEligibleForSolver::type; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/multi/Option.hpp b/include/picongpu/plugins/multi/Option.hpp index b5a7677a90..063b0ab9aa 100644 --- a/include/picongpu/plugins/multi/Option.hpp +++ b/include/picongpu/plugins/multi/Option.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -27,169 +27,161 @@ namespace picongpu { -namespace plugins -{ -namespace multi -{ - - /** multi option storage - * - * This option stores the values of a multi command line option - * and allows to set a default value. - * - * @tparam T_ValueType type of the option - */ - template< typename T_ValueType > - struct Option : public std::vector< T_ValueType > + namespace plugins { - using StorageType = std::vector< T_ValueType >; - - //! type of the value - using ValueType = T_ValueType; - - - /** create a option with a default value - * - * @param name name of the option - * @param description description for the option - * @param defaultValue default value of the option - */ - Option( - std::string const & name, - std::string const & description, - ValueType const & defaultValue - ) : - m_name( name ), - m_description( description ), - m_defaultValue( defaultValue ), - m_hasDefaultValue( true ) - { - } - - /** create a option without a default value - * - * @param name name of the option - * @param description description for the option - */ - Option( - std::string const & name, - std::string const & description - ) : - m_name( name ), - m_description( description ), - m_hasDefaultValue( false ) - { - } - - /** get the name of the option - * - * @return name - */ - std::string getName() - { - return m_name; - } - - /** get the description of the option - * - * @return description - */ - std::string getDescription() - { - return m_description; - } - - /** register the option - * - * @param desc option object where the option is appended - * @param prefix prefix to add to the option name - * @param additionalDescription extent the default description - */ - void registerHelp( - boost::program_options::options_description & desc, - std::string const & prefix = std::string{ }, - std::string const & additionalDescription = std::string{ } - ) - { - std::string printDefault; - if( m_hasDefaultValue ) - printDefault = std::string( " | default: " ) + getDefaultAsStr(); - 
- desc.add_options( )( - ( prefix + "." + getName() ).c_str( ), - boost::program_options::value( getStorage() )->multitoken( ), - ( getDescription() + additionalDescription + printDefault ).c_str() - ); - } - - /** get the default value - * - * Throw an exception if there is no default value defined. - * - * @param get the default value defined during the construction of this class - */ - T_ValueType getDefault() - { - if( !m_hasDefaultValue ) - throw std::runtime_error( std::string("There is no default value defined for the option: " ) + getName() ); - return m_defaultValue; - } - - /** set a default value - * - * The old default value will be overwritten if already exists. - * - * @param defaultValue new default value - */ - void setDefault( T_ValueType const & defaultValue ) + namespace multi { - m_hasDefaultValue = true; - m_defaultValue = defaultValue; - } - - //! get the default value as string - std::string getDefaultAsStr() - { - std::stringstream ss; - ss << getDefault( ); - return ss.str(); - } - - /** get the value set by the user - * - * Throw an exception if there is no default value defined and idx is - * larger than the number of options provided by the user. - * - * @param idx index of the multi plugin - * @return if number of user provided option <= idx then the user defined - * value else the default value if defined - */ - T_ValueType get( uint32_t idx ) - { - if( StorageType::size() <= idx ) + /** multi option storage + * + * This option stores the values of a multi command line option + * and allows to set a default value. 
+ * + * @tparam T_ValueType type of the option + */ + template + struct Option : public std::vector { - if( !m_hasDefaultValue ) - throw std::runtime_error( std::string("There is no default value defined for the option " + getName() + " and idx is out of range") ); - return m_defaultValue; - } - - return StorageType::operator[]( idx ); - } - - private: - - std::string const m_name; - std::string const m_description; - - T_ValueType m_defaultValue; - bool m_hasDefaultValue = false; - - StorageType* getStorage() - { - return static_cast(this); - } - }; - -} // namespace multi -} // namespace plugins + using StorageType = std::vector; + + //! type of the value + using ValueType = T_ValueType; + + + /** create a option with a default value + * + * @param name name of the option + * @param description description for the option + * @param defaultValue default value of the option + */ + Option(std::string const& name, std::string const& description, ValueType const& defaultValue) + : m_name(name) + , m_description(description) + , m_defaultValue(defaultValue) + , m_hasDefaultValue(true) + { + } + + /** create a option without a default value + * + * @param name name of the option + * @param description description for the option + */ + Option(std::string const& name, std::string const& description) + : m_name(name) + , m_description(description) + , m_hasDefaultValue(false) + { + } + + /** get the name of the option + * + * @return name + */ + std::string getName() + { + return m_name; + } + + /** get the description of the option + * + * @return description + */ + std::string getDescription() + { + return m_description; + } + + /** register the option + * + * @param desc option object where the option is appended + * @param prefix prefix to add to the option name + * @param additionalDescription extent the default description + */ + void registerHelp( + boost::program_options::options_description& desc, + std::string const& prefix = std::string{}, + std::string const& 
additionalDescription = std::string{}) + { + std::string printDefault; + if(m_hasDefaultValue) + printDefault = std::string(" | default: ") + getDefaultAsStr(); + + desc.add_options()( + (prefix + "." + getName()).c_str(), + boost::program_options::value(getStorage())->multitoken(), + (getDescription() + additionalDescription + printDefault).c_str()); + } + + /** get the default value + * + * Throw an exception if there is no default value defined. + * + * @param get the default value defined during the construction of this class + */ + T_ValueType getDefault() + { + if(!m_hasDefaultValue) + throw std::runtime_error( + std::string("There is no default value defined for the option: ") + getName()); + return m_defaultValue; + } + + /** set a default value + * + * The old default value will be overwritten if already exists. + * + * @param defaultValue new default value + */ + void setDefault(T_ValueType const& defaultValue) + { + m_hasDefaultValue = true; + m_defaultValue = defaultValue; + } + + //! get the default value as string + std::string getDefaultAsStr() + { + std::stringstream ss; + ss << getDefault(); + return ss.str(); + } + + /** get the value set by the user + * + * Throw an exception if there is no default value defined and idx is + * larger than the number of options provided by the user. 
+ * + * @param idx index of the multi plugin + * @return if number of user provided option <= idx then the user defined + * value else the default value if defined + */ + T_ValueType get(uint32_t idx) + { + if(StorageType::size() <= idx) + { + if(!m_hasDefaultValue) + throw std::runtime_error(std::string( + "There is no default value defined for the option " + getName() + + " and idx is out of range")); + return m_defaultValue; + } + + return StorageType::operator[](idx); + } + + private: + std::string const m_name; + std::string const m_description; + + T_ValueType m_defaultValue; + bool m_hasDefaultValue = false; + + StorageType* getStorage() + { + return static_cast(this); + } + }; + + } // namespace multi + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/multi/multi.hpp b/include/picongpu/plugins/multi/multi.hpp index 0cd75d9005..56481a4cbb 100644 --- a/include/picongpu/plugins/multi/multi.hpp +++ b/include/picongpu/plugins/multi/multi.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/plugins/openPMD/Json.cpp b/include/picongpu/plugins/openPMD/Json.cpp new file mode 100644 index 0000000000..42bcb280e1 --- /dev/null +++ b/include/picongpu/plugins/openPMD/Json.cpp @@ -0,0 +1,335 @@ +/* Copyright 2021 Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#if ENABLE_OPENPMD == 1 + +# include "picongpu/plugins/openPMD/Json.hpp" +# include "picongpu/plugins/openPMD/Json_private.hpp" + +# include // std::copy_n, std::find +# include // std::isspace +# include +# include + +/* + * Note: + * This is a hostonly .cpp file because CMake will not use -isystem for system + * include paths on NVCC targets created with cupla_add_executable. + * Since throws a number of warnings, this .cpp file + * ensures that NVCC never sees that library. + */ + +// Anonymous namespace so these helpers don't get exported +namespace +{ + /** + * @brief Remove leading and trailing characters from a string. + * + * @tparam F Functor type for to_remove + * @param s String to trim. + * @param to_remove Functor deciding which characters to remove. + */ + template + std::string trim(std::string const& s, F&& to_remove) + { + auto begin = s.begin(); + for(; begin != s.end(); ++begin) + { + if(!to_remove(*begin)) + { + break; + } + } + auto end = s.rbegin(); + for(; end != s.rend(); ++end) + { + if(!to_remove(*end)) + { + break; + } + } + return s.substr(begin - s.begin(), end.base() - begin); + } + + /** + * @brief Check whether the string points to a filename or not. + * + * A string is considered to point to a filename if its first + * non-whitespace character is an '@'. + * The filename will be trimmed of whitespace using trim(). + * + * @param unparsed The string that possibly points to a file. + * @return The filename if the string points to the file, an empty + * string otherwise. + * + * @todo Upon switching to C++17, use std::optional to make the return + * type clearer. + * Until then, this is somewhat safe anyway since filenames need + * to be non-empty. 
+ */ + std::string extractFilename(std::string const& unparsed) + { + std::string trimmed = trim(unparsed, [](char c) { return std::isspace(c); }); + if(trimmed.at(0) == '@') + { + trimmed = trimmed.substr(1); + trimmed = trim(trimmed, [](char c) { return std::isspace(c); }); + return trimmed; + } + else + { + return {}; + } + } + + /** + * @brief Read a file in MPI-collective manner. + * + * The file is read on rank 0 and its contents subsequently distributed + * to all other ranks. + * + * @param path Path for the file to read. + * @param comm MPI communicator. + * @return std::string Full file content. + */ + std::string collective_file_read(std::string const& path, MPI_Comm comm) + { + int rank, size; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &size); + + std::string res; + size_t stringLength = 0; + if(rank == 0) + { + std::fstream handle; + handle.open(path, std::ios_base::in); + std::stringstream stream; + stream << handle.rdbuf(); + res = stream.str(); + if(!handle.good()) + { + throw std::runtime_error("Failed reading JSON config from file " + path + "."); + } + stringLength = res.size() + 1; + } + MPI_Datatype datatype = MPI_Types{}.value; + int err = MPI_Bcast(&stringLength, 1, datatype, 0, comm); + if(err) + { + throw std::runtime_error("[collective_file_read] MPI_Bcast stringLength failure."); + } + std::vector recvbuf(stringLength, 0); + if(rank == 0) + { + std::copy_n(res.c_str(), stringLength, recvbuf.data()); + } + err = MPI_Bcast(recvbuf.data(), stringLength, MPI_CHAR, 0, comm); + if(err) + { + throw std::runtime_error("[collective_file_read] MPI_Bcast file content failure."); + } + if(rank != 0) + { + res = recvbuf.data(); + } + return res; + } + + KindOfConfig readPattern( + std::vector& patterns, + nlohmann::json& defaultConfig, + nlohmann::json const& object) + { + static std::string const errorMsg = R"END( +[openPMD plugin] Each single pattern in an extended JSON configuration +must be a JSON object with keys 'select' and 'cfg'. 
+The key 'select' is optional, indicating a default configuration if it is +not set. +The key 'select' must point to either a single string or an array of strings.)END"; + + if(!object.is_object()) + { + throw std::runtime_error(errorMsg); + } + try + { + nlohmann::json const& cfg = object.at("cfg"); + if(!object.contains("select")) + { + nlohmann::json const& cfg = object.at("cfg"); + defaultConfig = cfg; + return KindOfConfig::Default; + } + else + { + nlohmann::json const& pattern = object.at("select"); + auto cfgShared = std::make_shared(cfg); + if(pattern.is_string()) + { + patterns.emplace_back(pattern.get(), std::move(cfgShared)); + } + else if(pattern.is_array()) + { + patterns.reserve(pattern.size()); + for(size_t i = 0; i < pattern.size(); ++i) + { + patterns.emplace_back(pattern[i].get(), cfgShared); + } + } + else + { + throw std::runtime_error(errorMsg); + } + return KindOfConfig::Pattern; + } + } + catch(nlohmann::json::out_of_range const&) + { + throw std::runtime_error(errorMsg); + } + } + + void MatcherPerBackend::init(nlohmann::json const& config) + { + if(config.is_object()) + { + // simple layout: only one global JSON object was passed + // forward this one directly to openPMD + m_patterns.emplace_back("", std::make_shared(config)); + } + else if(config.is_array()) + { + bool defaultEmplaced = false; + // enhanced PIConGPU-defined layout + for(size_t i = 0; i < config.size(); ++i) + { + auto kindOfConfig = readPattern(m_patterns, m_defaultConfig, config[i]); + if(kindOfConfig == KindOfConfig::Default) + { + if(defaultEmplaced) + { + throw std::runtime_error("[openPMD plugin] Specified more than one default configuration."); + } + else + { + defaultEmplaced = true; + } + } + } + } + else + { + throw std::runtime_error("[openPMD plugin] Expecting an object or an array as JSON configuration."); + } + } + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. 
+ * @return The matched JSON configuration, as a string. + */ + nlohmann::json const& MatcherPerBackend::get(std::string const& datasetPath) const + { + for(auto const& pattern : m_patterns) + { + if(std::regex_match(datasetPath, pattern.pattern)) + { + return *pattern.config; + } + } + static nlohmann::json const emptyConfig; // null + return emptyConfig; + } +} // namespace + +namespace picongpu +{ + namespace json + { + void JsonMatcher::init(std::string const& config, MPI_Comm comm) + { + auto const filename = extractFilename(config); + m_wholeConfig = nlohmann::json::parse(filename.empty() ? config : collective_file_read(filename, comm)); + if(!m_wholeConfig.is_object()) + { + throw std::runtime_error("[openPMD plugin] Expected an object for the JSON configuration."); + } + m_perBackend.reserve(m_wholeConfig.size()); + for(auto it = m_wholeConfig.begin(); it != m_wholeConfig.end(); ++it) + { + std::string const& backendName = it.key(); + if(std::find(m_recognizedBackends.begin(), m_recognizedBackends.end(), backendName) + == m_recognizedBackends.end()) + { + // The key does not point to the configuration of a backend recognized by PIConGPU + // Ignore it. 
+ continue; + } + if(!it.value().is_object()) + { + throw std::runtime_error( + "[openPMD plugin] Each backend's configuration must be a JSON object (config for backend " + + backendName + ")."); + } + if(it.value().contains("dataset")) + { + m_perBackend.emplace_back(PerBackend{backendName, MatcherPerBackend{it.value().at("dataset")}}); + } + } + } + std::string JsonMatcher::get(std::string const& datasetPath) const + { + nlohmann::json result = nlohmann::json::object(); + for(auto const& backend : m_perBackend) + { + auto const& datasetConfig = backend.matcher.get(datasetPath); + if(datasetConfig.empty()) + { + continue; + } + result[backend.backendName]["dataset"] = datasetConfig; + } + return result.dump(); + } + + std::string JsonMatcher::getDefault() const + { + nlohmann::json result = m_wholeConfig; + for(auto const& backend : m_perBackend) + { + auto const& datasetConfig = backend.matcher.getDefault(); + if(datasetConfig.empty()) + { + continue; + } + result[backend.backendName]["dataset"] = datasetConfig; + } + return result.dump(); + } + + std::unique_ptr AbstractJsonMatcher::construct(std::string const& config, MPI_Comm comm) + { + return std::unique_ptr{new JsonMatcher{config, comm}}; + } + } // namespace json +} // namespace picongpu + +#endif // ENABLE_OPENPMD diff --git a/include/picongpu/plugins/openPMD/Json.hpp b/include/picongpu/plugins/openPMD/Json.hpp new file mode 100644 index 0000000000..436e550287 --- /dev/null +++ b/include/picongpu/plugins/openPMD/Json.hpp @@ -0,0 +1,77 @@ +/* Copyright 2021 Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include + +#include // std::unique_ptr +#include + +namespace picongpu +{ + namespace json + { + /** + * @brief Class to handle extended JSON configurations as used by + * the openPMD plugin. + * + * This class handles parsing of the extended JSON patterns as well as + * selection of one JSON configuration by regex. + * + */ + class AbstractJsonMatcher + { + public: + /** + * @brief Construct a JSON matcher to hand out dataset-specific configurations + * + * This function will parse the given config, after reading it + * from a file if needed. In this case, the constructor is + * MPI-collective. + * It will distinguish per backend between ordinary openPMD JSON configurations + * and extended configurations as defined by PIConGPU. + * If an ordinary JSON configuration was detected, given regex + * patterns will be matched against "" (the empty string). + * + * @param config The JSON configuration, exactly as in --openPMD.json. + * @param comm MPI communicator for collective file reading, if needed. + * @return std::unique_ptr + */ + static std::unique_ptr construct(std::string const& config, MPI_Comm comm); + + virtual ~AbstractJsonMatcher() = default; + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. + */ + virtual std::string get(std::string const& datasetPath) const = 0; + + /** + * @brief Get the default JSON config. + * + * @return The default JSON configuration, as a string. 
+ */ + virtual std::string getDefault() const = 0; + }; + } // namespace json +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/Json_private.hpp b/include/picongpu/plugins/openPMD/Json_private.hpp new file mode 100644 index 0000000000..f6dec2da5a --- /dev/null +++ b/include/picongpu/plugins/openPMD/Json_private.hpp @@ -0,0 +1,273 @@ +/* Copyright 2021 Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/plugins/openPMD/Json.hpp" + +#include +#include + +#include +#include +#include + +/* + * Note: + * This header is included only into hostonly .cpp files because CMake + * will not use -isystem for system include paths on NVCC targets created + * with cupla_add_executable. + * Since throws a number of warnings, this design + * ensures that NVCC never sees that library. + */ + +// Anonymous namespace so these helpers don't get exported +namespace +{ + /** + * @brief Remove leading and trailing characters from a string. + * + * @tparam F Functor type for to_remove + * @param s String to trim. + * @param to_remove Functor deciding which characters to remove. + */ + template + std::string trim(std::string const& s, F&& to_remove); + + /** + * @brief Check whether the string points to a filename or not. 
+ * + * A string is considered to point to a filename if its first + * non-whitespace character is an '@'. + * The filename will be trimmed of whitespace using trim(). + * + * @param unparsed The string that possibly points to a file. + * @return The filename if the string points to the file, an empty + * string otherwise. + * + * @todo Upon switching to C++17, use std::optional to make the return + * type clearer. + * Until then, this is somewhat safe anyway since filenames need + * to be non-empty. + */ + std::string extractFilename(std::string const& unparsed); + + /** + * @brief Helper class to help figure out a platform-independent + * MPI_Datatype for size_t. + */ + template + struct MPI_Types; + + template<> + struct MPI_Types + { + // can't make this constexpr due to MPI + // so, make this non-static for simplicity + MPI_Datatype value = MPI_UNSIGNED_LONG; + }; + + template<> + struct MPI_Types + { + MPI_Datatype value = MPI_UNSIGNED_LONG_LONG; + }; + + template<> + struct MPI_Types + { + MPI_Datatype value = MPI_UNSIGNED; + }; + + /** + * @brief Read a file in MPI-collective manner. + * + * The file is read on rank 0 and its contents subsequently distributed + * to all other ranks. + * + * @param path Path for the file to read. + * @param comm MPI communicator. + * @return std::string Full file content. 
+ */ + std::string collective_file_read(std::string const& path, MPI_Comm comm); + + struct Pattern + { + std::regex pattern; + std::shared_ptr config; + + Pattern(std::string pattern_in, std::shared_ptr config_in) + // we construct the patterns once and use them often, so let's ask for some optimization + : pattern{std::move(pattern_in), std::regex_constants::egrep | std::regex_constants::optimize} + , config{std::move(config_in)} + { + } + }; + + enum class KindOfConfig : char + { + Pattern, + Default + }; + + /** + * @brief Read a single JSON pattern of the form {"select": ..., "cfg": ...} + * + * The "select" key is optional, indicating the default configuration if it + * is missing. + * + * @param patterns Output parameter: Emplace a parsed pattern into this list. + * @param defaultConfig Output parameter: If the pattern was the default pattern, + * emplace it here. + * @param object The JSON object that is parsed as the pattern. + * @return Whether the pattern was the default configuration or not. + */ + KindOfConfig readPattern( + std::vector& patterns, + nlohmann::json& defaultConfig, + nlohmann::json const& object); + + /** + * @brief Matcher for dataset configurations per backend. + * + */ + class MatcherPerBackend + { + private: + nlohmann::json m_defaultConfig; + std::vector m_patterns; + + void init(nlohmann::json const& config); + + public: + /** + * @brief For default construction. + */ + explicit MatcherPerBackend() = default; + + /** + * @brief Initialize one backend's JSON matcher from its configuration. + * + * This constructor will parse the given config. + * It will distinguish between ordinary openPMD JSON configurations + * and extended configurations as defined by PIConGPU. + * If an ordinary JSON configuration was detected, given regex + * patterns will be matched against "" (the empty string). + * + * @param config The JSON configuration for one backend. + * E.g. 
for ADIOS2, this will be the sub-object/array found under + * config["adios2"]["dataset"]. + */ + MatcherPerBackend(nlohmann::json const& config) + { + init(config); + } + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. + */ + nlohmann::json const& get(std::string const& datasetPath) const; + + /** + * @brief Get the default JSON config. + * + * @return The default JSON configuration, as a string. + */ + nlohmann::json const& getDefault() const + { + return m_defaultConfig; + } + }; +} // namespace + +namespace picongpu +{ + namespace json + { + /** + * @brief Class to handle extended JSON configurations as used by + * the openPMD plugin. + * + * This class handles parsing of the extended JSON patterns as well as + * selection of one JSON configuration by regex. + * + */ + class JsonMatcher : public AbstractJsonMatcher + { + private: + struct PerBackend + { + std::string backendName; + MatcherPerBackend matcher; + }; + std::vector m_perBackend; + nlohmann::json m_wholeConfig; + static std::vector const m_recognizedBackends; + + void init(std::string const& config, MPI_Comm comm); + + public: + /** + * @brief For default construction. + */ + explicit JsonMatcher() = default; + + /** + * @brief Initialize JSON matcher from command line arguments. + * + * This constructor will parse the given config, after reading it + * from a file if needed. In this case, the constructor is + * MPI-collective. + * It will distinguish between ordinary openPMD JSON configurations + * and extended configurations as defined by PIConGPU. + * If an ordinary JSON configuration was detected, given regex + * patterns will be matched against "" (the empty string). + * + * @param config The JSON configuration, exactly as in + * --openPMD.json. + * @param comm MPI communicator for collective file reading, + * if needed. 
+ */ + JsonMatcher(std::string const& config, MPI_Comm comm) + { + init(config, comm); + } + + /** + * @brief Get the JSON config associated with a regex pattern. + * + * @param datasetPath The regex. + * @return The matched JSON configuration, as a string. + */ + std::string get(std::string const& datasetPath) const override; + + /** + * @brief Get the default JSON config. + * + * @return The default JSON configuration, as a string. + */ + std::string getDefault() const override; + }; + + std::vector const JsonMatcher::m_recognizedBackends = {"adios1", "adios2", "hdf5", "json"}; + } // namespace json +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/NDScalars.hpp b/include/picongpu/plugins/openPMD/NDScalars.hpp new file mode 100644 index 0000000000..b83b910fd5 --- /dev/null +++ b/include/picongpu/plugins/openPMD/NDScalars.hpp @@ -0,0 +1,200 @@ +/* Copyright 2016-2021 Alexander Grund, Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/plugins/openPMD/openPMDWriter.def" +#include "picongpu/plugins/openPMD/openPMDVersion.def" + +#include +#include + +#include +#include +#include + +namespace picongpu +{ + namespace openPMD + { + /** Functor for writing N-dimensional scalar fields with N=simDim + * In the current implementation each process (of the ND grid of processes) + * writes 1 scalar value Optionally the processes can also write an + * attribute for this dataset by using a non-empty attrName + * + * @tparam T_Scalar Type of the scalar value to write + * @tparam T_Attribute Type of the attribute (can be omitted if attribute is + * not written, defaults to uint64_t) + */ + template + struct WriteNDScalars + { + WriteNDScalars( + const std::string& baseName, + const std::string& group, + const std::string& dataset, + const std::string& attrName = "") + : baseName(baseName) + , group(group) + , dataset(dataset) + , attrName(attrName) + { + } + + private: + /** Prepare the write operation: + * Define openPMD dataset and write + * attribute (if attrName is non-empty) + * + * Must be called before executing the functor + */ + std::tuple<::openPMD::MeshRecordComponent, ::openPMD::Offset, ::openPMD::Extent> prepare( + ThreadParams& params, + T_Attribute attribute) + { + auto name = baseName + "/" + group + "/" + dataset; + const auto openPMDScalarType = ::openPMD::determineDatatype(); + using Dimensions = pmacc::math::UInt64; + + log("openPMD: prepare write %1%D scalars: %2%") % simDim % name; + + // Size over all processes + Dimensions globalDomainSize = Dimensions::create(1); + Dimensions localDomainOffset = Dimensions::create(0); + + for(uint32_t d = 0; d < simDim; ++d) + { + globalDomainSize[d] = Environment::get().GridController().getGpuNodes()[d]; + localDomainOffset[d] = Environment::get().GridController().getPosition()[d]; + } + + ::openPMD::Series& series = *params.openPMDSeries; + ::openPMD::MeshRecordComponent mrc + = 
series.WRITE_ITERATIONS[params.currentStep].meshes[baseName + "_" + group][dataset]; + + if(!attrName.empty()) + { + log("openPMD: write attribute %1% of %2%D scalars: %3%") % attrName % simDim + % name; + + mrc.setAttribute(attrName, attribute); + } + + std::string datasetName = series.meshesPath() + baseName + "_" + group + "/" + dataset; + params.initDataset( + mrc, + openPMDScalarType, + std::move(globalDomainSize), + true, + params.compressionMethod, + datasetName); + + return std::make_tuple( + std::move(mrc), + static_cast<::openPMD::Offset>(asStandardVector(std::move(localDomainOffset))), + static_cast<::openPMD::Extent>(asStandardVector(Dimensions::create(1)))); + } + + public: + void operator()(ThreadParams& params, T_Scalar value, T_Attribute attribute = T_Attribute()) + { + auto tuple = prepare(params, std::move(attribute)); + auto name = baseName + "/" + group + "/" + dataset; + log("openPMD: write %1%D scalars: %2%") % simDim % name; + + std::get<0>(tuple).storeChunk( + std::make_shared(value), + std::move(std::get<1>(tuple)), + std::move(std::get<2>(tuple))); + params.openPMDSeries->flush(); + } + + private: + const std::string baseName, group, dataset, attrName; + int64_t varId; + }; + + /** Functor for reading ND scalar fields with N=simDim + * In the current implementation each process (of the ND grid of processes) + * reads 1 scalar value Optionally the processes can also read an attribute + * for this dataset by using a non-empty attrName + * + * @tparam T_Scalar Type of the scalar value to read + * @tparam T_Attribute Type of the attribute (can be omitted if attribute is + * not read, defaults to uint64_t) + */ + template + struct ReadNDScalars + { + /** Read the skalar field and optionally the attribute into the values + * referenced by the pointers */ + void operator()( + ThreadParams& params, + const std::string& baseName, + const std::string& group, + const std::string& dataset, + T_Scalar* value, + const std::string& attrName = "", + 
T_Attribute* attribute = nullptr) + { + auto name = baseName + "/" + group + "/" + dataset; + log("openPMD: read %1%D scalars: %2%") % simDim % name; + + + auto datasetName = baseName + "/" + group + "/" + dataset; + ::openPMD::Series& series = *params.openPMDSeries; + ::openPMD::MeshRecordComponent mrc + = series.iterations[params.currentStep].meshes[baseName + "_" + group][dataset]; + auto ndim = mrc.getDimensionality(); + if(ndim != simDim) + { + throw std::runtime_error(std::string("Invalid dimensionality for ") + name); + } + + DataSpace gridPos = Environment::get().GridController().getPosition(); + ::openPMD::Offset start; + ::openPMD::Extent count; + start.reserve(ndim); + count.reserve(ndim); + for(int d = 0; d < ndim; ++d) + { + start.push_back(gridPos.revert()[d]); + count.push_back(1); + } + + __getTransactionEvent().waitForFinished(); + + log("openPMD: Schedule read scalar %1%)") % datasetName; + + std::shared_ptr readValue = mrc.loadChunk(start, count); + + series.flush(); + + *value = *readValue; + + if(!attrName.empty() && attribute != nullptr) /* attribute is an optional output (defaults to nullptr): only read it when a destination was passed */ + { + log("openPMD: read attribute %1% for scalars: %2%") % attrName % name; + *attribute = mrc.getAttribute(attrName).get(); + } + } + }; + + } // namespace openPMD +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/WriteMeta.hpp b/include/picongpu/plugins/openPMD/WriteMeta.hpp new file mode 100644 index 0000000000..15cf42aad0 --- /dev/null +++ b/include/picongpu/plugins/openPMD/WriteMeta.hpp @@ -0,0 +1,239 @@ +/* Copyright 2013-2021 Axel Huebl, Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once +#include "picongpu/fields/absorber/ExponentialDamping.hpp" +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.hpp" +#include "picongpu/plugins/common/stringHelpers.hpp" +#include "picongpu/plugins/openPMD/openPMDWriter.def" +#include "picongpu/plugins/openPMD/openPMDVersion.def" +#include "picongpu/simulation_defines.hpp" +#include "picongpu/traits/SIBaseUnits.hpp" + +#include + +#include + +#include +#include +#include + + +namespace picongpu +{ + namespace openPMD + { + using namespace pmacc; + + namespace writeMeta + { + /** write openPMD species meta data + * + * @tparam numSpecies count of defined species + */ + template::type::value> + struct OfAllSpecies + { + /** write meta data for species + * + * @param threadParams context of the openPMD plugin + * @param fullMeshesPath path to mesh entry + */ + void operator()(ThreadParams* threadParams) const + { + /* + * @todo set boundary per species + */ + GetStringProperties::type> particleBoundaryProp; + std::vector listParticleBoundary; + std::vector listParticleBoundaryParam; + auto n = NumberOfExchanges::value; + listParticleBoundary.reserve(n - 1); + listParticleBoundaryParam.reserve(n - 1); + for(uint32_t i = n - 1; i > 0; --i) + { + if(FRONT % i == 0) + { + listParticleBoundary.push_back(particleBoundaryProp[ExchangeTypeNames()[i]]["name"].value); + listParticleBoundaryParam.push_back( + particleBoundaryProp[ExchangeTypeNames()[i]]["param"].value); + } + } + + ::openPMD::Iteration iteration + = threadParams->openPMDSeries->WRITE_ITERATIONS[threadParams->currentStep]; + 
iteration.setAttribute("particleBoundary", listParticleBoundary); + iteration.setAttribute("particleBoundaryParameters", listParticleBoundaryParam); + } + }; + + /** specialization if no species are defined */ + template<> + struct OfAllSpecies<0> + { + /** no species are defined, so there is no species meta data to write + * + * @param threadParams context of the openPMD plugin (unused) + * @param fullMeshesPath unused; defaulted so the functor is callable with one argument like the primary template + */ + void operator()( + ThreadParams* /* threadParams */, + const std::string& /* fullMeshesPath */ = std::string() + ) const + { + } + }; + + } // namespace writeMeta + + struct WriteMeta + { + void operator()(ThreadParams* threadParams) + { + log("openPMD: (begin) write meta attributes."); + + ::openPMD::Series& series = *threadParams->openPMDSeries; + + /* + * The openPMD API will kindly write the obligatory metadata by + * itself, so we don't need to do this manually. We give the + * optional metadata: + */ + + /* recommended */ + const std::string author = Environment<>::get().SimulationDescription().getAuthor(); + if(author.length() > 0) + { + series.setAuthor(author); + } + + const std::string software("PIConGPU"); + + std::stringstream softwareVersion; + softwareVersion << PICONGPU_VERSION_MAJOR << "." << PICONGPU_VERSION_MINOR << "." 
+ << PICONGPU_VERSION_PATCH; + if(!std::string(PICONGPU_VERSION_LABEL).empty()) + softwareVersion << "-" << PICONGPU_VERSION_LABEL; + series.setSoftware(software, softwareVersion.str()); + + const std::string date = helper::getDateString("%F %T %z"); + series.setDate(date); + + ::openPMD::Iteration iteration = series.WRITE_ITERATIONS[threadParams->currentStep]; + ::openPMD::Container<::openPMD::Mesh>& meshes = iteration.meshes; + + // iteration-level attributes + iteration.setDt(DELTA_T); + iteration.setTime(float_X(threadParams->currentStep) * DELTA_T); + iteration.setTimeUnitSI(UNIT_TIME); + + GetStringProperties fieldSolverProps; + const std::string fieldSolver(fieldSolverProps["name"].value); + meshes.setAttribute("fieldSolver", fieldSolver); + + if(fieldSolverProps.find("param") != fieldSolverProps.end()) + { + const std::string fieldSolverParam(fieldSolverProps["param"].value); + meshes.setAttribute("fieldSolverParameters", fieldSolverParam); + } + + /* order as in axisLabels: + * 3D: z-lower, z-upper, y-lower, y-upper, x-lower, x-upper + * 2D: y-lower, y-upper, x-lower, x-upper + */ + GetStringProperties fieldBoundaryProp; + std::vector listFieldBoundary; + std::vector listFieldBoundaryParam; + auto n = NumberOfExchanges::value; + listFieldBoundary.reserve(n - 1); + listFieldBoundaryParam.reserve(n - 1); + for(uint32_t i = n - 1; i > 0; --i) + { + if(FRONT % i == 0) + { + listFieldBoundary.push_back(fieldBoundaryProp[ExchangeTypeNames()[i]]["name"].value); + listFieldBoundaryParam.push_back(fieldBoundaryProp[ExchangeTypeNames()[i]]["param"].value); + } + } + + meshes.setAttribute("fieldBoundary", listFieldBoundary); + meshes.setAttribute("fieldBoundaryParameters", listFieldBoundaryParam); + + writeMeta::OfAllSpecies<>()(threadParams); + + GetStringProperties currentSmoothingProp; + const std::string currentSmoothing(currentSmoothingProp["name"].value); + meshes.setAttribute("currentSmoothing", currentSmoothing); + + if(currentSmoothingProp.find("param") != 
currentSmoothingProp.end()) + { + const std::string currentSmoothingParam(currentSmoothingProp["param"].value); + meshes.setAttribute("currentSmoothingParameters", currentSmoothingParam); + } + + const std::string chargeCorrection("none"); + meshes.setAttribute("chargeCorrection", chargeCorrection); + + /* write current iteration */ + log("openPMD: meta: iteration"); + iteration.setAttribute( + "iteration", + threadParams->currentStep); // openPMD API will not write this + // automatically + + /* write number of slides */ + log("openPMD: meta: sim_slides"); + uint32_t slides = MovingWindow::getInstance().getSlideCounter(threadParams->currentStep); + iteration.setAttribute("sim_slides", slides); + + /* + * Required time attributes are written automatically by openPMD API + */ + + + /* write normed grid parameters */ + log("openPMD: meta: grid"); + std::string names[3] = {"cell_width", "cell_height", "cell_depth"}; + for(unsigned i = 0; i < 3; ++i) + { + iteration.setAttribute(names[i], cellSize[i]); + } + + + /* write base units */ + log("openPMD: meta: units"); + iteration.setAttribute("unit_energy", UNIT_ENERGY); + iteration.setAttribute("unit_length", UNIT_LENGTH); + iteration.setAttribute("unit_speed", UNIT_SPEED); + iteration.setAttribute("unit_time", UNIT_TIME); + iteration.setAttribute("unit_mass", UNIT_MASS); + iteration.setAttribute("unit_charge", UNIT_CHARGE); + iteration.setAttribute("unit_efield", UNIT_EFIELD); + iteration.setAttribute("unit_bfield", UNIT_BFIELD); + + + /* write physical constants */ + iteration.setAttribute("mue0", MUE0); + iteration.setAttribute("eps0", EPS0); + + log("openPMD: ( end ) wite meta attributes."); + } + }; + } // namespace openPMD +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/WriteSpecies.hpp b/include/picongpu/plugins/openPMD/WriteSpecies.hpp new file mode 100644 index 0000000000..0cde47aab4 --- /dev/null +++ b/include/picongpu/plugins/openPMD/WriteSpecies.hpp @@ -0,0 +1,520 @@ +/* Copyright 
2014-2021 Rene Widera, Felix Schmitt, Axel Huebl, + * Alexander Grund, Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/traits/GetSpeciesFlagName.hpp" +#include "picongpu/plugins/ISimulationPlugin.hpp" +#include "picongpu/plugins/kernel/CopySpecies.kernel" +#include "picongpu/plugins/openPMD/openPMDWriter.def" +#include "picongpu/plugins/openPMD/openPMDVersion.def" +#include "picongpu/plugins/openPMD/writer/ParticleAttribute.hpp" +#include "picongpu/plugins/output/WriteSpeciesCommon.hpp" +#include "picongpu/plugins/output/ConstSpeciesAttributes.hpp" +#include "picongpu/plugins/openPMD/openPMDDimension.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace picongpu +{ + namespace openPMD + { + using namespace pmacc; + + template + struct StrategyRunParameters + { + pmacc::DataConnector& dc; + ThreadParams& params; + SpeciesTmp& speciesTmp; + Filter& filter; + ParticleFilter& particleFilter; + ParticleOffset& particleOffset; + uint64_t myNumParticles, globalNumParticles; + StrategyRunParameters( + pmacc::DataConnector& c_dc, + ThreadParams& c_params, + SpeciesTmp& c_speciesTmp, + Filter& c_filter, + 
ParticleFilter& c_particleFilter, + ParticleOffset& c_particleOffset, + uint64_t c_myNumParticles, + uint64_t c_globalNumParticles) + : dc(c_dc) + , params(c_params) + , speciesTmp(c_speciesTmp) + , filter(c_filter) + , particleFilter(c_particleFilter) + , particleOffset(c_particleOffset) + , myNumParticles(c_myNumParticles) /* particles of this rank, not the sum over all ranks */ + , globalNumParticles(c_globalNumParticles) + { + } + }; + + template + struct Strategy + { + virtual void malloc(std::string name, openPMDFrameType&, uint64_cu const myNumParticles) = 0; + + virtual void free(openPMDFrameType& hostFrame) = 0; + + virtual void prepare(std::string name, openPMDFrameType& hostFrame, RunParameters) = 0; + + virtual ~Strategy() = default; + }; + + /* + * Use double buffer. + */ + template + struct StrategyADIOS : Strategy + { + void malloc(std::string name, openPMDFrameType& hostFrame, uint64_cu const myNumParticles) override + { + /* malloc host memory */ + log("openPMD: (begin) malloc host memory: %1%") % name; + meta::ForEach> mallocMem; + mallocMem(hostFrame, myNumParticles); + log("openPMD: ( end ) malloc host memory: %1%") % name; + } + + void free(openPMDFrameType& hostFrame) override + { + meta::ForEach> freeMem; + freeMem(hostFrame); + } + + + void prepare(std::string name, openPMDFrameType& hostFrame, RunParameters rp) override + { + log("openPMD: (begin) copy particle host (with hierarchy) to " + "host (without hierarchy): %1%") + % name; + auto mallocMCBuffer + = rp.dc.template get>(MallocMCBuffer::getName(), true); + + int globalParticleOffset = 0; + AreaMapping mapper(*(rp.params.cellDescription)); + + pmacc::particles::operations::ConcatListOfFrames concatListOfFrames(mapper.getGridDim()); + +#if(PMACC_CUDA_ENABLED == 1 || ALPAKA_ACC_GPU_HIP_ENABLED == 1) + auto particlesBox = rp.speciesTmp->getHostParticlesBox(mallocMCBuffer->getOffset()); +#else + /* This separate code path is only a workaround until + * MallocMCBuffer is alpaka compatible. 
+ * + * @todo remove this workaround: we know that we are allowed to + * access the device memory directly. + */ + auto particlesBox = rp.speciesTmp->getDeviceParticlesBox(); + /* Notify to the event system that the particles box is used on + * the host. + * + * @todo remove this workaround + */ + __startOperation(ITask::TASK_HOST); + +#endif + concatListOfFrames( + globalParticleOffset, + hostFrame, + particlesBox, + rp.filter, + rp.particleOffset, /*relative to data domain (not to physical + domain)*/ + totalCellIdx_, + mapper, + rp.particleFilter); + + rp.dc.releaseData(MallocMCBuffer::getName()); + + /* sanity check against the rank-local particle count; this costs a little bit of time but writing to external is + * slower in general */ + PMACC_ASSERT((uint64_cu) globalParticleOffset == rp.myNumParticles); + } + }; + + /* + * Use mapped memory. + */ + template + struct StrategyHDF5 : Strategy + { + void malloc(std::string name, openPMDFrameType& hostFrame, uint64_cu const myNumParticles) override + { + log("openPMD: (begin) malloc mapped memory: %1%") % name; + /*malloc mapped memory*/ + meta::ForEach> mallocMem; + mallocMem(hostFrame, myNumParticles); + log("openPMD: ( end ) malloc mapped memory: %1%") % name; + } + + void free(openPMDFrameType& hostFrame) override + { + meta::ForEach> freeMem; + freeMem(hostFrame); + } + + void prepare(std::string name, openPMDFrameType& hostFrame, RunParameters rp) override + { + log("openPMD: (begin) copy particle to host: %1%") % name; + + log("openPMD: (begin) get mapped memory device pointer: %1%") % name; + /*load device pointer of mapped memory*/ + openPMDFrameType deviceFrame; + meta::ForEach> getDevicePtr; + getDevicePtr(deviceFrame, hostFrame); + log("openPMD: ( end ) get mapped memory device pointer: %1%") % name; + + GridBuffer counterBuffer(DataSpace(1)); + AreaMapping mapper(*(rp.params.cellDescription)); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + /* this sanity check costs a little bit of time but hdf5 
writing is + * slower */ + PMACC_KERNEL(CopySpecies{}) + (mapper.getGridDim(), numWorkers)( + counterBuffer.getDeviceBuffer().getPointer(), + deviceFrame, + rp.speciesTmp->getDeviceParticlesBox(), + rp.filter, + rp.particleOffset, + totalCellIdx_, + mapper, + rp.particleFilter); + counterBuffer.deviceToHost(); + log("openPMD: ( end ) copy particle to host: %1%") % name; + __getTransactionEvent().waitForFinished(); + log("openPMD: all events are finished: %1%") % name; + + PMACC_ASSERT((uint64_t) counterBuffer.getHostBuffer().getDataBox()[0] == rp.myNumParticles); + } + }; + + /** Write copy particle to host memory and dump to openPMD file + * + * @tparam T_Species type of species + */ + template + struct WriteSpecies + { + public: + using ThisSpecies = typename T_SpeciesFilter::Species; + using FrameType = typename ThisSpecies::FrameType; + using ParticleDescription = typename FrameType::ParticleDescription; + using ParticleAttributeList = typename FrameType::ValueTypeSeq; + + /* delete multiMask and localCellIdx in openPMD particle*/ + using TypesToDelete = bmpl::vector; + using ParticleCleanedAttributeList = typename RemoveFromSeq::type; + + /* add totalCellIdx for openPMD particle*/ + using ParticleNewAttributeList = typename MakeSeq::type; + + using NewParticleDescription = + typename ReplaceValueTypeSeq::type; + + using openPMDFrameType = Frame; + + void setParticleAttributes( + ::openPMD::ParticleSpecies& record, + AbstractJsonMatcher& matcher, + std::string const& basename) + { + const float_64 particleShape(GetShape::type::assignmentFunctionOrder); + record.setAttribute("particleShape", particleShape); + + traits::GetSpeciesFlagName> currentDepositionName; + const std::string currentDeposition(currentDepositionName()); + record.setAttribute("currentDeposition", currentDeposition.c_str()); + + traits::GetSpeciesFlagName> particlePushName; + const std::string particlePush(particlePushName()); + record.setAttribute("particlePush", particlePush.c_str()); + + 
traits::GetSpeciesFlagName> particleInterpolationName; + const std::string particleInterpolation(particleInterpolationName()); + record.setAttribute("particleInterpolation", particleInterpolation.c_str()); + + const std::string particleSmoothing("none"); + record.setAttribute("particleSmoothing", particleSmoothing.c_str()); + + // now we have a map in a writeable format with all zeroes + // for each record copy it and modify the copy, e.g. + + // const records stuff + ::openPMD::Datatype dataType = ::openPMD::Datatype::DOUBLE; + ::openPMD::Extent extent = {0}; + ::openPMD::Dataset dataSet = ::openPMD::Dataset(dataType, extent); + + // mass + plugins::output::GetMassOrZero const getMassOrZero; + if(getMassOrZero.hasMassRatio) + { + const float_64 mass(getMassOrZero()); + auto& massRecord = record["mass"]; + auto& massComponent = massRecord[::openPMD::RecordComponent::SCALAR]; + setDatasetOptions(dataSet, matcher.get(basename + "/mass")); + massComponent.resetDataset(dataSet); + massComponent.makeConstant(mass); + + auto unitMap = convertToUnitDimension(getMassOrZero.dimension()); + massRecord.setUnitDimension(unitMap); + massComponent.setUnitSI(::picongpu::UNIT_MASS); + massRecord.setAttribute("macroWeighted", int32_t(false)); + massRecord.setAttribute("weightingPower", float_64(1.0)); + massRecord.setAttribute("timeOffset", float_64(0.0)); + } + + // charge + using hasBoundElectrons = typename pmacc::traits::HasIdentifier::type; + plugins::output::GetChargeOrZero const getChargeOrZero; + if(!hasBoundElectrons::value && getChargeOrZero.hasChargeRatio) + { + const float_64 charge(getChargeOrZero()); + auto& chargeRecord = record["charge"]; + auto& chargeComponent = chargeRecord[::openPMD::RecordComponent::SCALAR]; + setDatasetOptions(dataSet, matcher.get(basename + "/charge")); + chargeComponent.resetDataset(dataSet); + chargeComponent.makeConstant(charge); + + auto unitMap = convertToUnitDimension(getChargeOrZero.dimension()); + 
chargeRecord.setUnitDimension(unitMap); + chargeComponent.setUnitSI(::picongpu::UNIT_CHARGE); + chargeRecord.setAttribute("macroWeighted", int32_t(false)); + chargeRecord.setAttribute("weightingPower", float_64(1.0)); + chargeRecord.setAttribute("timeOffset", float_64(0.0)); + } + } + + template // has operator[] -> integer type + HINLINE void operator()(ThreadParams* params, const Space particleOffset) + { + log("openPMD: (begin) write species: %1%") % T_SpeciesFilter::getName(); + DataConnector& dc = Environment<>::get().DataConnector(); + GridController& gc = Environment::get().GridController(); + uint64_t mpiSize = gc.getGlobalSize(); + uint64_t mpiRank = gc.getGlobalRank(); + /* load particle without copy particle data to host */ + auto speciesTmp = dc.get(ThisSpecies::FrameType::getName(), true); + const std::string speciesGroup(T_Species::getName()); + + ::openPMD::Series& series = *params->openPMDSeries; + ::openPMD::Iteration iteration = series.WRITE_ITERATIONS[params->currentStep]; + const std::string basename = series.particlesPath() + speciesGroup; + + // enforce that the filter interface is fulfilled + particles::filter::IUnary particleFilter{params->currentStep}; + using usedFilters = bmpl::vector::type>; + using MyParticleFilter = typename FilterFactory::FilterType; + MyParticleFilter filter; + /* activate filter pipeline if moving window is activated */ + filter.setStatus(MovingWindow::getInstance().isSlidingWindowActive(params->currentStep)); + filter.setWindowPosition(params->localWindowToDomainOffset, params->window.localDimensions.size); + + using RunParameters_T = StrategyRunParameters< + decltype(speciesTmp), + decltype(filter), + decltype(particleFilter), + const Space>; + + using AStrategy = Strategy; + std::unique_ptr strategy; + + switch(params->strategy) + { + case WriteSpeciesStrategy::ADIOS: + { + using type = StrategyADIOS; + strategy = std::unique_ptr(dynamic_cast(new type)); + break; + } + case WriteSpeciesStrategy::HDF5: + { + using 
type = StrategyHDF5; + strategy = std::unique_ptr(dynamic_cast(new type)); + break; + } + } + + + /* count total number of particles on the device */ + log("openPMD: (begin) count particles: %1%") % T_SpeciesFilter::getName(); + uint64_cu const myNumParticles = pmacc::CountParticles::countOnDevice( + *speciesTmp, + *(params->cellDescription), + params->localWindowToDomainOffset, + params->window.localDimensions.size, + particleFilter); + uint64_t allNumParticles[mpiSize]; + uint64_t globalNumParticles = 0; + uint64_t myParticleOffset = 0; + + // avoid deadlock between not finished pmacc tasks and mpi blocking + // collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + &myNumParticles, + 1, + MPI_UNSIGNED_LONG_LONG, + allNumParticles, + 1, + MPI_UNSIGNED_LONG_LONG, + gc.getCommunicator().getMPIComm())); + + for(uint64_t i = 0; i < mpiSize; ++i) + { + globalNumParticles += allNumParticles[i]; + if(i < mpiRank) + myParticleOffset += allNumParticles[i]; + } + log("openPMD: ( end ) count particles: %1% = %2%") % T_SpeciesFilter::getName() + % globalNumParticles; + + ::openPMD::ParticleSpecies& particleSpecies = iteration.particles[speciesGroup]; + + // copy over particles to host + openPMDFrameType hostFrame; + + strategy->malloc(T_SpeciesFilter::getName(), hostFrame, myNumParticles); + RunParameters_T runParameters( + dc, + *params, + speciesTmp, + filter, + particleFilter, + particleOffset, + myNumParticles, + globalNumParticles); + if(globalNumParticles > 0) + { + strategy->prepare(T_SpeciesFilter::getName(), hostFrame, std::move(runParameters)); + } + log("openPMD: (begin) write particle records for %1%") + % T_SpeciesFilter::getName(); + + meta::ForEach> + writeToOpenPMD; + writeToOpenPMD( + params, + hostFrame, + particleSpecies, + basename, + myNumParticles, + globalNumParticles, + myParticleOffset); + + log("openPMD: (begin) free memory: %1%") % T_SpeciesFilter::getName(); + /* free host memory */ + strategy->free(hostFrame); + 
log("openPMD: (end) free memory: %1%") % T_SpeciesFilter::getName(); + + log("openPMD: ( end ) writing species: %1%") % T_SpeciesFilter::getName(); + + /* write species counter table to openPMD storage */ + log("openPMD: (begin) writing particle patches for %1%") + % T_SpeciesFilter::getName(); + { + using index_t = uint64_t; + ::openPMD::Datatype const datatype = ::openPMD::determineDatatype(); + // not const, we'll switch out the JSON config + ::openPMD::Dataset ds(datatype, {mpiSize}); + + ::openPMD::ParticlePatches particlePatches = particleSpecies.particlePatches; + ::openPMD::PatchRecordComponent numParticles + = particlePatches["numParticles"][::openPMD::RecordComponent::SCALAR]; + ::openPMD::PatchRecordComponent numParticlesOffset + = particlePatches["numParticlesOffset"][::openPMD::RecordComponent::SCALAR]; + + setDatasetOptions(ds, params->jsonMatcher->get(basename + "/particlePatches/numParticles")); + numParticles.resetDataset(ds); + setDatasetOptions(ds, params->jsonMatcher->get(basename + "/particlePatches/numParticlesOffset")); + numParticlesOffset.resetDataset(ds); + + /* It is safe to use the mpi rank to write the data even if the rank can differ between simulation + * runs. During the restart the plugin is using patch information to find the corresponding data. 
+ */ + numParticles.store(mpiRank, myNumParticles); + numParticlesOffset.store(mpiRank, myParticleOffset); + + ::openPMD::PatchRecord offset = particlePatches["offset"]; + ::openPMD::PatchRecord extent = particlePatches["extent"]; + auto const patchExtent = params->window.localDimensions.size; + + for(size_t d = 0; d < simDim; ++d) + { + ::openPMD::PatchRecordComponent offset_x = offset[name_lookup[d]]; + ::openPMD::PatchRecordComponent extent_x = extent[name_lookup[d]]; + setDatasetOptions( + ds, + params->jsonMatcher->get(basename + "/particlePatches/offset/" + name_lookup[d])); + offset_x.resetDataset(ds); + setDatasetOptions( + ds, + params->jsonMatcher->get(basename + "/particlePatches/extent/" + name_lookup[d])); + extent_x.resetDataset(ds); + + offset_x.store(mpiRank, particleOffset[d]); + extent_x.store(mpiRank, patchExtent[d]); + } + + /* openPMD ED-PIC: additional attributes */ + setParticleAttributes( + particleSpecies, + *params->jsonMatcher, + series.particlesPath() + speciesGroup); + params->openPMDSeries->flush(); + } + + log("openPMD: ( end ) writing particle patches for %1%") + % T_SpeciesFilter::getName(); + } + }; + + + } // namespace openPMD + +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/openPMDDimension.hpp b/include/picongpu/plugins/openPMD/openPMDDimension.hpp new file mode 100644 index 0000000000..acc86d2032 --- /dev/null +++ b/include/picongpu/plugins/openPMD/openPMDDimension.hpp @@ -0,0 +1,58 @@ +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, + * Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include + +#include +#include + +namespace picongpu +{ + namespace openPMD + { + /** convert PIConGPU dimension unit into a corresponding openPMD map + * + * @param unitDimension PIConGPU dimension vector + * @return openPMD-api dimension map + */ + inline auto convertToUnitDimension(std::vector const& unitDimension) + { + PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units + constexpr ::openPMD::UnitDimension openPMDUnitDimensions[7] + = {::openPMD::UnitDimension::L, + ::openPMD::UnitDimension::M, + ::openPMD::UnitDimension::T, + ::openPMD::UnitDimension::I, + ::openPMD::UnitDimension::theta, + ::openPMD::UnitDimension::N, + ::openPMD::UnitDimension::J}; + std::map<::openPMD::UnitDimension, double> unitMap; + for(unsigned i = 0; i < 7; ++i) + { + unitMap[openPMDUnitDimensions[i]] = unitDimension[i]; + } + + return unitMap; + } + } // namespace openPMD +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/openPMDVersion.def b/include/picongpu/plugins/openPMD/openPMDVersion.def new file mode 100644 index 0000000000..b38f8dea71 --- /dev/null +++ b/include/picongpu/plugins/openPMD/openPMDVersion.def @@ -0,0 +1,75 @@ +/* Copyright 2020-2021 Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ +#pragma once + +#include "openPMD/openPMD.hpp" + +#if OPENPMDAPI_VERSION_GE(0, 13, 0) +// Streaming API is available, use it +# define WRITE_ITERATIONS writeIterations() +#else +// Not available, don't use it +# define WRITE_ITERATIONS iterations +#endif + +namespace picongpu +{ + namespace openPMD + { + /* + * Do some SFINAE tricks to detect whether the openPMD API has + * dataset-specific configuration or not. + */ + namespace detail + { + // As std::void_t in C++17. + template + using void_t = void; + + template + struct SetDatasetOptions + { + static void run(::openPMD::Dataset const&, std::string const& options) + { + if(options != "{}") + { + std::cerr + << "[openPMD plugin] Setting dataset-specific JSON options requires openPMD API 0.13.0 " + "or later." + << std::endl; + } + } + }; + + template + struct SetDatasetOptions> + { + static void run(Dataset& ds, std::string options) + { + ds.options = std::move(options); + } + }; + } // namespace detail + + inline void setDatasetOptions(::openPMD::Dataset& ds, std::string options) /* inline: defined in a header that several files include */ + { + detail::SetDatasetOptions<>::run(ds, std::move(options)); + } + } // namespace openPMD +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/openPMDWriter.def b/include/picongpu/plugins/openPMD/openPMDWriter.def new file mode 100644 index 0000000000..6ed571f1cd --- /dev/null +++ b/include/picongpu/plugins/openPMD/openPMDWriter.def @@ -0,0 +1,138 @@ +/* Copyright 2014-2021 Felix Schmitt, Axel Huebl, Franz Poeschel + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation/control/MovingWindow.hpp" +#include "picongpu/simulation_defines.hpp" + +#include "picongpu/plugins/openPMD/Json.hpp" + +#include +#include +#include + +#include + +#include // std::cerr +#include +#include +#include // std::unique_ptr +#include +#include // throw std::runtime_error +#include + +#include + +namespace picongpu +{ + namespace openPMD + { + using namespace pmacc; + using AbstractJsonMatcher = json::AbstractJsonMatcher; + + + namespace po = boost::program_options; + + +#define MESHES_PATH "fields" +#define PARTICLES_PATH "particles" + + template::type::type>> + T_Ret asStandardVector(T_Vec const&); + + enum class WriteSpeciesStrategy + { + ADIOS, + HDF5 + }; + + + /** + * Writes simulation data to openPMD series. + * Implements the ILightweightPlugin interface. 
+ */ + + class openPMDWriter; + class Help; + + struct ThreadParams + { + uint32_t currentStep; /** current simulation step */ + + + std::unique_ptr<::openPMD::Series> openPMDSeries; /* is null iff there is no series currently open */ + + /** current dump is a checkpoint */ + bool isCheckpoint; + + MPI_Comm communicator; /* MPI communicator for openPMD API */ + std::string compressionMethod; /* openPMD data transform compression method */ + std::string fileName; /* Name of the openPMDSeries, excluding the extension */ + std::string fileExtension; /* Extension of the file name */ + std::string fileInfix; + + std::unique_ptr jsonMatcher; + + WriteSpeciesStrategy strategy = WriteSpeciesStrategy::ADIOS; + + pmacc::math::UInt64 fieldsSizeDims; + pmacc::math::UInt64 fieldsGlobalSizeDims; + pmacc::math::UInt64 fieldsOffsetDims; + + GridLayout gridLayout; + MappingDesc* cellDescription; + + std::vector fieldBuffer; /* temp. buffer for fields */ + + Window window; /* window describing the volume to be dumped */ + + DataSpace localWindowToDomainOffset; /** offset from local moving + window to local domain */ + + std::vector times; + + ::openPMD::Series& openSeries(::openPMD::Access at); + + void closeSeries(); + + void initFromConfig(Help&, size_t id, std::string const& file, std::string const& dir); + + /** + * Wrapper for ::openPMD::resetDataset, set dataset parameters + * @tparam DIM number of variable dimensions + * @param recordComponent Location of the dataset within the openPMD + * Series + * @param datatype Variable type + * @param globalDimensions Dataset global dimensions + * @param compression Enable compression data transform + * @param compressionMethod String denoting the data transform to use + * @return The input recordComponent + */ + template + ::openPMD::RecordComponent& initDataset( + ::openPMD::RecordComponent& recordComponent, + ::openPMD::Datatype datatype, + pmacc::math::UInt64 const& globalDimensions, + bool compression, + std::string const& 
compressionMethod, + std::string const& datasetName); + }; + } // namespace openPMD +} // namespace picongpu diff --git a/include/picongpu/plugins/openPMD/openPMDWriter.hpp b/include/picongpu/plugins/openPMD/openPMDWriter.hpp new file mode 100644 index 0000000000..542bbb7779 --- /dev/null +++ b/include/picongpu/plugins/openPMD/openPMDWriter.hpp @@ -0,0 +1,1260 @@ +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, + * Benjamin Worpitz, Alexander Grund, Franz Poeschel + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/fields/FieldB.hpp" +#include "picongpu/fields/FieldE.hpp" +#include "picongpu/fields/FieldJ.hpp" +#include "picongpu/fields/FieldTmp.hpp" +#include "picongpu/particles/filter/filter.hpp" +#include "picongpu/particles/traits/SpeciesEligibleForSolver.hpp" +#include "picongpu/plugins/misc/ComponentNames.hpp" +#include "picongpu/plugins/misc/SpeciesFilter.hpp" +#include "picongpu/plugins/misc/misc.hpp" +#include "picongpu/plugins/multi/IHelp.hpp" +#include "picongpu/plugins/multi/Option.hpp" +#include "picongpu/plugins/openPMD/openPMDWriter.def" +#include "picongpu/simulation/control/MovingWindow.hpp" +#include "picongpu/simulation_defines.hpp" +#include "picongpu/traits/IsFieldDomainBound.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "picongpu/plugins/misc/SpeciesFilter.hpp" +#include "picongpu/plugins/openPMD/Json.hpp" +#include "picongpu/plugins/openPMD/NDScalars.hpp" +#include "picongpu/plugins/openPMD/WriteMeta.hpp" +#include "picongpu/plugins/openPMD/openPMDVersion.def" +#include "picongpu/plugins/openPMD/WriteSpecies.hpp" +#include "picongpu/plugins/openPMD/restart/LoadSpecies.hpp" +#include "picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp" +#include "picongpu/plugins/output/IIOBackend.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#if !defined(_WIN32) +# include +#endif + +#include +#include +#include // getenv +#include +#include +#include +#include +#include + + +namespace picongpu +{ + namespace openPMD + { + using namespace pmacc; + + + namespace po = boost::program_options; + + template + ::openPMD::RecordComponent& ThreadParams::initDataset( + ::openPMD::RecordComponent& recordComponent, + ::openPMD::Datatype datatype, + pmacc::math::UInt64 const& globalDimensions, + bool compression, + std::string const& 
compressionMethod, + std::string const& datasetName) + { + std::vector v = asStandardVector(globalDimensions); + ::openPMD::Dataset dataset{datatype, std::move(v)}; + setDatasetOptions(dataset, jsonMatcher->get(datasetName)); + if(compression && compressionMethod != "none") + { + dataset.compression = compressionMethod; + } + recordComponent.resetDataset(std::move(dataset)); + return recordComponent; + } + + + template + T_Ret asStandardVector(T_Vec const& v) + { + using __T_Vec = typename std::remove_reference::type; + constexpr auto dim = __T_Vec::dim; + T_Ret res(dim); + for(unsigned i = 0; i < dim; ++i) + { + res[dim - i - 1] = v[i]; + } + return res; + } + + ::openPMD::Series& ThreadParams::openSeries(::openPMD::Access at) + { + if(!openPMDSeries) + { + std::string fullName = fileName + fileInfix + "." + fileExtension; + log("openPMD: open file: %1%") % fullName; + // avoid deadlock between not finished pmacc tasks and mpi calls in + // openPMD + __getTransactionEvent().waitForFinished(); + openPMDSeries + = std::make_unique<::openPMD::Series>(fullName, at, communicator, jsonMatcher->getDefault()); + if(openPMDSeries->backend() == "MPI_ADIOS1") + { + throw std::runtime_error(R"END( +Using ADIOS1 through PIConGPU's openPMD plugin is not supported. +Please pick either of the following: +* Use the ADIOS plugin. +* Use the openPMD plugin with another backend, such as ADIOS2. + If the openPMD API has been compiled with support for ADIOS2, the openPMD API + will automatically prefer using ADIOS2 over ADIOS1. + Make sure that environment variable OPENPMD_BP_BACKEND is not set to ADIOS1. 
+ )END"); + } + if(at == ::openPMD::Access::CREATE) + { + openPMDSeries->setMeshesPath(MESHES_PATH); + openPMDSeries->setParticlesPath(PARTICLES_PATH); + } + log("openPMD: successfully opened file: %1%") % fullName; + return *openPMDSeries; + } + else + { + throw std::runtime_error("openPMD: Tried opening a Series while old Series was still " + "active"); + } + } + + void ThreadParams::closeSeries() + { + if(openPMDSeries) + { + log("openPMD: close file: %1%") % fileName; + openPMDSeries.reset(); + MPI_Barrier(this->communicator); + log("openPMD: successfully closed file: %1%") % fileName; + } + else + { + throw std::runtime_error("openPMD: Tried closing a Series that was not active"); + } + } + + + struct Help : public plugins::multi::IHelp + { + /** creates a instance of ISlave + * + * @param help plugin defined help + * @param id index of the plugin, range: [0;help->getNumPlugins()) + */ + std::shared_ptr create( + std::shared_ptr& help, + size_t const id, + MappingDesc* cellDescription); + // defined later since we need openPMDWriter constructor + + plugins::multi::Option notifyPeriod = {"period", "enable openPMD IO [for each n-th step]"}; + + plugins::multi::Option source = {"source", "data sources: ", "species_all, fields_all"}; + + std::vector allowedDataSources = {"species_all", "fields_all"}; + + plugins::multi::Option fileName = {"file", "openPMD file basename"}; + + plugins::multi::Option fileNameExtension + = {"ext", + "openPMD filename extension (this controls the" + "backend picked by the openPMD API)", + "bp"}; + + plugins::multi::Option fileNameInfix + = {"infix", + "openPMD filename infix (use to pick file- or group-based " + "layout in openPMD)\nSet to NULL to keep empty (e.g. to pick" + " group-based iteration layout). 
Parameter will be ignored" + " if a streaming backend is detected in 'ext' parameter and" + " an empty string will be assumed instead.", + "_%06T"}; + + plugins::multi::Option jsonConfig + = {"json", "advanced (backend) configuration for openPMD in JSON format", "{}"}; + + plugins::multi::Option dataPreparationStrategy + = {"dataPreparationStrategy", + "Strategy for preparation of particle data ('doubleBuffer' or " + "'mappedMemory'). Aliases 'adios' and 'hdf5' may be used " + "respectively.", + "doubleBuffer"}; + + plugins::multi::Option compression + = {"compression", + "Backend-specific openPMD compression method, e.g., zlib (see " + "`adios_config -m` for help). Legacy parameter until compression" + " can be fully configured via JSON in the openPMD API.", + "none"}; + + /** defines if the plugin must register itself to the PMacc plugin + * system + * + * true = the plugin is registering it self + * false = the plugin is not registering itself (plugin is + * controlled by another class) + */ + bool selfRegister = false; + + template + struct CreateSpeciesFilter + { + using type = plugins::misc::SpeciesFilter< + typename pmacc::math::CT::At>::type, + typename pmacc::math::CT::At>::type>; + }; + + using AllParticlesTimesAllFilters = typename AllCombinations< + bmpl::vector>::type; + + using AllSpeciesFilter = + typename bmpl::transform>::type; + + using AllEligibleSpeciesSources = + typename bmpl::copy_if>::type; + + using AllFieldSources = FileOutputFields; + + ///! 
method used by plugin controller to get --help description + void registerHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + meta::ForEach> + getEligibleDataSourceNames; + getEligibleDataSourceNames(allowedDataSources); + + meta::ForEach> appendFieldSourceNames; + appendFieldSourceNames(allowedDataSources); + + // string list with all possible particle sources + std::string concatenatedSourceNames = plugins::misc::concatenateToString(allowedDataSources, ", "); + + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + source.registerHelp(desc, masterPrefix + prefix, std::string("[") + concatenatedSourceNames + "]"); + + expandHelp(desc, ""); + selfRegister = true; + } + + void expandHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + compression.registerHelp(desc, masterPrefix + prefix); + fileName.registerHelp(desc, masterPrefix + prefix); + fileNameExtension.registerHelp(desc, masterPrefix + prefix); + fileNameInfix.registerHelp(desc, masterPrefix + prefix); + jsonConfig.registerHelp(desc, masterPrefix + prefix); + dataPreparationStrategy.registerHelp(desc, masterPrefix + prefix); + } + + void validateOptions() + { + if(selfRegister) + { + if(notifyPeriod.empty() || fileName.empty()) + throw std::runtime_error(name + ": parameter period and file must be defined"); + + // check if user passed data source names are valid + for(auto const& dataSourceNames : source) + { + auto vectorOfDataSourceNames + = plugins::misc::splitString(plugins::misc::removeSpaces(dataSourceNames)); + + for(auto const& f : vectorOfDataSourceNames) + { + if(!plugins::misc::containsObject(allowedDataSources, f)) + { + throw std::runtime_error(name + ": unknown data source '" + f + "'"); + } + } + } + } + } + + size_t getNumPlugins() const + { + if(selfRegister) + return notifyPeriod.size(); + else + return 1; + } + + std::string getDescription() const + { + 
return description; + } + + std::string getOptionPrefix() const + { + return prefix; + } + + std::string getName() const + { + return name; + } + + std::string const name = "openPMDWriter"; + //! short description of the plugin + std::string const description = "dump simulation data with openPMD"; + //! prefix used for command line arguments + std::string const prefix = "openPMD"; + }; + + void ThreadParams::initFromConfig(Help& help, size_t id, std::string const& file, std::string const& dir) + { + fileExtension = help.fileNameExtension.get(id); + fileInfix = help.fileNameInfix.get(id); + /* + * Enforce group-based iteration layout for streaming backends + */ + if(fileInfix == "NULL" || fileExtension == "sst") + { + fileInfix = ""; + } + /* if file name is relative, prepend with common directory */ + fileName = boost::filesystem::path(file).has_root_path() ? file : dir + "/" + file; + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + + log("openPMD: setting file pattern: %1%%2%.%3%") % fileName % fileInfix + % fileExtension; + + // Avoid repeatedly parsing the JSON config + if(!jsonMatcher) + { + jsonMatcher = AbstractJsonMatcher::construct(help.jsonConfig.get(id), communicator); + } + + log("openPMD: global JSON config: %1%") % jsonMatcher->getDefault(); + + { + std::string strategyString = help.dataPreparationStrategy.get(id); + if(strategyString == "adios" || strategyString == "doubleBuffer") + { + strategy = WriteSpeciesStrategy::ADIOS; + } + else if(strategyString == "hdf5" || strategyString == "mappedMemory") + { + strategy = WriteSpeciesStrategy::HDF5; + } + else + { + std::cerr << "Passed dataPreparationStrategy for openPMD" + " plugin is invalid." + << std::endl; + } + } + } + + /** Writes simulation data to openPMD. + * + * Implements the IIOBackend interface. + */ + class openPMDWriter : public IIOBackend + { + public: + //! 
must be implemented by the user + static std::shared_ptr getHelp() + { + return std::shared_ptr(new Help{}); + } + + private: + template + static std::vector createUnit(UnitType unit, uint32_t numComponents) + { + std::vector tmp(numComponents); + for(uint32_t i = 0; i < numComponents; ++i) + tmp[i] = unit[i]; + return tmp; + } + + /** + * Write calculated fields to openPMD. + */ + template + struct GetFields + { + private: + using ValueType = typename T_Field::ValueType; + using ComponentType = typename GetComponentsType::type; + using UnitType = typename T_Field::UnitValueType; + + public: + static std::vector getUnit() + { + UnitType unit = T_Field::getUnit(); + return createUnit(unit, T_Field::numComponents); + } + + HDINLINE void operator()(ThreadParams* params) + { +#ifndef __CUDA_ARCH__ + DataConnector& dc = Environment::get().DataConnector(); + + auto field = dc.get(T_Field::getName()); + params->gridLayout = field->getGridLayout(); + bool const isDomainBound = traits::IsFieldDomainBound::value; + + const traits::FieldPosition fieldPos; + + std::vector> inCellPosition; + for(uint32_t n = 0; n < T_Field::numComponents; ++n) + { + std::vector inCellPositonComponent; + for(uint32_t d = 0; d < simDim; ++d) + inCellPositonComponent.push_back(fieldPos()[n][d]); + inCellPosition.push_back(inCellPositonComponent); + } + + /** \todo check if always correct at this point, depends on + * solver implementation */ + const float_X timeOffset = 0.0; + + openPMDWriter::writeField( + params, + sizeof(ComponentType), + ::openPMD::determineDatatype(), + GetNComponents::value, + T_Field::getName(), + field->getHostDataBox().getPointer(), + getUnit(), + T_Field::getUnitDimension(), + std::move(inCellPosition), + timeOffset, + isDomainBound); + + dc.releaseData(T_Field::getName()); +#endif + } + }; + + /** Calculate FieldTmp with given solver and particle species + * and write them to openPMD. + * + * FieldTmp is calculated on device and then dumped to openPMD. 
+ */ + template + struct GetFields> + { + /* + * This is only a wrapper function to allow disable nvcc warnings. + * Warning: calling a __host__ function from __host__ __device__ + * function. + * Use of PMACC_NO_NVCC_HDWARNING is not possible if we call a + * virtual method inside of the method were we disable the warnings. + * Therefore we create this method and call a new method were we can + * call virtual functions. + */ + PMACC_NO_NVCC_HDWARNING + HDINLINE void operator()(ThreadParams* tparam) + { + this->operator_impl(tparam); + } + + private: + using UnitType = typename FieldTmp::UnitValueType; + using ValueType = typename FieldTmp::ValueType; + using ComponentType = typename GetComponentsType::type; + + /** Get the unit for the result from the solver*/ + static std::vector getUnit() + { + UnitType unit = FieldTmp::getUnit(); + const uint32_t components = GetNComponents::value; + return createUnit(unit, components); + } + + /** Create a name for the openPMD identifier. + */ + static std::string getName() + { + return FieldTmpOperation::getName(); + } + + HINLINE void operator_impl(ThreadParams* params) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /*## update field ##*/ + + /*load FieldTmp without copy data to host*/ + PMACC_CASSERT_MSG(_please_allocate_at_least_one_FieldTmp_in_memory_param, fieldTmpNumSlots > 0); + auto fieldTmp = dc.get(FieldTmp::getUniqueId(0), true); + /*load particle without copy particle data to host*/ + auto speciesTmp = dc.get(Species::FrameType::getName(), true); + + fieldTmp->getGridBuffer().getDeviceBuffer().setValue(ValueType::create(0.0)); + /*run algorithm*/ + fieldTmp->template computeValue(*speciesTmp, params->currentStep); + + EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(fieldTmpEvent); + /* copy data to host that we can write same to disk*/ + fieldTmp->getGridBuffer().deviceToHost(); + dc.releaseData(Species::FrameType::getName()); + /*## 
finish update field ##*/ + + const uint32_t components = GetNComponents::value; + + /*wrap in a one-component vector for writeField API*/ + const traits::FieldPosition fieldPos; + + std::vector> inCellPosition; + std::vector inCellPositonComponent; + for(uint32_t d = 0; d < simDim; ++d) + inCellPositonComponent.push_back(fieldPos()[0][d]); + inCellPosition.push_back(inCellPositonComponent); + + /** \todo check if always correct at this point, depends on + * solver implementation */ + const float_X timeOffset = 0.0; + + params->gridLayout = fieldTmp->getGridLayout(); + bool const isDomainBound = traits::IsFieldDomainBound::value; + /*write data to openPMD Series*/ + openPMDWriter::template writeField( + params, + sizeof(ComponentType), + ::openPMD::determineDatatype(), + components, + getName(), + fieldTmp->getHostDataBox().getPointer(), + getUnit(), + FieldTmp::getUnitDimension(), + std::move(inCellPosition), + timeOffset, + isDomainBound); + + dc.releaseData(FieldTmp::getUniqueId(0)); + } + }; + + public: + /** constructor + * + * @param help instance of the class Help + * @param id index of this plugin instance within help + * @param cellDescription PIConGPu cell description information for + * kernel index mapping + */ + openPMDWriter(std::shared_ptr& help, size_t const id, MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) + , outputDirectory("openPMD") + , lastSpeciesSyncStep(pmacc::traits::limits::Max::value) + { + mThreadParams.compressionMethod = m_help->compression.get(id); + + GridController& gc = Environment::get().GridController(); + /* It is important that we never change the mpi_pos after this point + * because we get problems with the restart. + * Otherwise we do not know which gpu must load the ghost parts + * around the sliding window. 
+ */ + mpi_pos = gc.getPosition(); + mpi_size = gc.getGpuNodes(); + + if(m_help->selfRegister) + { + std::string notifyPeriod = m_help->notifyPeriod.get(id); + /* only register for notify callback when .period is set on + * command line */ + if(!notifyPeriod.empty()) + { + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + + /** create notify directory */ + Environment::get().Filesystem().createDirectoryWithPermissions(outputDirectory); + } + } + + // avoid deadlock between not finished pmacc tasks and mpi blocking + // collectives + __getTransactionEvent().waitForFinished(); + mThreadParams.communicator = MPI_COMM_NULL; + MPI_CHECK(MPI_Comm_dup(gc.getCommunicator().getMPIComm(), &(mThreadParams.communicator))); + } + + virtual ~openPMDWriter() + { + if(mThreadParams.communicator != MPI_COMM_NULL) + { + // avoid deadlock between not finished pmacc tasks and mpi + // blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&(mThreadParams.communicator))); + } + } + + void notify(uint32_t currentStep) + { + // notify is only allowed if the plugin is not controlled by the + // class Checkpoint + assert(m_help->selfRegister); + + __getTransactionEvent().waitForFinished(); + + mThreadParams.initFromConfig(*m_help, m_id, m_help->fileName.get(m_id), outputDirectory); + + /* window selection */ + mThreadParams.window = MovingWindow::getInstance().getWindow(currentStep); + mThreadParams.isCheckpoint = false; + dumpData(currentStep); + } + + virtual void restart(uint32_t restartStep, std::string const& restartDirectory) + { + /* ISlave restart interface is not needed becase IIOBackend + * restart interface is used + */ + } + + virtual void checkpoint(uint32_t currentStep, std::string const& checkpointDirectory) + { + /* ISlave checkpoint interface is not needed becase IIOBackend + * checkpoint interface is used + */ + } + + void dumpCheckpoint( + const uint32_t currentStep, + const std::string& 
checkpointDirectory, + const std::string& checkpointFilename) + { + // checkpointing is only allowed if the plugin is controlled by the + // class Checkpoint + assert(!m_help->selfRegister); + + __getTransactionEvent().waitForFinished(); + /* if file name is relative, prepend with common directory */ + + mThreadParams.isCheckpoint = true; + mThreadParams.initFromConfig(*m_help, m_id, checkpointFilename, checkpointDirectory); + + mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(currentStep); + + dumpData(currentStep); + } + + void doRestart( + const uint32_t restartStep, + const std::string& restartDirectory, + const std::string& constRestartFilename, + const uint32_t restartChunkSize) + { + // restart is only allowed if the plugin is controlled by the class + // Checkpoint + assert(!m_help->selfRegister); + + mThreadParams.initFromConfig(*m_help, m_id, constRestartFilename, restartDirectory); + + // mThreadParams.isCheckpoint = isCheckpoint; + mThreadParams.currentStep = restartStep; + mThreadParams.cellDescription = m_cellDescription; + + mThreadParams.openSeries(::openPMD::Access::READ_ONLY); + + ::openPMD::Iteration iteration = mThreadParams.openPMDSeries->iterations[mThreadParams.currentStep]; + + /* load number of slides to initialize MovingWindow */ + log("openPMD: (begin) read attr (%1% available)") % iteration.numAttributes(); + + + uint32_t slides = iteration.getAttribute("sim_slides").get(); + log("openPMD: value of sim_slides = %1%") % slides; + + uint32_t lastStep = iteration.getAttribute("iteration").get(); + log("openPMD: value of iteration = %1%") % lastStep; + + PMACC_ASSERT(lastStep == restartStep); + + /* apply slides to set gpus to last/written configuration */ + log("openPMD: Setting slide count for moving window to %1%") % slides; + MovingWindow::getInstance().setSlideCounter(slides, restartStep); + + /* re-distribute the local offsets in y-direction + * this will work for restarts with moving window still enabled + * and 
restarts that disable the moving window + * \warning enabling the moving window from a checkpoint that + * had no moving window will not work + */ + GridController& gc = Environment::get().GridController(); + gc.setStateAfterSlides(slides); + + /* set window for restart, complete global domain */ + mThreadParams.window = MovingWindow::getInstance().getDomainAsWindow(restartStep); + mThreadParams.localWindowToDomainOffset = DataSpace::create(0); + + /* load all fields */ + meta::ForEach> ForEachLoadFields; + ForEachLoadFields(&mThreadParams); + + /* load all particles */ + meta::ForEach> ForEachLoadSpecies; + ForEachLoadSpecies(&mThreadParams, restartChunkSize); + + IdProvider::State idProvState; + ReadNDScalars()( + mThreadParams, + "picongpu", + "idProvider", + "startId", + &idProvState.startId, + "maxNumProc", + &idProvState.maxNumProc); + ReadNDScalars()(mThreadParams, "picongpu", "idProvider", "nextId", &idProvState.nextId); + log("Setting next free id on current rank: %1%") % idProvState.nextId; + IdProvider::setState(idProvState); + + // avoid deadlock between not finished pmacc tasks and mpi calls in + // openPMD + __getTransactionEvent().waitForFinished(); + + // Finalize the openPMD Series by calling its destructor + mThreadParams.closeSeries(); + } + + private: + void endWrite() + { + mThreadParams.fieldBuffer.resize(0); + } + + void initWrite() + { + // may be zero + auto size = mThreadParams.window.localDimensions.size.productOfComponents(); + mThreadParams.fieldBuffer.resize(size); + } + + /** + * Notification for dump or checkpoint received + * + * @param currentStep current simulation step + */ + void dumpData(uint32_t currentStep) + { + // local offset + extent + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + mThreadParams.cellDescription = m_cellDescription; + mThreadParams.currentStep = currentStep; + + for(uint32_t i = 0; i < simDim; ++i) + { + mThreadParams.localWindowToDomainOffset[i] = 0; + 
if(mThreadParams.window.globalDimensions.offset[i] > localDomain.offset[i]) + { + mThreadParams.localWindowToDomainOffset[i] + = mThreadParams.window.globalDimensions.offset[i] - localDomain.offset[i]; + } + } + + /* copy species only one time per timestep to the host */ + if(mThreadParams.strategy == WriteSpeciesStrategy::ADIOS && lastSpeciesSyncStep != currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + + /* synchronizes the MallocMCBuffer to the host side */ + dc.get>(MallocMCBuffer::getName()); + + /* here we are copying all species to the host side since we + * can not say at this point if this time step will need all of + * them for sure (checkpoint) or just some user-defined species + * (dump) + */ + meta::ForEach> copySpeciesToHost; + copySpeciesToHost(); + lastSpeciesSyncStep = currentStep; + + dc.releaseData(MallocMCBuffer::getName()); + } + + TimeIntervall timer; + timer.toggleStart(); + initWrite(); + + write(&mThreadParams, mpiTransportParams); + + endWrite(); + timer.toggleEnd(); + double interval = timer.getInterval(); + mThreadParams.times.push_back(interval); + double average = std::accumulate(mThreadParams.times.begin(), mThreadParams.times.end(), 0); + average /= mThreadParams.times.size(); + log("openPMD: IO plugin ran for %1% (average: %2%)") % timer.printeTime(interval) + % timer.printeTime(average); + } + + static void writeFieldAttributes( + ThreadParams* params, + std::vector const& unitDimension, + float_X timeOffset, + ::openPMD::Mesh& mesh) + { + static constexpr ::openPMD::UnitDimension openPMDUnitDimensions[7] + = {::openPMD::UnitDimension::L, + ::openPMD::UnitDimension::M, + ::openPMD::UnitDimension::T, + ::openPMD::UnitDimension::I, + ::openPMD::UnitDimension::theta, + ::openPMD::UnitDimension::N, + ::openPMD::UnitDimension::J}; + std::map<::openPMD::UnitDimension, double> unitMap; + for(unsigned i = 0; i < 7; ++i) + { + unitMap[openPMDUnitDimensions[i]] = unitDimension[i]; + } + + 
mesh.setUnitDimension(unitMap); + mesh.setTimeOffset(timeOffset); + mesh.setGeometry(::openPMD::Mesh::Geometry::cartesian); + mesh.setDataOrder(::openPMD::Mesh::DataOrder::C); + + if(simDim == DIM2) + { + std::vector axisLabels = {"y", "x"}; // 2D: F[y][x] + mesh.setAxisLabels(axisLabels); + } + if(simDim == DIM3) + { + std::vector axisLabels = {"z", "y", "x"}; // 3D: F[z][y][x] + mesh.setAxisLabels(axisLabels); + } + + // cellSize is {x, y, z} but fields are F[z][y][x] + std::vector gridSpacing(simDim, 0.0); + for(uint32_t d = 0; d < simDim; ++d) + gridSpacing.at(simDim - 1 - d) = cellSize[d]; + + mesh.setGridSpacing(gridSpacing); + + /* globalSlideOffset due to gpu slides between origin at time step 0 + * and origin at current time step + * ATTENTION: splash offset are globalSlideOffset + picongpu offsets + */ + DataSpace globalSlideOffset; + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(params->currentStep); + globalSlideOffset.y() += numSlides * localDomain.size.y(); + + // globalDimensions is {x, y, z} but fields are F[z][y][x] + std::vector gridGlobalOffset(simDim, 0.0); + for(uint32_t d = 0; d < simDim; ++d) + gridGlobalOffset.at(simDim - 1 - d) = float_64(cellSize[d]) + * float_64(params->window.globalDimensions.offset[d] + globalSlideOffset[d]); + + mesh.setGridGlobalOffset(std::move(gridGlobalOffset)); + mesh.setGridUnitSI(UNIT_LENGTH); + mesh.setAttribute("fieldSmoothing", "none"); + } + + template + static void writeField( + ThreadParams* params, + const uint32_t sizePtrType, + ::openPMD::Datatype openPMDType, + const uint32_t nComponents, + const std::string name, + void* ptr, + std::vector unit, + std::vector unitDimension, + std::vector> inCellPosition, + float_X timeOffset, + bool isDomainBound) + { + auto const name_lookup_tpl = plugins::misc::getComponentNames(nComponents); + + /* parameter checking */ + PMACC_ASSERT(unit.size() == 
nComponents); + PMACC_ASSERT(inCellPosition.size() == nComponents); + for(uint32_t n = 0; n < nComponents; ++n) + PMACC_ASSERT(inCellPosition.at(n).size() == simDim); + PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units + + log("openPMD: write field: %1% %2% %3%") % name % nComponents % ptr; + + const bool fieldTypeCorrect(boost::is_same::value); + PMACC_CASSERT_MSG(Precision_mismatch_in_Field_Components__ADIOS, fieldTypeCorrect); + + ::openPMD::Iteration iteration = params->openPMDSeries->WRITE_ITERATIONS[params->currentStep]; + ::openPMD::Mesh mesh = iteration.meshes[name]; + + // set mesh attributes + writeFieldAttributes(params, unitDimension, timeOffset, mesh); + + /* data to describe source buffer */ + GridLayout field_layout = params->gridLayout; + DataSpace field_full = field_layout.getDataSpace(); + + DataSpace field_no_guard = params->window.localDimensions.size; + DataSpace field_guard = field_layout.getGuard() + params->localWindowToDomainOffset; + std::vector& dstBuffer = params->fieldBuffer; + + auto fieldsSizeDims = params->fieldsSizeDims; + auto fieldsGlobalSizeDims = params->fieldsGlobalSizeDims; + auto fieldsOffsetDims = params->fieldsOffsetDims; + + /* Patch for non-domain-bound fields + * Allow for the output of reduced 1d PML buffer + */ + if(!isDomainBound) + { + field_no_guard = field_layout.getDataSpaceWithoutGuarding(); + field_guard = field_layout.getGuard(); + dstBuffer.resize(field_no_guard.productOfComponents()); + + DataConnector& dc = Environment<>::get().DataConnector(); + fieldsSizeDims = precisionCast(params->gridLayout.getDataSpaceWithoutGuarding()); + dc.releaseData(name); + + /* Scan the PML buffer local size along all local domains + * This code is based on the same operation in hdf5::Field::writeField(), + * the same comments apply here + */ + log("openPMD: (begin) collect PML sizes for %1%") % name; + auto& gridController = Environment::get().GridController(); + auto const numRanks = 
uint64_t{gridController.getGlobalSize()}; + /* Use domain position-based rank, not MPI rank, to be independent + * of the MPI rank assignment scheme + */ + auto const rank = uint64_t{gridController.getScalarPosition()}; + std::vector localSizes(2u * numRanks, 0u); + uint64_t localSizeInfo[2] = {fieldsSizeDims[0], rank}; + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + localSizeInfo, + 2, + MPI_UINT64_T, + &(*localSizes.begin()), + 2, + MPI_UINT64_T, + gridController.getCommunicator().getMPIComm())); + uint64_t globalOffsetFile = 0; + uint64_t globalSize = 0; + for(uint64_t r = 0; r < numRanks; ++r) + { + globalSize += localSizes.at(2u * r); + if(localSizes.at(2u * r + 1u) < rank) + globalOffsetFile += localSizes.at(2u * r); + } + log("openPMD: (end) collect PML sizes for %1%") % name; + + fieldsGlobalSizeDims = pmacc::math::UInt64::create(1); + fieldsGlobalSizeDims[0] = globalSize; + fieldsOffsetDims = pmacc::math::UInt64::create(0); + fieldsOffsetDims[0] = globalOffsetFile; + } + + /* write the actual field data */ + for(uint32_t d = 0; d < nComponents; d++) + { + const size_t plane_full_size = field_full[1] * field_full[0] * nComponents; + const size_t plane_no_guard_size = field_no_guard[1] * field_no_guard[0]; + + /* copy strided data from source to temporary buffer + * + * \todo use d1Access as in + * `include/plugins/hdf5/writer/Field.hpp` + */ + const int maxZ = simDim == DIM3 ? field_no_guard[2] : 1; + const int guardZ = simDim == DIM3 ? 
field_guard[2] : 0; + for(int z = 0; z < maxZ; ++z) + { + for(int y = 0; y < field_no_guard[1]; ++y) + { + const size_t base_index_src + = (z + guardZ) * plane_full_size + (y + field_guard[1]) * field_full[0] * nComponents; + + const size_t base_index_dst = z * plane_no_guard_size + y * field_no_guard[0]; + + for(int x = 0; x < field_no_guard[0]; ++x) + { + size_t index_src = base_index_src + (x + field_guard[0]) * nComponents + d; + size_t index_dst = base_index_dst + x; + + dstBuffer[index_dst] = reinterpret_cast(ptr)[index_src]; + } + } + } + + ::openPMD::MeshRecordComponent mrc + = mesh[nComponents > 1 ? name_lookup_tpl[d] : ::openPMD::RecordComponent::SCALAR]; + + std::string datasetName = nComponents > 1 + ? params->openPMDSeries->meshesPath() + name + "/" + name_lookup_tpl[d] + : params->openPMDSeries->meshesPath() + name; + + params->initDataset( + mrc, + openPMDType, + fieldsGlobalSizeDims, + true, + params->compressionMethod, + datasetName); + if(dstBuffer.size() > 0) + mrc.storeChunk>( + dstBuffer, + asStandardVector(fieldsOffsetDims), + asStandardVector(fieldsSizeDims)); + + // define record component level attributes + mrc.setPosition(inCellPosition.at(d)); + mrc.setUnitSI(unit.at(d)); + + params->openPMDSeries->flush(); + } + } + + + template + struct CallWriteSpecies + { + template + void operator()( + const std::vector& vectorOfDataSourceNames, + ThreadParams* params, + const Space domainOffset) + { + bool const containsDataSource + = plugins::misc::containsObject(vectorOfDataSourceNames, T_ParticleFilter::getName()); + + if(containsDataSource) + { + WriteSpecies writeSpecies; + writeSpecies(params, domainOffset); + } + } + }; + + template + struct CallGetFields + { + void operator()(const std::vector& vectorOfDataSourceNames, ThreadParams* params) + { + bool const containsDataSource + = plugins::misc::containsObject(vectorOfDataSourceNames, T_Fields::getName()); + + if(containsDataSource) + { + GetFields getFields; + getFields(params); + } + } + }; 
+
+            /** Write one output step (regular dump or checkpoint) into the openPMD series
+             *
+             * Steps, in order:
+             *  - compute the particle offset and the field offset/size/global-size
+             *    dimensions from the local domain and the moving window
+             *  - read the requested data source names from the plugin help when the
+             *    plugin is self-registered
+             *  - open the series with Access::CREATE unless it is still open
+             *  - write fields, then particle species (everything for checkpoints,
+             *    otherwise "fields_all"/"species_all" plus the individually named
+             *    data sources)
+             *  - write the IdProvider state and the meta attributes
+             *  - wait for outstanding pmacc tasks and close the iteration of the
+             *    current step
+             *
+             * @param threadParams parameters and series handle used for this dump; all
+             *                     accesses, including the final iteration close, go
+             *                     through this pointer
+             * @param mpiTransportParams not referenced inside this function (it shadows
+             *                           the member of the same name)
+             */
+            void write(ThreadParams* threadParams, std::string mpiTransportParams)
+            {
+                /* y direction can be negative for first gpu */
+                const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain();
+                DataSpace particleOffset(localDomain.offset);
+                particleOffset.y() -= threadParams->window.globalDimensions.offset.y();
+
+                threadParams->fieldsOffsetDims = precisionCast(localDomain.offset);
+
+                /* write created variable values */
+                for(uint32_t d = 0; d < simDim; ++d)
+                {
+                    /* dimension 1 is y and is the direction of the moving window
+                     * (if any) */
+                    if(1 == d)
+                    {
+                        uint64_t offset
+                            = std::max(0, localDomain.offset.y() - threadParams->window.globalDimensions.offset.y());
+                        threadParams->fieldsOffsetDims[d] = offset;
+                    }
+
+                    threadParams->fieldsSizeDims[d] = threadParams->window.localDimensions.size[d];
+                    threadParams->fieldsGlobalSizeDims[d] = threadParams->window.globalDimensions.size[d];
+                }
+
+                std::vector vectorOfDataSourceNames;
+                if(m_help->selfRegister)
+                {
+                    std::string dataSourceNames = m_help->source.get(m_id);
+
+                    vectorOfDataSourceNames = plugins::misc::splitString(plugins::misc::removeSpaces(dataSourceNames));
+                }
+
+                bool dumpFields = plugins::misc::containsObject(vectorOfDataSourceNames, "fields_all");
+
+                if(threadParams->openPMDSeries)
+                {
+                    log("openPMD: Series still open, reusing");
+                    // TODO check for same configuration
+                }
+                else
+                {
+                    log("openPMD: opening Series %1%") % threadParams->fileName;
+                    threadParams->openSeries(::openPMD::Access::CREATE);
+                }
+
+                bool dumpAllParticles = plugins::misc::containsObject(vectorOfDataSourceNames, "species_all");
+
+                /* write fields */
+                log("openPMD: (begin) writing fields.");
+                if(threadParams->isCheckpoint)
+                {
+                    meta::ForEach> ForEachGetFields;
+                    ForEachGetFields(threadParams);
+                }
+                else
+                {
+                    if(dumpFields)
+                    {
+                        meta::ForEach> ForEachGetFields;
+                        ForEachGetFields(threadParams);
+                    }
+
+                    // move over all field data sources
+                    meta::ForEach>{}(
+                        vectorOfDataSourceNames,
+                        threadParams);
+                }
+                log("openPMD: ( end ) writing fields.");
+
+
+                /* print all particle species */
+                log("openPMD: (begin) writing particle species.");
+                if(threadParams->isCheckpoint)
+                {
+                    meta::ForEach<
+                        FileCheckpointParticles,
+                        WriteSpecies<
+                            plugins::misc::SpeciesFilter,
+                            plugins::misc::UnfilteredSpecies>>
+                        writeSpecies;
+                    writeSpecies(threadParams, particleOffset);
+                }
+                else
+                {
+                    // dump data if data source "species_all" is selected
+                    if(dumpAllParticles)
+                    {
+                        // move over all species defined in FileOutputParticles
+                        meta::ForEach>>
+                            writeSpecies;
+                        writeSpecies(threadParams, particleOffset);
+                    }
+
+                    // move over all species data sources
+                    meta::ForEach>{}(
+                        vectorOfDataSourceNames,
+                        threadParams,
+                        particleOffset);
+                }
+                log("openPMD: ( end ) writing particle species.");
+
+
+                auto idProviderState = IdProvider::getState();
+                log("openPMD: Writing IdProvider state (StartId: %1%, NextId: %2%, "
+                    "maxNumProc: %3%)")
+                    % idProviderState.startId % idProviderState.nextId % idProviderState.maxNumProc;
+
+                WriteNDScalars writeIdProviderStartId(
+                    "picongpu",
+                    "idProvider",
+                    "startId",
+                    "maxNumProc");
+                WriteNDScalars writeIdProviderNextId("picongpu", "idProvider", "nextId");
+                writeIdProviderStartId(*threadParams, idProviderState.startId, idProviderState.maxNumProc);
+                writeIdProviderNextId(*threadParams, idProviderState.nextId);
+
+                /* attributes written here are pure meta data */
+                WriteMeta writeMetaAttributes;
+                writeMetaAttributes(threadParams);
+
+                // avoid deadlock between not finished pmacc tasks and mpi calls in
+                // openPMD
+                __getTransactionEvent().waitForFinished();
+                // close the iteration on the same series that was opened and written above
+                threadParams->openPMDSeries->WRITE_ITERATIONS[threadParams->currentStep].close();
+            }
+
+            ThreadParams mThreadParams;
+
+            std::shared_ptr m_help;
+            size_t m_id;
+
+            MappingDesc* m_cellDescription;
+
+            std::string outputDirectory;
+
+            /* select MPI method, #OSTs and #aggregators */
+            std::string mpiTransportParams;
+
+            uint32_t lastSpeciesSyncStep;
+
+            DataSpace mpi_pos;
+            DataSpace mpi_size;
+        };
+
+        std::shared_ptr Help::create(
+            std::shared_ptr& help,
+            size_t const id,
+            MappingDesc* cellDescription)
+        {
+            return std::shared_ptr(new openPMDWriter(help, id, cellDescription));
+        }
+
+    } // namespace openPMD
+} // namespace picongpu
diff --git a/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp b/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp
new file mode 100644
index 0000000000..c13a21b601
--- /dev/null
+++ b/include/picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp
@@ -0,0 +1,134 @@
+/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, Franz Poeschel
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see .
+ */
+
+
+#pragma once
+
+
+#include "picongpu/plugins/openPMD/openPMDWriter.def"
+#include "picongpu/simulation_defines.hpp"
+#include "picongpu/traits/PICToOpenPMD.hpp"
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+namespace picongpu
+{
+    namespace openPMD
+    {
+        using namespace pmacc;
+
+        /** Load attribute of a species from openPMD checkpoint storage
+         *
+         * @tparam T_Identifier identifier of species attribute
+         */
+        template
+        struct LoadParticleAttributesFromOpenPMD
+        {
+            /** read one attribute from the openPMD file into a frame
+             *
+             * Each component of the attribute is read into a temporary contiguous
+             * buffer and then scattered into the interleaved (array of structs)
+             * attribute array of the frame. The series is flushed once per
+             * component, also when `elements` is zero.
+             *
+             * @param params thread params
+             * @param frame frame with all particles
+             * @param particleSpecies the openPMD representation of the species
+             * @param particlesOffset read offset in the attribute array
+             * @param elements number of elements which should be read from the
+             *                 attribute array
+             */
+            template
+            HINLINE void operator()(
+                ThreadParams* params,
+                FrameType& frame,
+                ::openPMD::ParticleSpecies particleSpecies,
+                const uint64_t particlesOffset,
+                const uint64_t elements)
+            {
+                using Identifier = T_Identifier;
+                using ValueType = typename pmacc::traits::Resolve::type::type;
+                const uint32_t components = GetNComponents::value;
+                using ComponentType = typename GetComponentsType::type;
+                OpenPMDName openPMDName;
+
+                log("openPMD: ( begin ) load species attribute: %1%") % openPMDName();
+
+                const std::string name_lookup[] = {"x", "y", "z"};
+
+                /* temporary buffer for one component, reused for every component */
+                std::shared_ptr loadBfr;
+                if(elements > 0)
+                {
+                    loadBfr = std::shared_ptr{new ComponentType[elements], [](ComponentType* ptr) {
+                                                  delete[] ptr;
+                                              }};
+                }
+
+                /* the record and the destination array do not depend on the component,
+                 * resolve them once instead of once per loop iteration
+                 */
+                ::openPMD::Record record = particleSpecies[openPMDName()];
+                ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer();
+
+                for(uint32_t n = 0; n < components; ++n)
+                {
+                    ::openPMD::RecordComponent rc
+                        = components > 1 ? record[name_lookup[n]] : record[::openPMD::RecordComponent::SCALAR];
+
+                    if(elements > 0)
+                    {
+                        // avoid deadlock between not finished pmacc tasks and mpi
+                        // calls in openPMD
+                        __getTransactionEvent().waitForFinished();
+                        rc.loadChunk(
+                            loadBfr,
+                            ::openPMD::Offset{particlesOffset},
+                            ::openPMD::Extent{elements});
+                    }
+
+                    /** start a blocking read of all scheduled variables
+                     * (this is a collective call in many methods of openPMD
+                     * backends)
+                     */
+                    params->openPMDSeries->flush();
+
+                    uint64_t globalNumElements = 1;
+                    for(auto ext : rc.getExtent())
+                    {
+                        globalNumElements *= ext;
+                    }
+
+                    log("openPMD: Did read %1% local of %2% global elements for "
+                        "%3%")
+                        % elements % globalNumElements % openPMDName();
+
+/* copy component from temporary array to array of structs */
+#pragma omp parallel for simd
+                    for(size_t i = 0; i < elements; ++i)
+                    {
+                        ComponentType* ref = &reinterpret_cast(dataPtr)[i * components + n];
+                        *ref = loadBfr.get()[i];
+                    }
+                }
+
+                log("openPMD: ( end ) load species attribute: %1%") % openPMDName();
+            }
+        };
+
+    } /* namespace openPMD */
+} /* namespace picongpu */
diff --git a/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp
new file mode 100644
index 0000000000..1c8e7847cc
--- /dev/null
+++ b/include/picongpu/plugins/openPMD/restart/LoadSpecies.hpp
@@ -0,0 +1,232 @@
+/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl, Franz Poeschel
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/plugins/ISimulationPlugin.hpp" +#include "picongpu/plugins/openPMD/openPMDWriter.def" +#include "picongpu/plugins/openPMD/restart/LoadParticleAttributesFromOpenPMD.hpp" +#include "picongpu/plugins/output/WriteSpeciesCommon.hpp" +#include "picongpu/simulation_defines.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace picongpu +{ + namespace openPMD + { + using namespace pmacc; + + /** Load species from openPMD checkpoint storage + * + * @tparam T_Species type of species + */ + template + struct LoadSpecies + { + public: + using ThisSpecies = T_Species; + using FrameType = typename ThisSpecies::FrameType; + using ParticleDescription = typename FrameType::ParticleDescription; + using ParticleAttributeList = typename FrameType::ValueTypeSeq; + + + /* delete multiMask and localCellIdx in openPMD particle*/ + using TypesToDelete = bmpl::vector2; + using ParticleCleanedAttributeList = typename RemoveFromSeq::type; + + /* add totalCellIdx for openPMD particle*/ + using ParticleNewAttributeList = typename MakeSeq::type; + + using NewParticleDescription = + typename ReplaceValueTypeSeq::type; + + using openPMDFrameType = Frame; + + /** Load species from openPMD checkpoint storage + * + * Looks up the particle patch of this rank with getPatchIdx(), reads the + * "numParticles" patch record to obtain the particle count and read offset, + * loads all attributes into mapped host memory and, if at least one particle + * was read, splits the result into frames of the species. + * + * @param params thread params + * @param restartChunkSize number of particles processed in one kernel + * call + */ + HINLINE void operator()(ThreadParams* params, const uint32_t restartChunkSize) + { + std::string const speciesName = FrameType::getName();
+ log("openPMD: (begin) load species: %1%") % speciesName; + DataConnector& dc = Environment<>::get().DataConnector(); + GridController& gc = Environment::get().GridController(); + + ::openPMD::Series& series = *params->openPMDSeries; + ::openPMD::Container<::openPMD::ParticleSpecies>& particles + = series.iterations[params->currentStep].particles; + ::openPMD::ParticleSpecies particleSpecies = particles[speciesName]; + + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + + /* load particle without copying particle data to host */ + auto speciesTmp = dc.get(FrameType::getName(), true); + + // avoid deadlock between not finished pmacc tasks and mpi calls in + // openPMD + __getTransactionEvent().waitForFinished(); + + auto numRanks = gc.getGlobalSize(); + + size_t patchIdx = getPatchIdx(params, series, particleSpecies, numRanks); + + std::shared_ptr fullParticlesInfoShared + = particleSpecies.particlePatches["numParticles"][::openPMD::RecordComponent::SCALAR] + .load(); + series.flush(); + uint64_t* fullParticlesInfo = fullParticlesInfoShared.get(); + + /* Run a prefix sum over the numParticles[0] element in + * particlesInfo to retrieve the offset of particles: + * the counts of all patches before patchIdx are summed into + * particleOffset, the count of patch patchIdx itself becomes + * totalNumParticles + */ + uint64_t particleOffset = 0u; + /* count total number of particles on the device */ + uint64_t totalNumParticles = 0u; + + assert(patchIdx < numRanks); + + for(size_t i = 0u; i <= patchIdx; ++i) + { + if(i < patchIdx) + particleOffset += fullParticlesInfo[i]; + if(i == patchIdx) + totalNumParticles = fullParticlesInfo[i]; + } + + log("openPMD: Loading %1% particles from offset %2%") + % (long long unsigned) totalNumParticles % (long long unsigned) particleOffset; + + openPMDFrameType hostFrame; + log("openPMD: malloc mapped memory: %1%") % speciesName; + /*malloc mapped memory*/ + meta::ForEach> mallocMem; + mallocMem(hostFrame, totalNumParticles); + + log("openPMD: get mapped memory device pointer: %1%") % speciesName; + /*load device pointer of mapped memory*/
+ openPMDFrameType deviceFrame; + meta::ForEach> getDevicePtr; + getDevicePtr(deviceFrame, hostFrame); + + meta::ForEach> + loadAttributes; + loadAttributes(params, hostFrame, particleSpecies, particleOffset, totalNumParticles); + + if(totalNumParticles != 0) + { + pmacc::particles::operations::splitIntoListOfFrames( + *speciesTmp, + deviceFrame, + totalNumParticles, + restartChunkSize, + localDomain.offset, + totalCellIdx_, + *(params->cellDescription), + picLog::INPUT_OUTPUT()); + + /*free host memory*/ + meta::ForEach> freeMem; + freeMem(hostFrame); + } + log("openPMD: ( end ) load species: %1%") % speciesName; + } + + private: + /** get index for particle data within the openPMD patch data + * + * It is not possible to assume that we can use the MPI rank to load the particle data. + * There is no guarantee that the MPI rank is corresponding to the position within + * the simulation volume. + * + * Use patch information offset and extent to find the index which should be used + * to load openPMD particle patch data.
+ * + * The "offset" and "extent" patch records are read for every dimension and the + * first patch whose offset and extent both equal those of the local window is + * selected. + * + * @param params thread params, provides the window offsets and the local size + * @param series series that is flushed to perform the patch reads + * @param particleSpecies the openPMD representation of the species + * @param numRanks number of patch entries that are read and compared + * + * @return index of the particle patch within the openPMD data; 0 is also + * returned when no patch matches (NOTE(review): this is + * indistinguishable from a real match on patch 0 - confirm that + * the silent fallback is intended) + */ + HINLINE size_t getPatchIdx( + ThreadParams* params, + ::openPMD::Series& series, + ::openPMD::ParticleSpecies particleSpecies, + size_t numRanks) + { + const std::string name_lookup[] = {"x", "y", "z"}; + + std::vector> offsets(numRanks); + std::vector> extents(numRanks); + + // transform openPMD particle patch data into PIConGPU data objects + for(uint32_t d = 0; d < simDim; ++d) + { + std::shared_ptr patchOffsetsInfoShared + = particleSpecies.particlePatches["offset"][name_lookup[d]].load(); + std::shared_ptr patchExtentsInfoShared + = particleSpecies.particlePatches["extent"][name_lookup[d]].load(); + series.flush(); + for(size_t i = 0; i < numRanks; ++i) + { + offsets[i][d] = patchOffsetsInfoShared.get()[i]; + extents[i][d] = patchExtentsInfoShared.get()[i]; + } + } + + pmacc::Selection const globalDomain = Environment::get().SubGrid().getGlobalDomain(); + DataSpace const patchOffset = globalDomain.offset + params->window.globalDimensions.offset + + params->window.localDimensions.offset; + DataSpace const patchExtent = params->window.localDimensions.size; + + size_t patchIdx = 0; + // search the patch index based on the offset and extents of local domain size + for(size_t i = 0; i < numRanks; ++i) + { + if(patchOffset == offsets[i] && patchExtent == extents[i]) + { + patchIdx = i; + break; + } + } + return patchIdx; + } + }; + + + } /* namespace openPMD */ + +} /* namespace picongpu */ diff --git a/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp b/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp new file mode 100644 index 0000000000..d311ed9922 --- /dev/null +++ b/include/picongpu/plugins/openPMD/restart/RestartFieldLoader.hpp @@ -0,0 +1,231 @@ +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera + * Benjamin Worpitz, Franz Poeschel + * + * This file is part of PIConGPU.
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/plugins/openPMD/openPMDWriter.def" +#include "picongpu/plugins/misc/ComponentNames.hpp" +#include "picongpu/simulation/control/MovingWindow.hpp" +#include "picongpu/simulation_defines.hpp" +#include "picongpu/traits/IsFieldDomainBound.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + + +namespace picongpu +{ + namespace openPMD + { + /** + * Helper class for openPMD plugin to load fields from parallel openPMD + * storages.
+ */ + class RestartFieldLoader + { + public: + /** Read one field from the meshes of iteration params->currentStep into the host buffer of `field` and copy it to the device. The host buffer is first set to ValueType::create(0.0). For a domain-bound field the chunk at the local domain offset with the local window size is read and stored behind the guard plus params->localWindowToDomainOffset. For a non-domain-bound field a 1D chunk is read whose offset is the sum of the element counts of all ranks with a smaller scalar position, gathered with MPI_Allgather. The series is flushed once per component. */ template + static void loadField( + Data& field, + const uint32_t numComponents, + std::string objectName, + ThreadParams* params, + bool const isDomainBound) + { + log("Begin loading field '%1%'") % objectName; + + auto const name_lookup_tpl = plugins::misc::getComponentNames(numComponents); + const DataSpace field_guard = field.getGridLayout().getGuard(); + + const pmacc::Selection localDomain = Environment::get().SubGrid().getLocalDomain(); + + using ValueType = typename Data::ValueType; + field.getHostBuffer().setValue(ValueType::create(0.0)); + + DataSpace domain_offset = localDomain.offset; + DataSpace local_domain_size = params->window.localDimensions.size; + bool useLinearIdxAsDestination = false; + + /* Patch for non-domain-bound fields + * This is an ugly fix to allow output of reduced 1d PML buffers + */ + if(!isDomainBound) + { + auto const field_layout = params->gridLayout; + auto const field_no_guard = field_layout.getDataSpaceWithoutGuarding(); + auto const elementCount = field_no_guard.productOfComponents(); + + /* Scan the PML buffer local size along all local domains + * This code is symmetric to one in Field::writeField() + */ + log("openPMD: (begin) collect PML sizes for %1%") % objectName; + auto& gridController = Environment::get().GridController(); + auto const numRanks = uint64_t{gridController.getGlobalSize()}; + /* Use domain position-based rank, not MPI rank, to be independent + * of the MPI rank assignment scheme + */ + auto const rank = uint64_t{gridController.getScalarPosition()}; + /* two entries per rank: [2 * r] element count, [2 * r + 1] scalar position */ std::vector localSizes(2 * numRanks, 0u); + uint64_t localSizeInfo[2] = {static_cast(elementCount), rank}; + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + localSizeInfo, + 2, + MPI_UINT64_T, + &(*localSizes.begin()), + 2, + MPI_UINT64_T, + gridController.getCommunicator().getMPIComm())); + uint64_t domainOffset = 0; + for(uint64_t r = 0; r < numRanks; ++r) + { + if(localSizes.at(2u * 
r + 1u) < rank) + domainOffset += localSizes.at(2u * r); + } + log("openPMD: (end) collect PML sizes for %1%") % objectName; + + domain_offset = DataSpace::create(0); + domain_offset[0] = static_cast(domainOffset); + local_domain_size = DataSpace::create(1); + local_domain_size[0] = elementCount; + useLinearIdxAsDestination = true; + } + + ::openPMD::Series& series = *params->openPMDSeries; + ::openPMD::Container<::openPMD::Mesh>& meshes = series.iterations[params->currentStep].meshes; + + auto destBox = field.getHostBuffer().getDataBox(); + for(uint32_t n = 0; n < numComponents; ++n) + { + // Read the subdomain which belongs to our mpi position. + // The total grid size must match the grid size of the stored + // data. + log("openPMD: Read from domain: offset=%1% size=%2%") % domain_offset + % local_domain_size; + ::openPMD::RecordComponent rc = numComponents > 1 + ? meshes[objectName][name_lookup_tpl[n]] + : meshes[objectName][::openPMD::RecordComponent::SCALAR]; + + log("openPMD: Read from field '%1%'") % objectName; + + auto ndim = rc.getDimensionality(); /* NOTE(review): ndim is not referenced again in this function */ + ::openPMD::Offset start = asStandardVector&, ::openPMD::Offset>(domain_offset); + ::openPMD::Extent count + = asStandardVector&, ::openPMD::Extent>(local_domain_size); + + log("openPMD: Allocate %1% elements") + % local_domain_size.productOfComponents(); + + // avoid deadlock between not finished pmacc tasks and mpi calls + // in openPMD backends + __getTransactionEvent().waitForFinished(); + + /* + * @todo float_X should be some kind of gridBuffer's + * GetComponentsType::type + */ + std::shared_ptr field_container = rc.loadChunk(start, count); + + /* start a blocking read of all scheduled variables */ + series.flush(); + + + /* NOTE(review): the element count is held in an int - confirm it cannot exceed INT_MAX */ int const elementCount = local_domain_size.productOfComponents(); + +#pragma omp parallel for simd + for(int linearId = 0; linearId < elementCount; ++linearId) + { + DataSpace destIdx; + if(useLinearIdxAsDestination) + { + destIdx[0] = linearId; + } + else + { + /* calculate index 
inside the moving window domain which + * is located on the local grid*/ + destIdx = DataSpaceOperations::map(params->window.localDimensions.size, linearId); + /* jump over guard and local sliding window offset*/ + destIdx += field_guard + params->localWindowToDomainOffset; + } + + destBox(destIdx)[n] = field_container.get()[linearId]; + } + } + + field.hostToDevice(); + + __getTransactionEvent().waitForFinished(); + + log("openPMD: Read from domain: offset=%1% size=%2%") % domain_offset + % local_domain_size; + log("openPMD: Finished loading field '%1%'") % objectName; + } + }; + + /** + * Helper class for openPMDWriter (forEach operator) to load a field from + * openPMD + * + * @tparam T_Field field class to load + */ + template + struct LoadFields + { + public: + /** Host-only: the body is compiled out when __CUDA_ARCH__ is defined. Fetches the field named T_Field::getName() from the DataConnector, stores its grid layout in params->gridLayout, calls RestartFieldLoader::loadField for its grid buffer and releases the field again. */ HDINLINE void operator()(ThreadParams* params) + { +#ifndef __CUDA_ARCH__ + DataConnector& dc = Environment<>::get().DataConnector(); + ThreadParams* tp = params; + + /* load field without copying data to host */ + auto field = dc.get(T_Field::getName(), true); + tp->gridLayout = field->getGridLayout(); + + /* load from openPMD */ + bool const isDomainBound = traits::IsFieldDomainBound::value; + RestartFieldLoader::loadField( + field->getGridBuffer(), + (uint32_t) T_Field::numComponents, + T_Field::getName(), + tp, + isDomainBound); + + dc.releaseData(T_Field::getName()); +#endif + } + }; + + using namespace pmacc; + + } /* namespace openPMD */ +} /* namespace picongpu */ diff --git a/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp b/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp new file mode 100644 index 0000000000..149281f906 --- /dev/null +++ b/include/picongpu/plugins/openPMD/writer/ParticleAttribute.hpp @@ -0,0 +1,143 @@ +/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, + * Franz Poeschel + * + * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with PIConGPU.
+ * If not, see .
+ */
+
+#pragma once
+
+#include "picongpu/plugins/openPMD/openPMDWriter.def"
+#include "picongpu/simulation_defines.hpp"
+#include "picongpu/traits/PICToOpenPMD.tpp"
+#include "picongpu/plugins/openPMD/openPMDDimension.hpp"
+
+#include
+#include
+#include
+
+namespace picongpu
+{
+    namespace openPMD
+    {
+        using namespace pmacc;
+
+        static const std::string name_lookup[] = {"x", "y", "z"};
+
+
+        /** write attribute of a particle to openPMD series
+         *
+         * @tparam T_Identifier identifier of a particle attribute
+         */
+        template
+        struct ParticleAttribute
+        {
+            /** write attribute to openPMD series
+             *
+             * Each component is gathered from the interleaved (array of structs)
+             * attribute array of the frame into a contiguous temporary buffer,
+             * stored as one chunk and flushed before the buffer is reused for the
+             * next component. The dataset of every component is initialized even
+             * when `elements` is zero; in that case no chunk is stored.
+             *
+             * @param params wrapped params
+             * @param frame frame holding the attribute data of all local particles
+             * @param particleSpecies record container of the species to write into
+             * @param basepath path prefix used to build the dataset name
+             * @param elements number of local elements of this attribute
+             * @param globalElements extent of the dataset
+             * @param globalOffset offset of the local chunk within the dataset
+             */
+            template
+            HINLINE void operator()(
+                ThreadParams* params,
+                FrameType& frame,
+                ::openPMD::Container<::openPMD::Record>& particleSpecies,
+                std::string const& basepath,
+                const size_t elements,
+                const size_t globalElements,
+                const size_t globalOffset)
+            {
+                using Identifier = T_Identifier;
+                using ValueType = typename pmacc::traits::Resolve::type::type;
+                const uint32_t components = GetNComponents::value;
+                using ComponentType = typename GetComponentsType::type;
+
+                OpenPMDName openPMDName;
+                ::openPMD::Record record = particleSpecies[openPMDName()];
+                std::string baseName = basepath + "/" + openPMDName();
+                ::openPMD::Datatype openPMDType = ::openPMD::determineDatatype();
+
+                // get the SI scaling, dimensionality and weighting of the attribute
+                OpenPMDUnit openPMDUnit;
+                std::vector unit = openPMDUnit();
+                OpenPMDUnitDimension openPMDUnitDimension;
+                std::vector unitDimension = openPMDUnitDimension();
+                const bool macroWeightedBool = MacroWeighted::get();
+                const uint32_t macroWeighted = (macroWeightedBool ? 1 : 0);
+                const float_64 weightingPower = WeightingPower::get();
+
+                PMACC_ASSERT(unit.size() == components); // unitSI for each component
+                PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units
+
+                log("openPMD: (begin) write species attribute: %1%") % Identifier::getName();
+
+                /* temporary buffer for one component, reused for every component */
+                std::shared_ptr storeBfr;
+                if(elements > 0)
+                    storeBfr = std::shared_ptr{new ComponentType[elements], [](ComponentType* ptr) {
+                                                   delete[] ptr;
+                                               }};
+
+                /* neither the source array nor the staging buffer depends on the
+                 * component, resolve both once instead of once per loop iteration
+                 */
+                ValueType* dataPtr = frame.getIdentifier(Identifier()).getPointer();
+                auto storePtr = storeBfr.get();
+
+                for(uint32_t d = 0; d < components; d++)
+                {
+                    /* particle records are indexed with the RecordComponent scalar key,
+                     * the same key the restart loader uses to read them back
+                     */
+                    ::openPMD::RecordComponent recordComponent
+                        = components > 1 ? record[name_lookup[d]] : record[::openPMD::RecordComponent::SCALAR];
+                    std::string datasetName = components > 1 ? baseName + "/" + name_lookup[d] : baseName;
+
+/* copy strided data from source to temporary buffer */
+#pragma omp parallel for simd
+                    for(size_t i = 0; i < elements; ++i)
+                    {
+                        storePtr[i] = reinterpret_cast(dataPtr)[d + i * components];
+                    }
+
+                    params->initDataset(
+                        recordComponent,
+                        openPMDType,
+                        {globalElements},
+                        true,
+                        params->compressionMethod,
+                        datasetName);
+                    if(storeBfr)
+                        recordComponent.storeChunk(storeBfr, {globalOffset}, {elements});
+
+                    if(unit.size() >= (d + 1))
+                    {
+                        recordComponent.setUnitSI(unit[d]);
+                    }
+                    /* flush before the next iteration overwrites the staging buffer */
+                    params->openPMDSeries->flush();
+                }
+
+                auto unitMap = convertToUnitDimension(unitDimension);
+
+                record.setUnitDimension(unitMap);
+                record.setAttribute("macroWeighted", macroWeighted);
+                record.setAttribute("weightingPower", weightingPower);
+
+                /* @todo check if always correct at this point,
+                 * depends on attribute and MW-solver/pusher implementation
+                 */
+                float_X const timeOffset = 0.0;
+                record.setAttribute("timeOffset", timeOffset);
+
+                log("openPMD: ( end ) write species attribute: %1%") % Identifier::getName();
+            }
+        };
+
+    } // namespace openPMD
+} // namespace picongpu
diff --git a/include/picongpu/plugins/output/ConstSpeciesAttributes.hpp b/include/picongpu/plugins/output/ConstSpeciesAttributes.hpp
new file mode 100644
index 0000000000..627cdc9f4b
--- /dev/null
+++ b/include/picongpu/plugins/output/ConstSpeciesAttributes.hpp
@@ -0,0 +1,93 @@
+/* Copyright 2014-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera,
+ * Franz Poeschel, Richard Pausch
+ *
+ * This file is part of PIConGPU.
+ *
+ * PIConGPU is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * PIConGPU is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/traits/frame/GetMass.hpp" +#include "picongpu/traits/frame/GetCharge.hpp" + +#include + +#include + +namespace picongpu +{ + namespace plugins + { + namespace output + { + /** Accessor for the charge of a species with a zero fallback: one operator() overload returns frame::getCharge(), the other returns float_X(0.). hasChargeRatio is taken from pmacc::traits::HasFlag. NOTE(review): the enable_if conditions are not visible here - presumably the overloads are selected on hasChargeRatio; confirm. dimension() returns a vector of NUnitDimension exponents, all 0.0 except electricCurrent and time, which are 1.0. */ template + struct GetChargeOrZero + { + static constexpr bool hasChargeRatio = pmacc::traits::HasFlag>::type::value; + + template + typename std::enable_if::type operator()() const + { + return frame::getCharge(); + } + + template + typename std::enable_if::type operator()() const + { + return float_X(0.); + } + + std::vector dimension() const + { + // L, M, T, I, theta, N, J + std::vector unitDimension(NUnitDimension, 0.0); + unitDimension.at(SIBaseUnits::electricCurrent) = 1.0; + unitDimension.at(SIBaseUnits::time) = 1.0; + + return unitDimension; + } + }; + + /** Accessor for the mass of a species with a zero fallback: one operator() overload returns frame::getMass(), the other returns float_X(0.). hasMassRatio is taken from pmacc::traits::HasFlag. NOTE(review): the enable_if conditions are not visible here - presumably the overloads are selected on hasMassRatio; confirm. dimension() returns a vector of NUnitDimension exponents, all 0.0 except mass, which is 1.0. */ template + struct GetMassOrZero + { + static constexpr bool hasMassRatio = pmacc::traits::HasFlag>::type::value; + + template + typename std::enable_if::type operator()() const + { + return frame::getMass(); + } + + template + typename std::enable_if::type operator()() const + { + return float_X(0.); + } + + std::vector dimension() const + { + // L, M, T, I, theta, N, J + std::vector unitDimension(NUnitDimension, 0.0); + unitDimension.at(SIBaseUnits::mass) = 1.0; + + return unitDimension; + } + }; + } // namespace output + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/output/GatherSlice.hpp b/include/picongpu/plugins/output/GatherSlice.hpp index 48b44108b1..aa67e95096 100644 --- a/include/picongpu/plugins/output/GatherSlice.hpp +++ b/include/picongpu/plugins/output/GatherSlice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin
Worpitz * * This file is part of PIConGPU. * @@ -34,216 +34,226 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -struct GatherSlice -{ - - GatherSlice() : - mpiRank(-1), - numRanks(0), - filteredData(nullptr), - comm(MPI_COMM_NULL), - fullData(nullptr), - masterRank(0), - isMPICommInitialized(false) - { - } - - ~GatherSlice() + struct GatherSlice { - reset(); - } - - /* - * @return true if object has reduced data after reduce call else false - */ - bool init(bool isActive) - { - static int masterRankOffset = 0; - - /* free old communicator if `init()` is called again */ - if (isMPICommInitialized) + GatherSlice() + : mpiRank(-1) + , numRanks(0) + , filteredData(nullptr) + , comm(MPI_COMM_NULL) + , fullData(nullptr) + , masterRank(0) + , isMPICommInitialized(false) { - reset(); } - int countRanks = Environment::get().GridController().getGpuNodes().productOfComponents(); - std::vector gatherRanks(countRanks); - std::vector groupRanks(countRanks); - mpiRank = Environment::get().GridController().getGlobalRank(); - if (!isActive) - mpiRank = -1; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather(&mpiRank, 1, MPI_INT, &gatherRanks[0], 1, MPI_INT, MPI_COMM_WORLD)); - - for (int i = 0; i < countRanks; ++i) + ~GatherSlice() { - if (gatherRanks[i] != -1) - { - groupRanks[numRanks] = gatherRanks[i]; - numRanks++; - } + reset(); } - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_Group group = MPI_GROUP_NULL; - MPI_Group newgroup = MPI_GROUP_NULL; - MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &group)); - MPI_CHECK(MPI_Group_incl(group, numRanks, &groupRanks[0], &newgroup)); - - MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, newgroup, &comm)); - - if (mpiRank != -1) + /* + * @return true if object has reduced data after reduce call else false + */ + bool init(bool isActive) { - 
MPI_Comm_rank(comm, &mpiRank); - isMPICommInitialized = true; - } - MPI_CHECK(MPI_Group_free(&group)); - MPI_CHECK(MPI_Group_free(&newgroup)); + static int masterRankOffset = 0; - masterRankOffset++; - /* avoid that only rank zero is the master - * this reduces the load of rank zero - */ - masterRank = (masterRankOffset % numRanks); + /* free old communicator if `init()` is called again */ + if(isMPICommInitialized) + { + reset(); + } - return mpiRank == masterRank; - } + int countRanks = Environment::get().GridController().getGpuNodes().productOfComponents(); + std::vector gatherRanks(countRanks); + std::vector groupRanks(countRanks); + mpiRank = Environment::get().GridController().getGlobalRank(); + if(!isActive) + mpiRank = -1; - template - Box operator()(Box & data, const MessageHeader & header) - { - using ValueType = typename Box::ValueType; + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather(&mpiRank, 1, MPI_INT, &gatherRanks[0], 1, MPI_INT, MPI_COMM_WORLD)); - Box dstBox = Box(PitchedBox ( - (ValueType*) filteredData, - DataSpace (), - header.sim.size, - header.sim.size.x() * sizeof (ValueType) - )); + for(int i = 0; i < countRanks; ++i) + { + if(gatherRanks[i] != -1) + { + groupRanks[numRanks] = gatherRanks[i]; + numRanks++; + } + } - MessageHeader* fakeHeader = MessageHeader::create(); - memcpy(fakeHeader, &header, sizeof (MessageHeader)); + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_Group group = MPI_GROUP_NULL; + MPI_Group newgroup = MPI_GROUP_NULL; + MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &group)); + MPI_CHECK(MPI_Group_incl(group, numRanks, &groupRanks[0], &newgroup)); - char* recvHeader = new char[ MessageHeader::bytes * numRanks]; + MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, newgroup, &comm)); - if (fullData == nullptr && mpiRank == masterRank) - fullData = 
(char*) new ValueType[header.sim.size.productOfComponents()]; + if(mpiRank != -1) + { + MPI_Comm_rank(comm, &mpiRank); + isMPICommInitialized = true; + } + MPI_CHECK(MPI_Group_free(&group)); + MPI_CHECK(MPI_Group_free(&newgroup)); + masterRankOffset++; + /* avoid that only rank zero is the master + * this reduces the load of rank zero + */ + masterRank = (masterRankOffset % numRanks); - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Gather(fakeHeader, MessageHeader::bytes, MPI_CHAR, recvHeader, MessageHeader::bytes, - MPI_CHAR, masterRank, comm)); + return mpiRank == masterRank; + } - std::vector counts(numRanks); - std::vector displs(numRanks); - int offset = 0; - for (int i = 0; i < numRanks; ++i) + template + Box operator()(Box& data, const MessageHeader& header) { - MessageHeader* head = (MessageHeader*) (recvHeader + MessageHeader::bytes * i); - counts[i] = head->node.maxSize.productOfComponents() * sizeof (ValueType); - displs[i] = offset; - offset += counts[i]; - } + using ValueType = typename Box::ValueType; - const size_t elementsCount = header.node.maxSize.productOfComponents() * sizeof (ValueType); + Box dstBox = Box(PitchedBox( + (ValueType*) filteredData, + DataSpace(), + header.sim.size, + header.sim.size.x() * sizeof(ValueType))); - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Gatherv( - (char*) (data.getPointer()), elementsCount, MPI_CHAR, - fullData, &counts[0], &displs[0], MPI_CHAR, - masterRank, comm)); + MessageHeader* fakeHeader = MessageHeader::create(); + *fakeHeader = header; + char* recvHeader = new char[MessageHeader::bytes * numRanks]; + if(fullData == nullptr && mpiRank == masterRank) + fullData = (char*) new ValueType[header.sim.size.productOfComponents()]; - if (mpiRank == masterRank) - { - log ("Master create image"); - if (filteredData == nullptr) 
- filteredData = (char*) new ValueType[header.sim.size.productOfComponents()]; - - /*create box with valid memory*/ - dstBox = Box(PitchedBox ( - (ValueType*) filteredData, - DataSpace (), - header.sim.size, - header.sim.size.x() * sizeof (ValueType) - )); - - for (int i = 0; i < numRanks; ++i) + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Gather( + fakeHeader, + MessageHeader::bytes, + MPI_CHAR, + recvHeader, + MessageHeader::bytes, + MPI_CHAR, + masterRank, + comm)); + + std::vector counts(numRanks); + std::vector displs(numRanks); + int offset = 0; + for(int i = 0; i < numRanks; ++i) { MessageHeader* head = (MessageHeader*) (recvHeader + MessageHeader::bytes * i); - - log ("part image with offset %1%byte=%2%elements | size %3% | offset %4%") % - displs[i] % (displs[i] / sizeof (ValueType)) % - head->node.maxSize.toString() % - head->node.offset.toString(); - Box srcBox = Box(PitchedBox ( - (ValueType*) (fullData + displs[i]), - DataSpace (), - head->node.maxSize, - head->node.maxSize.x() * sizeof (ValueType) - )); - - insertData(dstBox, srcBox, head->node.offset, head->node.maxSize); + counts[i] = head->node.maxSize.productOfComponents() * sizeof(ValueType); + displs[i] = offset; + offset += counts[i]; } - __deleteArray(fullData); - } + const size_t elementsCount = header.node.maxSize.productOfComponents() * sizeof(ValueType); - delete[] recvHeader; - MessageHeader::destroy(fakeHeader); + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Gatherv( + (char*) (data.getPointer()), + elementsCount, + MPI_CHAR, + fullData, + &counts[0], + &displs[0], + MPI_CHAR, + masterRank, + comm)); + + + if(mpiRank == masterRank) + { + log("Master create image"); + if(filteredData == nullptr) + filteredData = (char*) new ValueType[header.sim.size.productOfComponents()]; + + /*create box with 
valid memory*/ + dstBox = Box(PitchedBox( + (ValueType*) filteredData, + DataSpace(), + header.sim.size, + header.sim.size.x() * sizeof(ValueType))); + + for(int i = 0; i < numRanks; ++i) + { + MessageHeader* head = (MessageHeader*) (recvHeader + MessageHeader::bytes * i); + + log("part image with offset %1%byte=%2%elements | size %3% | offset %4%") + % displs[i] % (displs[i] / sizeof(ValueType)) % head->node.maxSize.toString() + % head->node.offset.toString(); + Box srcBox = Box(PitchedBox( + (ValueType*) (fullData + displs[i]), + DataSpace(), + head->node.maxSize, + head->node.maxSize.x() * sizeof(ValueType))); + + insertData(dstBox, srcBox, head->node.offset, head->node.maxSize); + } + + __deleteArray(fullData); + } - return dstBox; - } + delete[] recvHeader; + MessageHeader::destroy(fakeHeader); - template - void insertData(DstBox& dst, const SrcBox& src, MessageHeader::Size2D offsetToSimNull, MessageHeader::Size2D srcSize) - { - for (int y = 0; y < srcSize.y(); ++y) + return dstBox; + } + + template + void insertData( + DstBox& dst, + const SrcBox& src, + MessageHeader::Size2D offsetToSimNull, + MessageHeader::Size2D srcSize) { - for (int x = 0; x < srcSize.x(); ++x) + for(int y = 0; y < srcSize.y(); ++y) { - dst[y + offsetToSimNull.y()][x + offsetToSimNull.x()] = src[y][x]; + for(int x = 0; x < srcSize.x(); ++x) + { + dst[y + offsetToSimNull.y()][x + offsetToSimNull.x()] = src[y][x]; + } } } - } -private: - - /*reset this object und set all values to initial state*/ - void reset() - { - mpiRank = -1; - numRanks = 0; - if (filteredData != nullptr) - delete[] filteredData; - filteredData = nullptr; - if (fullData != nullptr) - delete[] fullData; - fullData = nullptr; - if (isMPICommInitialized) + private: + /*reset this object und set all values to initial state*/ + void reset() { - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Comm_free(&comm)); + mpiRank = -1; + 
numRanks = 0; + if(filteredData != nullptr) + delete[] filteredData; + filteredData = nullptr; + if(fullData != nullptr) + delete[] fullData; + fullData = nullptr; + if(isMPICommInitialized) + { + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Comm_free(&comm)); + } + isMPICommInitialized = false; } - isMPICommInitialized = false; - } - - char* filteredData; - char* fullData; - MPI_Comm comm; - int mpiRank; - int numRanks; - int masterRank; - bool isMPICommInitialized; -}; - -}//namespace + + char* filteredData; + char* fullData; + MPI_Comm comm; + int mpiRank; + int numRanks; + int masterRank; + bool isMPICommInitialized; + }; + +} // namespace picongpu diff --git a/include/picongpu/plugins/output/IIOBackend.hpp b/include/picongpu/plugins/output/IIOBackend.hpp index 618cbb08ef..8dbe0f4fa7 100644 --- a/include/picongpu/plugins/output/IIOBackend.hpp +++ b/include/picongpu/plugins/output/IIOBackend.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,37 +27,32 @@ namespace picongpu { - //! Interface for IO-backends with restart capability class IIOBackend : public plugins::multi::ISlave { public: - - IIOBackend() { - } - virtual ~IIOBackend() + virtual ~IIOBackend() { - } //! create a checkpoint virtual void dumpCheckpoint( uint32_t currentStep, - std::string const & checkpointDirectory, - std::string const & checkpointFilename - ) = 0; + std::string const& checkpointDirectory, + std::string const& checkpointFilename) + = 0; //! 
restart from a checkpoint virtual void doRestart( uint32_t restartStep, - std::string const & restartDirectory, - std::string const & restartFilename, - uint32_t restartChunkSize - ) = 0; + std::string const& restartDirectory, + std::string const& restartFilename, + uint32_t restartChunkSize) + = 0; }; } // namespace picongpu diff --git a/include/picongpu/plugins/output/WriteSpeciesCommon.hpp b/include/picongpu/plugins/output/WriteSpeciesCommon.hpp index d9e1b4bb9c..ad272045fc 100644 --- a/include/picongpu/plugins/output/WriteSpeciesCommon.hpp +++ b/include/picongpu/plugins/output/WriteSpeciesCommon.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Felix Schmitt +/* Copyright 2014-2021 Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -42,109 +42,109 @@ namespace picongpu { + using namespace pmacc; -using namespace pmacc; - - -template -struct MallocMemory -{ - template - HINLINE void operator()(ValueType& v1, const size_t size) const + template + struct MallocMemory { - typedef typename pmacc::traits::Resolve::type::type type; - - type* ptr = nullptr; - if (size != 0) + template + HINLINE void operator()(ValueType& v1, const size_t size) const { -#if( PMACC_CUDA_ENABLED == 1 ) - CUDA_CHECK((cuplaError_t)cudaHostAlloc(&ptr, size * sizeof (type), cudaHostAllocMapped)); + typedef typename pmacc::traits::Resolve::type::type type; + + type* ptr = nullptr; + if(size != 0) + { +#if(PMACC_CUDA_ENABLED == 1) + CUDA_CHECK((cuplaError_t) cudaHostAlloc(&ptr, size * sizeof(type), cudaHostAllocMapped)); +#elif(ALPAKA_ACC_GPU_HIP_ENABLED == 1) + CUDA_CHECK((cuplaError_t) hipHostMalloc((void**) &ptr, size * sizeof(type), hipHostRegisterMapped)); #else - ptr = new type[size]; + ptr = new type[size]; #endif + } + v1.getIdentifier(T_Type()) = VectorDataBox(ptr); } - v1.getIdentifier(T_Type()) = VectorDataBox(ptr); - - } -}; + }; -/** allocate memory on host - * - * This functor use `new[]` to allocate memory - */ -template -struct MallocHostMemory -{ - 
template - HINLINE void operator()(ValueType& v1, const size_t size) const + /** allocate memory on host + * + * This functor use `new[]` to allocate memory + */ + template + struct MallocHostMemory { - typedef T_Attribute Attribute; - typedef typename pmacc::traits::Resolve::type::type type; - - type* ptr = nullptr; - if (size != 0) + template + HINLINE void operator()(ValueType& v1, const size_t size) const { - ptr = new type[size]; + typedef T_Attribute Attribute; + typedef typename pmacc::traits::Resolve::type::type type; + + type* ptr = nullptr; + if(size != 0) + { + ptr = new type[size]; + } + v1.getIdentifier(Attribute()) = VectorDataBox(ptr); } - v1.getIdentifier(Attribute()) = VectorDataBox(ptr); + }; - } -}; + /** copy species to host memory + * + * use `DataConnector::get<...>()` to copy data + */ + template + struct CopySpeciesToHost + { + typedef T_SpeciesType SpeciesType; -/** copy species to host memory - * - * use `DataConnector::get<...>()` to copy data - */ -template -struct CopySpeciesToHost -{ - typedef T_SpeciesType SpeciesType; + HINLINE void operator()() const + { + /* DataConnector copies data to host */ + DataConnector& dc = Environment<>::get().DataConnector(); + dc.get(SpeciesType::FrameType::getName()); + dc.releaseData(SpeciesType::FrameType::getName()); + } + }; - HINLINE void operator()() const + template + struct GetDevicePtr { - /* DataConnector copies data to host */ - DataConnector &dc = Environment<>::get().DataConnector(); - dc.get< SpeciesType >( SpeciesType::FrameType::getName() ); - dc.releaseData( SpeciesType::FrameType::getName() ); - } -}; - -template -struct GetDevicePtr -{ - template - HINLINE void operator()(ValueType& dest, ValueType& src) - { - typedef typename pmacc::traits::Resolve::type::type type; - - type* ptr = nullptr; - type* srcPtr = src.getIdentifier(T_Type()).getPointer(); - if (srcPtr != nullptr) + template + HINLINE void operator()(ValueType& dest, ValueType& src) { -#if( PMACC_CUDA_ENABLED == 1 ) - 
CUDA_CHECK((cuplaError_t)cudaHostGetDevicePointer(&ptr, srcPtr, 0)); + typedef typename pmacc::traits::Resolve::type::type type; + + type* ptr = nullptr; + type* srcPtr = src.getIdentifier(T_Type()).getPointer(); + if(srcPtr != nullptr) + { +#if(PMACC_CUDA_ENABLED == 1) + CUDA_CHECK((cuplaError_t) cudaHostGetDevicePointer(&ptr, srcPtr, 0)); +#elif(ALPAKA_ACC_GPU_HIP_ENABLED == 1) + CUDA_CHECK((cuplaError_t) hipHostGetDevicePointer((void**) &ptr, srcPtr, 0)); #else - ptr = srcPtr; + ptr = srcPtr; #endif + } + dest.getIdentifier(T_Type()) = VectorDataBox(ptr); } - dest.getIdentifier(T_Type()) = VectorDataBox(ptr); - } -}; + }; -template -struct FreeMemory -{ - template - HINLINE void operator()(ValueType& value) const + template + struct FreeMemory { - typedef typename pmacc::traits::Resolve::type::type type; - - type* ptr = value.getIdentifier(T_Type()).getPointer(); - if (ptr != nullptr) + template + HINLINE void operator()(ValueType& value) const { -#if( PMACC_CUDA_ENABLED == 1 ) + typedef typename pmacc::traits::Resolve::type::type type; + + type* ptr = value.getIdentifier(T_Type()).getPointer(); + if(ptr != nullptr) + { +#if(PMACC_CUDA_ENABLED == 1) /* cupla 0.2.0 does not support the function cudaHostAlloc to create mapped memory. * Therefore we need to call the native CUDA function cudaFreeHost to free memory. * Due to the renaming of cuda functions with cupla via macros we need to remove @@ -156,52 +156,49 @@ struct FreeMemory * https://github.com/ComputationalRadiationPhysics/alpaka/issues/296 * https://github.com/ComputationalRadiationPhysics/alpaka/issues/612 */ -# undef cudaFreeHost - CUDA_CHECK((cuplaError_t)cudaFreeHost(ptr)); +# undef cudaFreeHost + CUDA_CHECK((cuplaError_t) cudaFreeHost(ptr)); // re-introduce the cupla macro -# define cudaFreeHost(...) cuplaFreeHost(__VA_ARGS__) +# define cudaFreeHost(...) 
cuplaFreeHost(__VA_ARGS__) +#elif(ALPAKA_ACC_GPU_HIP_ENABLED == 1) + CUDA_CHECK((cuplaError_t) hipHostFree(ptr)); #else - __deleteArray(ptr); + __deleteArray(ptr); #endif + } } - } -}; - -/** free memory - * - * use `__deleteArray()` to free memory - */ -template -struct FreeHostMemory -{ + }; - template - HINLINE void operator()(ValueType& value) const + /** free memory + * + * use `__deleteArray()` to free memory + */ + template + struct FreeHostMemory { - typedef T_Attribute Attribute; - typedef typename pmacc::traits::Resolve::type::type type; - - type* ptr = value.getIdentifier(Attribute()).getPointer(); - if (ptr != nullptr) + template + HINLINE void operator()(ValueType& value) const { - __deleteArray(ptr); - ptr=nullptr; + typedef T_Attribute Attribute; + typedef typename pmacc::traits::Resolve::type::type type; + + type* ptr = value.getIdentifier(Attribute()).getPointer(); + if(ptr != nullptr) + { + __deleteArray(ptr); + ptr = nullptr; + } } - } -}; + }; -/*functor to create a pair for a MapTuple map*/ -struct OperatorCreateVectorBox -{ - template - struct apply + /*functor to create a pair for a MapTuple map*/ + struct OperatorCreateVectorBox { - typedef - bmpl::pair< InType, - pmacc::VectorDataBox< typename pmacc::traits::Resolve::type::type > > - type; + template + struct apply + { + typedef bmpl::pair::type::type>> type; + }; }; -}; - -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/plugins/output/header/ColorHeader.hpp b/include/picongpu/plugins/output/header/ColorHeader.hpp index a7478de109..bf631444b1 100644 --- a/include/picongpu/plugins/output/header/ColorHeader.hpp +++ b/include/picongpu/plugins/output/header/ColorHeader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. 
* @@ -28,54 +28,53 @@ namespace picongpu { -/** Color Header for Preview Images - * - * Used to store the relation of color channels to min/max units - * and data names they represent. - */ -struct ColorHeader -{ - struct channel { - /// assign a physical meaningful name to the channel - std::string name; - /// assign a unit to the range values - std::string unitName; - /// min/max real values for 0 and 255 - picongpu::float_32 range[2]; - }; - - channel particles; - channel channel1; - channel channel2; - channel channel3; - - ColorHeader() + /** Color Header for Preview Images + * + * Used to store the relation of color channels to min/max units + * and data names they represent. + */ + struct ColorHeader { - particles.range[0] = 0.f; - particles.range[1] = 0.f; + struct channel + { + /// assign a physical meaningful name to the channel + std::string name; + /// assign a unit to the range values + std::string unitName; + /// min/max real values for 0 and 255 + picongpu::float_32 range[2]; + }; - channel1.range[0] = 0.f; - channel1.range[1] = 0.f; + channel particles; + channel channel1; + channel channel2; + channel channel3; - channel2.range[0] = 0.f; - channel2.range[1] = 0.f; + ColorHeader() + { + particles.range[0] = 0.f; + particles.range[1] = 0.f; - channel3.range[0] = 0.f; - channel3.range[1] = 0.f; - } + channel1.range[0] = 0.f; + channel1.range[1] = 0.f; - //void setScale(picongpu::float_32 x, picongpu::float_32 y) - //{ - // scale[0] = x; - // scale[1] = y; - //} + channel2.range[0] = 0.f; + channel2.range[1] = 0.f; - void writeToConsole(std::ostream& ocons) const - { - //ocons << "ColorHeader.XYZ " << "..." << std::endl; + channel3.range[0] = 0.f; + channel3.range[1] = 0.f; + } - } + // void setScale(picongpu::float_32 x, picongpu::float_32 y) + //{ + // scale[0] = x; + // scale[1] = y; + //} -}; + void writeToConsole(std::ostream& ocons) const + { + // ocons << "ColorHeader.XYZ " << "..." 
<< std::endl; + } + }; } // namespace picongpu diff --git a/include/picongpu/plugins/output/header/DataHeader.hpp b/include/picongpu/plugins/output/header/DataHeader.hpp index 2d8ce4fb2d..20b86c1b33 100644 --- a/include/picongpu/plugins/output/header/DataHeader.hpp +++ b/include/picongpu/plugins/output/header/DataHeader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -22,21 +22,18 @@ namespace picongpu { - -struct DataHeader -{ - - uint32_t byte; - - DataHeader() : byte(0) + struct DataHeader { - } + uint32_t byte; - void writeToConsole(std::ostream& ocons) const - { - ocons << "DataHeader.byte " << byte << std::endl; - } + DataHeader() : byte(0) + { + } -}; + void writeToConsole(std::ostream& ocons) const + { + ocons << "DataHeader.byte " << byte << std::endl; + } + }; } // namespace picongpu diff --git a/include/picongpu/plugins/output/header/MessageHeader.hpp b/include/picongpu/plugins/output/header/MessageHeader.hpp index f5d8b059fd..8c1325073b 100644 --- a/include/picongpu/plugins/output/header/MessageHeader.hpp +++ b/include/picongpu/plugins/output/header/MessageHeader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -35,140 +35,142 @@ namespace picongpu { - -struct MessageHeader -{ - using Size2D = WindowHeader::Size2D; - - enum + struct MessageHeader { - realBytes = sizeof (DataHeader) + sizeof (SimHeader) + sizeof (WindowHeader) + sizeof (NodeHeader), - bytes = realBytes < 120 ? 
128 : 256 - }; - - template - void update(CellDesc & cellDesc, - picongpu::Window vWindow, - Size2D transpose, - uint32_t currentStep, - picongpu::float_32* cellSizeArr = nullptr, - const pmacc::DataSpace gpus = pmacc::DataSpace ()) - { - using namespace pmacc; - using namespace picongpu; + using Size2D = WindowHeader::Size2D; enum { - Dim = CellDesc::Dim + realBytes = sizeof(DataHeader) + sizeof(SimHeader) + sizeof(WindowHeader) + sizeof(NodeHeader), + bytes = realBytes < 120 ? 128 : 256 }; - const DataSpace localSize(cellDesc.getGridLayout().getDataSpaceWithoutGuarding()); - const DataSpace localSize2D(localSize[transpose.x()], localSize[transpose.y()]); + template + void update( + CellDesc& cellDesc, + picongpu::Window vWindow, + Size2D transpose, + uint32_t currentStep, + picongpu::float_32* cellSizeArr = nullptr, + const pmacc::DataSpace gpus = pmacc::DataSpace()) + { + using namespace pmacc; + using namespace picongpu; - /*update only if nuber of gpus are set, else use old value*/ - if (gpus.productOfComponents() != 0) - sim.nodes = DataSpace (gpus[transpose.x()], gpus[transpose.y()]); + enum + { + Dim = CellDesc::Dim + }; - const SubGrid& subGrid = Environment::get().SubGrid(); + const DataSpace localSize(cellDesc.getGridLayout().getDataSpaceWithoutGuarding()); + const DataSpace localSize2D(localSize[transpose.x()], localSize[transpose.y()]); - const DataSpace globalSize(subGrid.getGlobalDomain().size); - sim.size.x() = globalSize[transpose.x()]; - sim.size.y() = globalSize[transpose.y()]; + /*update only if nuber of gpus are set, else use old value*/ + if(gpus.productOfComponents() != 0) + sim.nodes = DataSpace(gpus[transpose.x()], gpus[transpose.y()]); - node.maxSize = DataSpace (localSize[transpose.x()], localSize[transpose.y()]); + const SubGrid& subGrid = Environment::get().SubGrid(); - const DataSpace windowSize = vWindow.globalDimensions.size; - window.size = DataSpace (windowSize[transpose.x()], windowSize[transpose.y()]); + const DataSpace 
globalSize(subGrid.getGlobalDomain().size); + sim.size.x() = globalSize[transpose.x()]; + sim.size.y() = globalSize[transpose.y()]; - if (cellSizeArr != nullptr) - { - picongpu::float_32 scale[2]; - scale[0] = cellSizeArr[transpose.x()]; - scale[1] = cellSizeArr[transpose.y()]; - sim.cellSizeArr[0] = cellSizeArr[transpose.x()]; - sim.cellSizeArr[1] = cellSizeArr[transpose.y()]; + node.maxSize = DataSpace(localSize[transpose.x()], localSize[transpose.y()]); - const picongpu::float_32 scale0to1 = scale[0] / scale[1]; + const DataSpace windowSize = vWindow.globalDimensions.size; + window.size = DataSpace(windowSize[transpose.x()], windowSize[transpose.y()]); - if (scale0to1 > 1.0f) - { - sim.setScale(scale0to1, 1.f); - } - else if (scale0to1 < 1.0f) - { - sim.setScale(1.f, 1.0f / scale0to1); - } - else + if(cellSizeArr != nullptr) { - sim.setScale(1.f, 1.f); + picongpu::float_32 scale[2]; + scale[0] = cellSizeArr[transpose.x()]; + scale[1] = cellSizeArr[transpose.y()]; + sim.cellSizeArr[0] = cellSizeArr[transpose.x()]; + sim.cellSizeArr[1] = cellSizeArr[transpose.y()]; + + const picongpu::float_32 scale0to1 = scale[0] / scale[1]; + + if(scale0to1 > 1.0f) + { + sim.setScale(scale0to1, 1.f); + } + else if(scale0to1 < 1.0f) + { + sim.setScale(1.f, 1.0f / scale0to1); + } + else + { + sim.setScale(1.f, 1.f); + } } - } - const DataSpace offsetToSimNull(subGrid.getLocalDomain().offset); - const DataSpace windowOffsetToSimNull(vWindow.globalDimensions.offset); - const DataSpace localOffset(vWindow.localDimensions.offset); + const DataSpace offsetToSimNull(subGrid.getLocalDomain().offset); + const DataSpace windowOffsetToSimNull(vWindow.globalDimensions.offset); + const DataSpace localOffset(vWindow.localDimensions.offset); - const DataSpace localOffset2D(localOffset[transpose.x()], localOffset[transpose.y()]); - node.localOffset = localOffset2D; + const DataSpace localOffset2D(localOffset[transpose.x()], localOffset[transpose.y()]); + node.localOffset = localOffset2D; - 
DataSpace offsetToWindow(offsetToSimNull - windowOffsetToSimNull); + DataSpace offsetToWindow(offsetToSimNull - windowOffsetToSimNull); - const DataSpace offsetToWindow2D(offsetToWindow[transpose.x()], offsetToWindow[transpose.y()]); - node.offsetToWindow = offsetToWindow2D; + const DataSpace offsetToWindow2D(offsetToWindow[transpose.x()], offsetToWindow[transpose.y()]); + node.offsetToWindow = offsetToWindow2D; - const DataSpace offsetToSimNull2D(offsetToSimNull[transpose.x()], offsetToSimNull[transpose.y()]); - node.offset = offsetToSimNull2D; + const DataSpace offsetToSimNull2D(offsetToSimNull[transpose.x()], offsetToSimNull[transpose.y()]); + node.offset = offsetToSimNull2D; - const DataSpace windowOffsetToSimNull2D(windowOffsetToSimNull[transpose.x()], windowOffsetToSimNull[transpose.y()]); - window.offset = windowOffsetToSimNull2D; + const DataSpace windowOffsetToSimNull2D( + windowOffsetToSimNull[transpose.x()], + windowOffsetToSimNull[transpose.y()]); + window.offset = windowOffsetToSimNull2D; - const DataSpace currentLocalSize(vWindow.localDimensions.size); - const DataSpace currentLocalSize2D(currentLocalSize[transpose.x()], currentLocalSize[transpose.y()]); - node.size = currentLocalSize2D; + const DataSpace currentLocalSize(vWindow.localDimensions.size); + const DataSpace currentLocalSize2D(currentLocalSize[transpose.x()], currentLocalSize[transpose.y()]); + node.size = currentLocalSize2D; - sim.step = currentStep; + sim.step = currentStep; - /*add sliding windo informations to header*/ - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - sim.simOffsetToNull = DataSpace (); - if (transpose.x() == 1) - sim.simOffsetToNull.x() = node.maxSize.x() * numSlides; - else if (transpose.y() == 1) - sim.simOffsetToNull.y() = node.maxSize.y() * numSlides; + /*add sliding windo informations to header*/ + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + sim.simOffsetToNull = DataSpace(); + 
if(transpose.x() == 1) + sim.simOffsetToNull.x() = node.maxSize.x() * numSlides; + else if(transpose.y() == 1) + sim.simOffsetToNull.y() = node.maxSize.y() * numSlides; + } - } + static MessageHeader* create() + { + return (MessageHeader*) new uint8_t[bytes]; + } - static MessageHeader * create() - { - return (MessageHeader*) new uint8_t[bytes]; - } + static void destroy(MessageHeader* obj) + { + __deleteArray(obj); + } - static void destroy(MessageHeader * obj) - { - __deleteArray(obj); - } + MessageHeader& operator=(MessageHeader const&) = default; - DataHeader data; - SimHeader sim; - WindowHeader window; - NodeHeader node; - //ColorHeader color; will be used later on to save channel ranges + DataHeader data; + SimHeader sim; + WindowHeader window; + NodeHeader node; + // ColorHeader color; will be used later on to save channel ranges - void writeToConsole(std::ostream& ocons) const - { - data.writeToConsole(ocons); - sim.writeToConsole(ocons); - window.writeToConsole(ocons); - node.writeToConsole(ocons); - } - -private: - /** constructor - * - * it is only allowed to create Message header with @see create() - */ - MessageHeader(); - -}; + void writeToConsole(std::ostream& ocons) const + { + data.writeToConsole(ocons); + sim.writeToConsole(ocons); + window.writeToConsole(ocons); + node.writeToConsole(ocons); + } + + private: + /** constructor + * + * it is only allowed to create Message header with @see create() + */ + MessageHeader(); + }; } // namespace picongpu diff --git a/include/picongpu/plugins/output/header/NodeHeader.hpp b/include/picongpu/plugins/output/header/NodeHeader.hpp index 8020525362..2b6a9e9414 100644 --- a/include/picongpu/plugins/output/header/NodeHeader.hpp +++ b/include/picongpu/plugins/output/header/NodeHeader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -28,36 +28,34 @@ namespace picongpu { - -struct NodeHeader -{ - typedef pmacc::DataSpace Size2D; - - Size2D maxSize; - Size2D size; - Size2D offset; - Size2D localOffset; //not valid data - Size2D offsetToWindow; - - Size2D getLocalOffsetToWindow() - { - Size2D tmp(offsetToWindow); - if (tmp.x() < 0) - tmp.x() = 0; - if (tmp.y() < 0) - tmp.y() = 0; - return tmp; - } - - void writeToConsole(std::ostream& ocons) const + struct NodeHeader { - ocons << "NodeHeader.maxSize " << maxSize.x() << " " << maxSize.y() << std::endl; - ocons << "NodeHeader.size " << size.x() << " " << size.y() << std::endl; - ocons << "NodeHeader.localOffset " << localOffset.x() << " " << localOffset.y() << std::endl; - ocons << "NodeHeader.offset " << offset.x() << " " << offset.y() << std::endl; - ocons << "NodeHeader.offsetToWindow " << offsetToWindow.x() << " " << offsetToWindow.y() << std::endl; - } - -}; + typedef pmacc::DataSpace Size2D; + + Size2D maxSize; + Size2D size; + Size2D offset; + Size2D localOffset; // not valid data + Size2D offsetToWindow; + + Size2D getLocalOffsetToWindow() + { + Size2D tmp(offsetToWindow); + if(tmp.x() < 0) + tmp.x() = 0; + if(tmp.y() < 0) + tmp.y() = 0; + return tmp; + } + + void writeToConsole(std::ostream& ocons) const + { + ocons << "NodeHeader.maxSize " << maxSize.x() << " " << maxSize.y() << std::endl; + ocons << "NodeHeader.size " << size.x() << " " << size.y() << std::endl; + ocons << "NodeHeader.localOffset " << localOffset.x() << " " << localOffset.y() << std::endl; + ocons << "NodeHeader.offset " << offset.x() << " " << offset.y() << std::endl; + ocons << "NodeHeader.offsetToWindow " << offsetToWindow.x() << " " << offsetToWindow.y() << std::endl; + } + }; } // namespace picongpu diff --git a/include/picongpu/plugins/output/header/SimHeader.hpp b/include/picongpu/plugins/output/header/SimHeader.hpp index b829a63d70..b57035528c 100644 --- a/include/picongpu/plugins/output/header/SimHeader.hpp +++ 
b/include/picongpu/plugins/output/header/SimHeader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -28,42 +28,40 @@ namespace picongpu { + struct SimHeader + { + typedef pmacc::DataSpace Size2D; -struct SimHeader -{ - typedef pmacc::DataSpace Size2D; - - Size2D size; - Size2D nodes; - Size2D simOffsetToNull; - uint32_t step; - picongpu::float_32 scale[2]; - picongpu::float_32 cellSizeArr[2]; + Size2D size; + Size2D nodes; + Size2D simOffsetToNull; + uint32_t step; + picongpu::float_32 scale[2]; + picongpu::float_32 cellSizeArr[2]; - SimHeader() : step(0) - { - scale[0] = 1.f; - scale[1] = 1.f; - cellSizeArr[0] = 0.f; - cellSizeArr[1] = 0.f; - } + SimHeader() : step(0) + { + scale[0] = 1.f; + scale[1] = 1.f; + cellSizeArr[0] = 0.f; + cellSizeArr[1] = 0.f; + } - void setScale(picongpu::float_32 x, picongpu::float_32 y) - { - scale[0] = x; - scale[1] = y; - } - - void writeToConsole(std::ostream& ocons) const - { - ocons << "SimHeader.size " << size.x() << " " << size.y() << std::endl; - ocons << "SimHeader.nodes " << nodes.x() << " " << nodes.y() << std::endl; - ocons << "SimHeader.step " << step << std::endl; - ocons << "SimHeader.scale " << scale[0] << " " << scale[1] << std::endl; - ocons << "SimHeader.cellSize " << cellSizeArr[0] << " " << cellSizeArr[1] << std::endl; - } + void setScale(picongpu::float_32 x, picongpu::float_32 y) + { + scale[0] = x; + scale[1] = y; + } -}; + void writeToConsole(std::ostream& ocons) const + { + ocons << "SimHeader.size " << size.x() << " " << size.y() << std::endl; + ocons << "SimHeader.nodes " << nodes.x() << " " << nodes.y() << std::endl; + ocons << "SimHeader.step " << step << std::endl; + ocons << "SimHeader.scale " << scale[0] << " " << scale[1] << std::endl; + ocons << "SimHeader.cellSize " << cellSizeArr[0] << " " << cellSizeArr[1] << std::endl; + } + }; } // namespace picongpu diff --git 
a/include/picongpu/plugins/output/header/WindowHeader.hpp b/include/picongpu/plugins/output/header/WindowHeader.hpp index 0a7729dbec..663fc43b35 100644 --- a/include/picongpu/plugins/output/header/WindowHeader.hpp +++ b/include/picongpu/plugins/output/header/WindowHeader.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -28,20 +28,18 @@ namespace picongpu { - -struct WindowHeader -{ - using Size2D = pmacc::DataSpace< DIM2 >; - - Size2D size; - Size2D offset; - - void writeToConsole(std::ostream& ocons) const + struct WindowHeader { - ocons << "WindowHeader.size " << size.x() << " " << size.y() << std::endl; - ocons << "WindowHeader.offset " << offset.x() << " " << offset.y() << std::endl; - } + using Size2D = pmacc::DataSpace; + + Size2D size; + Size2D offset; -}; + void writeToConsole(std::ostream& ocons) const + { + ocons << "WindowHeader.size " << size.x() << " " << size.y() << std::endl; + ocons << "WindowHeader.offset " << offset.x() << " " << offset.y() << std::endl; + } + }; } // namespace picongpu diff --git a/include/picongpu/plugins/output/images/PngCreator.hpp b/include/picongpu/plugins/output/images/PngCreator.hpp index 90b3cc802a..29383cb3bc 100644 --- a/include/picongpu/plugins/output/images/PngCreator.hpp +++ b/include/picongpu/plugins/output/images/PngCreator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -39,12 +39,11 @@ namespace picongpu struct PngCreator { - - PngCreator(std::string name, std::string folder) : - m_name(folder + "/" + name), - m_folder(folder), - m_createFolder(true), - m_isThreadActive(false) + PngCreator(std::string name, std::string folder) + : m_name(folder + "/" + name) + , m_folder(folder) + , m_createFolder(true) + , m_isThreadActive(false) { } @@ -93,10 +92,7 @@ namespace picongpu * @param header meta information about the simulation */ template - void operator()( - const Box data, - const MessageHeader::Size2D size, - const MessageHeader header) + void operator()(const Box data, const MessageHeader::Size2D size, const MessageHeader header) { if(m_isThreadActive) { @@ -107,11 +103,8 @@ namespace picongpu } private: - template - void createImage(const Box data, - const MessageHeader::Size2D size, - const MessageHeader header); + void createImage(const Box data, const MessageHeader::Size2D size, const MessageHeader header); std::string m_name; std::string m_folder; @@ -119,7 +112,6 @@ namespace picongpu std::thread workerThread; /* status whether a thread is currently active */ bool m_isThreadActive; - }; } /* namespace picongpu */ diff --git a/include/picongpu/plugins/output/images/PngCreator.tpp b/include/picongpu/plugins/output/images/PngCreator.tpp index 3a9b12ae14..42cd3045db 100644 --- a/include/picongpu/plugins/output/images/PngCreator.tpp +++ b/include/picongpu/plugins/output/images/PngCreator.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -34,90 +34,83 @@ #include #include -#if( PIC_ENABLE_PNG == 1 ) -# include +#if(PIC_ENABLE_PNG == 1) +# include #endif namespace picongpu { - template< class Box > - inline void PngCreator::createImage( - const Box data, - const MessageHeader::Size2D size, - const MessageHeader header - ) + template + inline void PngCreator::createImage(const Box data, const MessageHeader::Size2D size, const MessageHeader header) { -#if( PIC_ENABLE_PNG == 1 ) - if ( m_createFolder ) +#if(PIC_ENABLE_PNG == 1) + if(m_createFolder) { - Environment< simDim >::get( ).Filesystem( ).createDirectoryWithPermissions( m_folder ); + Environment::get().Filesystem().createDirectoryWithPermissions(m_folder); m_createFolder = false; } std::stringstream step; - step << std::setw( 6 ) << std::setfill( '0' ) << header.sim.step; - std::string filename( m_name + "_" + step.str( ) + ".png" ); + step << std::setw(6) << std::setfill('0') << header.sim.step; + std::string filename(m_name + "_" + step.str() + ".png"); - pngwriter png( size.x( ), size.y( ), 0, filename.c_str( ) ); + pngwriter png(size.x(), size.y(), 0, filename.c_str()); /* default compression: 6 * zlib level 1 is ~12% bigger but ~2.3x faster in write_png( ) */ - png.setcompressionlevel( 1 ); + png.setcompressionlevel(1); - //PngWriter coordinate system begin with 1,1 - for( int y = 0; y < size.y( ); ++y) + // PngWriter coordinate system begin with 1,1 + for(int y = 0; y < size.y(); ++y) { - for( int x = 0; x < size.x( ); ++x ) + for(int x = 0; x < size.x(); ++x) { - float3_X p = data[ y ][ x ]; - png.plot( x + 1, size.y( ) - y, p.x( ), p.y( ), p.z( ) ); + float3_X p = data[y][x]; + png.plot(x + 1, size.y() - y, p.x(), p.y(), p.z()); } } /* scale the image by a user defined relative factor * `scale_image` is defined in `png.param` */ - float_X scale_x( scale_image ); - float_X scale_y( scale_image ); + float_X scale_x(scale_image); + float_X scale_y(scale_image); - if( scale_to_cellsize ) + if(scale_to_cellsize) { // scale to real cell 
size - scale_x *= header.sim.scale[ 0 ]; - scale_y *= header.sim.scale[ 1 ]; + scale_x *= header.sim.scale[0]; + scale_y *= header.sim.scale[1]; } /* to prevent artifacts scale only, if at least one of scale_x and * scale_y is != 1.0 */ - if( ( scale_x != float_X( 1.0 ) ) || - ( scale_y != float_X( 1.0 ) ) - ) - //process the cell size and by factor scaling within one step - png.scale_kxky( scale_x, scale_y ); + if((scale_x != float_X(1.0)) || (scale_y != float_X(1.0))) + // process the cell size and by factor scaling within one step + png.scale_kxky(scale_x, scale_y); // add some meta information - //header.writeToConsole( std::cout ); + // header.writeToConsole( std::cout ); - std::ostringstream description( std::ostringstream::out ); - header.writeToConsole( description ); + std::ostringstream description(std::ostringstream::out); + header.writeToConsole(description); - char title[ ] = "PIConGPU preview image"; - std::string author = Environment<>::get().SimulationDescription().getAuthor( ); - char software[ ] = "PIConGPU with PNGwriter"; + char title[] = "PIConGPU preview image"; + std::string author = Environment<>::get().SimulationDescription().getAuthor(); + char software[] = "PIConGPU with PNGwriter"; - png.settext( title, author.c_str( ), description.str( ).c_str( ), software ); + png.settext(title, author.c_str(), description.str().c_str(), software); // write to disk and close object - png.close( ); + png.close(); #else - boost::ignore_unused( data, size, header ); + boost::ignore_unused(data, size, header); /* always fail with an exception at runtime */ - PMACC_VERIFY_MSG( false, "not allowed to call createImage (missing dependency PNGwriter)" ); + PMACC_VERIFY_MSG(false, "not allowed to call createImage (missing dependency PNGwriter)"); #endif - } } /* namespace picongpu */ diff --git a/include/picongpu/plugins/output/images/Visualisation.hpp b/include/picongpu/plugins/output/images/Visualisation.hpp index df35e5aa4b..61ffadbe6e 100644 --- 
a/include/picongpu/plugins/output/images/Visualisation.hpp +++ b/include/picongpu/plugins/output/images/Visualisation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, Felix Schmitt * * This file is part of PIConGPU. * @@ -60,278 +60,240 @@ namespace picongpu { - -// normalize EM fields to typical laser or plasma quantities -//-1: Auto: enable adaptive scaling for each output -// 1: Laser: typical fields calculated out of the laser amplitude -// 2: Drift: outdated -// 3: PlWave: typical fields calculated out of the plasma freq., -// assuming the wave moves approx. with c -// 4: Thermal: outdated -// 5: BlowOut: typical fields, assuming that a LWFA in the blowout -// regime causes a bubble with radius of approx. the laser's -// beam waist (use for bubble fields) -/// \return float3_X( tyBField, tyEField, tyCurrent ) - -template< int T > -struct typicalFields -{ - - HDINLINE static float3_X get() + // normalize EM fields to typical laser or plasma quantities + //-1: Auto: enable adaptive scaling for each output + // 1: Laser: typical fields calculated out of the laser amplitude + // 2: Drift: outdated + // 3: PlWave: typical fields calculated out of the plasma freq., + // assuming the wave moves approx. with c + // 4: Thermal: outdated + // 5: BlowOut: typical fields, assuming that a LWFA in the blowout + // regime causes a bubble with radius of approx. 
the laser's + // beam waist (use for bubble fields) + /// \return float3_X( tyBField, tyEField, tyCurrent ) + + template + struct typicalFields { - return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); - } -}; - -template< > -struct typicalFields < -1 > -{ + HDINLINE static float3_X get() + { + return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); + } + }; - HDINLINE static float3_X get() + template<> + struct typicalFields<-1> { - return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); - } -}; - -template< > -struct typicalFields < 1 > -{ + HDINLINE static float3_X get() + { + return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); + } + }; - HDINLINE static float3_X get() + template<> + struct typicalFields<1> { + HDINLINE static float3_X get() + { #if !(EM_FIELD_SCALE_CHANNEL1 == 1 || EM_FIELD_SCALE_CHANNEL2 == 1 || EM_FIELD_SCALE_CHANNEL3 == 1) - return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); + return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); #else - const float_X tyCurrent = particles::TYPICAL_PARTICLES_PER_CELL * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE - * abs(BASE_CHARGE) / DELTA_T; - const float_X tyEField = fields::laserProfiles::Selected::Unitless::AMPLITUDE + FLT_MIN; - const float_X tyBField = tyEField * MUE0_EPS0; + constexpr auto baseCharge = BASE_CHARGE; + const float_X tyCurrent = particles::TYPICAL_PARTICLES_PER_CELL + * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * math::abs(baseCharge) / DELTA_T; + const float_X tyEField = fields::laserProfiles::Selected::Unitless::AMPLITUDE + FLT_MIN; + const float_X tyBField = tyEField * MUE0_EPS0; - return float3_X(tyBField, tyEField, tyCurrent); + return float3_X(tyBField, tyEField, tyCurrent); #endif - } -}; - + } + }; -/* outdated drift normalization */ -template< > -struct typicalFields < 2 >; -template< > -struct typicalFields < 3 > -{ + /* outdated drift normalization */ + template<> + struct typicalFields<2>; - HDINLINE static float3_X get() + 
template<> + struct typicalFields<3> { + HDINLINE static float3_X get() + { #if !(EM_FIELD_SCALE_CHANNEL1 == 3 || EM_FIELD_SCALE_CHANNEL2 == 3 || EM_FIELD_SCALE_CHANNEL3 == 3) - return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); + return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); #else - const float_X lambda_pl = pmacc::algorithms::math::Pi< float_X >::doubleValue * - SPEED_OF_LIGHT * sqrt(BASE_MASS * EPS0 / BASE_DENSITY / BASE_CHARGE / BASE_CHARGE); - const float_X tyEField = lambda_pl * BASE_DENSITY / 3.0f / EPS0; - const float_X tyBField = tyEField * MUE0_EPS0; - const float_X tyCurrent = tyBField / MUE0; - - return float3_X(tyBField, tyEField, tyCurrent); + constexpr auto baseCharge = BASE_CHARGE; + const float_X lambda_pl = pmacc::math::Pi::doubleValue * SPEED_OF_LIGHT + * sqrt(BASE_MASS * EPS0 / BASE_DENSITY / baseCharge / baseCharge); + const float_X tyEField = lambda_pl * BASE_DENSITY / 3.0f / EPS0; + const float_X tyBField = tyEField * MUE0_EPS0; + const float_X tyCurrent = tyBField / MUE0; + + return float3_X(tyBField, tyEField, tyCurrent); #endif - } -}; - -/* outdated ELECTRON_TEMPERATURE normalization */ -template< > -struct typicalFields < 4 >; + } + }; -template< > -struct typicalFields < 5 > -{ + /* outdated ELECTRON_TEMPERATURE normalization */ + template<> + struct typicalFields<4>; - HDINLINE static float3_X get() + template<> + struct typicalFields<5> { + HDINLINE static float3_X get() + { #if !(EM_FIELD_SCALE_CHANNEL1 == 5 || EM_FIELD_SCALE_CHANNEL2 == 5 || EM_FIELD_SCALE_CHANNEL3 == 5) - return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); + return float3_X(float_X(1.0), float_X(1.0), float_X(1.0)); #else - const float_X tyEField = fields::laserProfiles::Selected::Unitless::W0 * BASE_DENSITY / 3.0f / EPS0; - const float_X tyBField = tyEField * MUE0_EPS0; - const float_X tyCurrent = particles::TYPICAL_PARTICLES_PER_CELL * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE - * abs(BASE_CHARGE) / DELTA_T; + constexpr auto 
baseCharge = BASE_CHARGE; + const float_X tyEField = fields::laserProfiles::Selected::Unitless::W0 * BASE_DENSITY / 3.0f / EPS0; + const float_X tyBField = tyEField * MUE0_EPS0; + const float_X tyCurrent = particles::TYPICAL_PARTICLES_PER_CELL + * particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * math::abs(baseCharge) / DELTA_T; - return float3_X(tyBField, tyEField, tyCurrent); + return float3_X(tyBField, tyEField, tyCurrent); #endif - } -}; - + } + }; -/** Check if an offset is part of the slicing domain - * - * Check if a N dimensional local domain offset is equal to a scalar offset of - * a given dimension. - * The results can be taken to decide if a cell is within a slice of a volume. - */ -template< uint32_t T_dim = simDim > -struct IsPartOfSlice; -template< > -struct IsPartOfSlice< DIM3 > -{ - /** perform check - * - * @param cellOffset cell offset relative to the origin of the local domain - * @param sliceDim dimension of the slice - * @param localDomainOffset local domain offset relative to the origin of the global domain - * (in the slice dimension) - * @param sliceOffset cell offset of the slice relative to the origin of the global domain - * ( in the slice dimension) - * @return true if cellOffset is part of the slicing domain, else false + /** Check if an offset is part of the slicing domain * - * @return always true + * Check if a N dimensional local domain offset is equal to a scalar offset of + * a given dimension. + * The results can be taken to decide if a cell is within a slice of a volume. 
*/ - template< typename T_Space > - HDINLINE bool operator()( - T_Space const & cellOffset, - uint32_t const sliceDim, - uint32_t const localDomainOffset, - uint32_t const sliceOffset - ) + template + struct IsPartOfSlice; + + template<> + struct IsPartOfSlice { - // offset of the cell relative to the global origin - uint32_t const localCellOffset = cellOffset[ sliceDim ] + localDomainOffset; - return localCellOffset == sliceOffset; - } -}; - -template< > -struct IsPartOfSlice< DIM2 > -{ - /** perform check - * - * @return always true - */ - template< typename T_Space > - HDINLINE bool operator()( - T_Space const &, - uint32_t const, - uint32_t const, - uint32_t const - ) + /** perform check + * + * @param cellOffset cell offset relative to the origin of the local domain + * @param sliceDim dimension of the slice + * @param localDomainOffset local domain offset relative to the origin of the global domain + * (in the slice dimension) + * @param sliceOffset cell offset of the slice relative to the origin of the global domain + * ( in the slice dimension) + * @return true if cellOffset is part of the slicing domain, else false + * + * @return always true + */ + template + HDINLINE bool operator()( + T_Space const& cellOffset, + uint32_t const sliceDim, + uint32_t const localDomainOffset, + uint32_t const sliceOffset) + { + // offset of the cell relative to the global origin + uint32_t const localCellOffset = cellOffset[sliceDim] + localDomainOffset; + return localCellOffset == sliceOffset; + } + }; + + template<> + struct IsPartOfSlice { - return true; - } -}; + /** perform check + * + * @return always true + */ + template + HDINLINE bool operator()(T_Space const&, uint32_t const, uint32_t const, uint32_t const) + { + return true; + } + }; -/** derives two dimensional field from a slice of field - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelPaintFields -{ - /** derive field values + /** derives two dimensional 
field from a slice of field * - * @tparam T_EBox pmacc::DataBox, electric field box type - * @tparam T_BBox pmacc::DataBox, magnetic field box type - * @tparam T_JBox particle current box type - * @tparam T_Mapping mapper functor type - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param fieldE electric field - * @param fieldB magnetic field - * @param fieldJ field with particle current - * @param image[in,out] two dimensional image (without guarding cells) - * @param transpose indices to transpose dimensions range per dimension [0,simDim) - * @param slice offset (in cells) of the slice in the dimension sliceDim relative to - * the origin of the global domain - * @param localDomainOffset offset (in cells) of the local domain relative to the - * origin of the global domain - * @param sliceDim dimension to slice range [0,simDim) - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_EBox, - typename T_BBox, - typename T_JBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_EBox const fieldE, - T_BBox const fieldB, - T_JBox const fieldJ, - DataBox< - PitchedBox< - float3_X, - DIM2 - > - > image, - DataSpace< DIM2 > const transpose, - int const slice, - uint32_t const localDomainOffset, - uint32_t const sliceDim, - T_Mapping mapper - ) const + template + struct KernelPaintFields { - using namespace mappings::threads; + /** derive field values + * + * @tparam T_EBox pmacc::DataBox, electric field box type + * @tparam T_BBox pmacc::DataBox, magnetic field box type + * @tparam T_JBox particle current box type + * @tparam T_Mapping mapper functor type + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param fieldE electric field + * @param fieldB magnetic field + * @param fieldJ field with particle current + * @param image[in,out] two dimensional image (without guarding cells) + * 
@param transpose indices to transpose dimensions range per dimension [0,simDim) + * @param slice offset (in cells) of the slice in the dimension sliceDim relative to + * the origin of the global domain + * @param localDomainOffset offset (in cells) of the local domain relative to the + * origin of the global domain + * @param sliceDim dimension to slice range [0,simDim) + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_EBox const fieldE, + T_BBox const fieldB, + T_JBox const fieldJ, + DataBox> image, + DataSpace const transpose, + int const slice, + uint32_t const localDomainOffset, + uint32_t const sliceDim, + T_Mapping mapper) const + { + using namespace mappings::threads; - using SuperCellSize = typename T_Mapping::SuperCellSize; + using SuperCellSize = typename T_Mapping::SuperCellSize; - constexpr uint32_t cellsPerSupercell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; + constexpr uint32_t cellsPerSupercell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - DataSpace< simDim > const suplercellIdx = mapper.getSuperCellIndex( DataSpace< simDim >( blockIdx ) ); - // offset of the supercell (in cells) to the origin of the local domain - DataSpace< simDim > const supercellCellOffset( - ( suplercellIdx - mapper.getGuardingSuperCells( ) ) * SuperCellSize::toRT( ) - ); + DataSpace const suplercellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + // offset of the supercell (in cells) to the origin of the local domain + DataSpace const supercellCellOffset( + (suplercellIdx - mapper.getGuardingSuperCells()) * SuperCellSize::toRT()); - using SupercellDomCfg = IdxConfig< - cellsPerSupercell, - numWorkers - >; + using SupercellDomCfg = IdxConfig; - // each cell in a supercell is handled as a 
virtual worker - ForEachIdx< SupercellDomCfg > forEachCell( workerIdx ); + // each cell in a supercell is handled as a virtual worker + ForEachIdx forEachCell(workerIdx); - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { + forEachCell([&](uint32_t const linearIdx, uint32_t const) { // cell index within the superCell - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); + DataSpace const cellIdx = DataSpaceOperations::template map(linearIdx); // offset to the origin of the local domain + guarding cells - DataSpace< simDim > const cellOffset( suplercellIdx * SuperCellSize::toRT() + cellIdx ); + DataSpace const cellOffset(suplercellIdx * SuperCellSize::toRT() + cellIdx); // cell offset without guarding cells - DataSpace< simDim > const realCell( supercellCellOffset + cellIdx ); + DataSpace const realCell(supercellCellOffset + cellIdx); // offset within the two dimensional result buffer - DataSpace< DIM2 > const imageCell( - realCell[ transpose.x( ) ], - realCell[ transpose.y( ) ] - ); - - bool const isCellOnSlice = IsPartOfSlice< >{}( - realCell, - sliceDim, - localDomainOffset, - slice - ); + DataSpace const imageCell(realCell[transpose.x()], realCell[transpose.y()]); + + bool const isCellOnSlice = IsPartOfSlice<>{}(realCell, sliceDim, localDomainOffset, slice); /* if the virtual worker is not calculating a cell out of the * selected slice then exit */ - if( !isCellOnSlice ) + if(!isCellOnSlice) return; // set fields of this cell to vars - typename T_BBox::ValueType field_b = fieldB( cellOffset ); - typename T_EBox::ValueType field_e = fieldE( cellOffset ); - typename T_JBox::ValueType field_j = fieldJ( cellOffset ); + typename T_BBox::ValueType field_b = fieldB(cellOffset); + typename T_EBox::ValueType field_e = fieldE(cellOffset); + typename T_JBox::ValueType field_j = fieldJ(cellOffset); // multiply with the area size of each plane - field_j *= float3_X::create( CELL_VOLUME ) / 
cellSize; + field_j *= float3_X::create(CELL_VOLUME) / cellSize; /* reset picture to black * color range for each RGB channel: [0.0, 1.0] @@ -342,303 +304,202 @@ struct KernelPaintFields * [1] = EField normalization, [2] = Current normalization */ visPreview::preChannel1( - field_b / typicalFields< EM_FIELD_SCALE_CHANNEL1 >::get( )[ 0 ], - field_e / typicalFields< EM_FIELD_SCALE_CHANNEL1 >::get( )[ 1 ], - field_j / typicalFields< EM_FIELD_SCALE_CHANNEL1 >::get( )[ 2 ] - ), + field_b / typicalFields::get()[0], + field_e / typicalFields::get()[1], + field_j / typicalFields::get()[2]), visPreview::preChannel2( - field_b / typicalFields< EM_FIELD_SCALE_CHANNEL2 >::get( )[ 0 ], - field_e / typicalFields< EM_FIELD_SCALE_CHANNEL2 >::get( )[ 1 ], - field_j / typicalFields< EM_FIELD_SCALE_CHANNEL2 >::get( )[ 2 ] - ), + field_b / typicalFields::get()[0], + field_e / typicalFields::get()[1], + field_j / typicalFields::get()[2]), visPreview::preChannel3( - field_b / typicalFields< EM_FIELD_SCALE_CHANNEL3 >::get( )[ 0 ], - field_e / typicalFields< EM_FIELD_SCALE_CHANNEL3 >::get( )[ 1 ], - field_j / typicalFields< EM_FIELD_SCALE_CHANNEL3 >::get( )[ 2 ] - ) - ); + field_b / typicalFields::get()[0], + field_e / typicalFields::get()[1], + field_j / typicalFields::get()[2])); // draw to (perhaps smaller) image cell - image( imageCell ) = pic; - } - ); - } -}; + image(imageCell) = pic; + }); + } + }; -/** derives two dimensional field from a particle slice - * - * The shape of a particle is not taken in account. - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelPaintParticles3D -{ - /** derive particle values + /** derives two dimensional field from a particle slice * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_Mapping mapper functor type - * @tparam T_Acc alpaka accelerator type + * The shape of a particle is not taken in account. 
* - * @param acc alpaka accelerator - * @param pb particle memory - * @param image[in,out] two dimensional image (without guarding cells) - * @param transpose indices to transpose dimensions range per dimension [0,simDim) - * @param slice offset (in cells) of the slice in the dimension sliceDim relative to - * the origin of the global domain - * @param localDomainOffset offset (in cells) of the local domain relative to the - * origin of the global domain - * @param sliceDim dimension to slice range [0,simDim) - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - T_ParBox pb, - DataBox< - PitchedBox< - float3_X, - DIM2 - > - > image, - DataSpace< DIM2 > const transpose, - int const slice, - uint32_t const localDomainOffset, - uint32_t const sliceDim, - T_Mapping mapper - ) const + template + struct KernelPaintParticles3D { - using namespace mappings::threads; + /** derive particle values + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_Mapping mapper functor type + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param pb particle memory + * @param image[in,out] two dimensional image (without guarding cells) + * @param transpose indices to transpose dimensions range per dimension [0,simDim) + * @param slice offset (in cells) of the slice in the dimension sliceDim relative to + * the origin of the global domain + * @param localDomainOffset offset (in cells) of the local domain relative to the + * origin of the global domain + * @param sliceDim dimension to slice range [0,simDim) + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_ParBox pb, + DataBox> image, + DataSpace const transpose, + int const slice, + uint32_t const localDomainOffset, + uint32_t const 
sliceDim, + T_Mapping mapper) const + { + using namespace mappings::threads; - using SuperCellSize = typename T_Mapping::SuperCellSize; + using SuperCellSize = typename T_Mapping::SuperCellSize; - constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numCellsPerSupercell = numParticlesPerFrame; - constexpr uint32_t numWorkers = T_numWorkers; + constexpr uint32_t numParticlesPerFrame = pmacc::math::CT::volume::type::value; + constexpr uint32_t numCellsPerSupercell = numParticlesPerFrame; + constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - using ParticleDomCfg = IdxConfig< - numParticlesPerFrame, - numWorkers - >; + using ParticleDomCfg = IdxConfig; - using SupercellDomCfg = IdxConfig< - numCellsPerSupercell, - numWorkers - >; + using SupercellDomCfg = IdxConfig; - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; + ForEachIdx> onlyMaster{workerIdx}; - // each virtual worker works on a cell in the supercell - ForEachIdx< SupercellDomCfg > forEachCell( workerIdx ); + // each virtual worker works on a cell in the supercell + ForEachIdx forEachCell(workerIdx); - /* is 1 if a offset of a cell in the supercell is equal the slice (offset) - * else 0 - */ - PMACC_SMEM( - acc, - superCellParticipate, - int - ); - - /* true if the virtual worker is processing a pixel within the resulting image, - * else false - */ - memory::CtxArray< - bool, - SupercellDomCfg - > isImageThreadCtx( false ); - - DataSpace< simDim > const suplercellIdx = mapper.getSuperCellIndex(DataSpace (blockIdx)); - // offset of the supercell (in cells) to the origin of the local domain - DataSpace< simDim > const supercellCellOffset( - ( suplercellIdx - mapper.getGuardingSuperCells( ) ) * SuperCellSize::toRT( ) - ); - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - superCellParticipate = 0; - } - ); + /* is 1 
if a offset of a cell in the supercell is equal the slice (offset) + * else 0 + */ + PMACC_SMEM(acc, superCellParticipate, int); - __syncthreads(); + /* true if the virtual worker is processing a pixel within the resulting image, + * else false + */ + memory::CtxArray isImageThreadCtx(false); - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + DataSpace const suplercellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + // offset of the supercell (in cells) to the origin of the local domain + DataSpace const supercellCellOffset( + (suplercellIdx - mapper.getGuardingSuperCells()) * SuperCellSize::toRT()); + + onlyMaster([&](uint32_t const, uint32_t const) { superCellParticipate = 0; }); + + cupla::__syncthreads(acc); + + forEachCell([&](uint32_t const linearIdx, uint32_t const idx) { // cell index within the superCell - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); + DataSpace const cellIdx = DataSpaceOperations::template map(linearIdx); // cell offset to origin of the local domain - DataSpace< simDim > const realCell( supercellCellOffset + cellIdx ); + DataSpace const realCell(supercellCellOffset + cellIdx); - bool const isCellOnSlice = IsPartOfSlice< >{}( - realCell, - sliceDim, - localDomainOffset, - slice - ); + bool const isCellOnSlice = IsPartOfSlice<>{}(realCell, sliceDim, localDomainOffset, slice); - if( isCellOnSlice ) + if(isCellOnSlice) { // atomic avoids: WAW Error in cuda-memcheck racecheck - nvidia::atomicAllExch( - acc, - &superCellParticipate, - 1, - ::alpaka::hierarchy::Threads{ } - ); - isImageThreadCtx[ idx ] = true; + nvidia::atomicAllExch(acc, &superCellParticipate, 1, ::alpaka::hierarchy::Threads{}); + isImageThreadCtx[idx] = true; } - } - ); - - __syncthreads(); - - if( superCellParticipate == 0 ) - return; - - // slice is always two dimensional - using SharedMem = DataBox< - PitchedBox< - float_X, - DIM2 - > - >; - - sharedMemExtern( - 
shBlock, - float_X - ); - - // shared memory box for particle counter - SharedMem counter( - PitchedBox< - float_X, - DIM2 - >( - ( float_X* ) shBlock, - DataSpace< DIM2 > (), + }); + + cupla::__syncthreads(acc); + + if(superCellParticipate == 0) + return; + + // slice is always two dimensional + using SharedMem = DataBox>; + + sharedMemExtern(shBlock, float_X); + + // shared memory box for particle counter + SharedMem counter(PitchedBox( + (float_X*) shBlock, + DataSpace(), // pitch in byte - SuperCellSize::toRT( )[ transpose.x() ] * sizeof( float_X ) - ) - ); - - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + SuperCellSize::toRT()[transpose.x()] * sizeof(float_X))); + + forEachCell([&](uint32_t const linearIdx, uint32_t const idx) { /* cell index within the superCell */ - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); + DataSpace const cellIdx = DataSpaceOperations::template map(linearIdx); - DataSpace< DIM2 > const localCell( - cellIdx[ transpose.x() ], - cellIdx[ transpose.y() ] - ); + DataSpace const localCell(cellIdx[transpose.x()], cellIdx[transpose.y()]); - if( isImageThreadCtx[ idx ] ) + if(isImageThreadCtx[idx]) { - counter( localCell ) = float_X(0.0); + counter(localCell) = float_X(0.0); } - } - ); + }); - // wait that shared memory is set to zero - __syncthreads(); + // wait that shared memory is set to zero + cupla::__syncthreads(acc); - using FramePtr = typename T_ParBox::FramePtr; - FramePtr frame = pb.getFirstFrame( suplercellIdx ); + using FramePtr = typename T_ParBox::FramePtr; + FramePtr frame = pb.getFirstFrame(suplercellIdx); - // each virtual worker works on a particle in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + // each virtual worker works on a particle in the frame + ForEachIdx forEachParticle(workerIdx); - while( frame.isValid( ) ) - { - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - 
{ - auto particle = frame[ linearIdx ] ; - if( particle[ multiMask_ ] == 1) + while(frame.isValid()) + { + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + auto particle = frame[linearIdx]; + if(particle[multiMask_] == 1) { - int const linearCellIdx = particle[ localCellIdx_ ]; + int const linearCellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) - DataSpace< simDim > const particleCellOffset( - DataSpaceOperations< simDim >::template map< SuperCellSize >( linearCellIdx ) - ); - bool const isParticleOnSlice = IsPartOfSlice< >{}( + DataSpace const particleCellOffset( + DataSpaceOperations::template map(linearCellIdx)); + bool const isParticleOnSlice = IsPartOfSlice<>{}( particleCellOffset + supercellCellOffset, sliceDim, localDomainOffset, - slice - ); - if( isParticleOnSlice ) + slice); + if(isParticleOnSlice) { - DataSpace< DIM2 > const reducedCell( - particleCellOffset[ transpose.x( ) ], - particleCellOffset[ transpose.y( ) ] - ); - atomicAdd( - &( counter( reducedCell ) ), + DataSpace const reducedCell( + particleCellOffset[transpose.x()], + particleCellOffset[transpose.y()]); + cupla::atomicAdd( + acc, + &(counter(reducedCell)), // normalize the value to avoid bad precision for large macro particle weightings - particle[ weighting_ ] / particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE, - ::alpaka::hierarchy::Threads{ } - ); + particle[weighting_] / particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE, + ::alpaka::hierarchy::Threads{}); } } - } - ); + }); - frame = pb.getNextFrame(frame); - } + frame = pb.getNextFrame(frame); + } - // wait that all worker finsihed the reduce operation - __syncthreads(); + // wait that all worker finsihed the reduce operation + cupla::__syncthreads(acc); - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( isImageThreadCtx[ idx ] ) + forEachCell([&](uint32_t const linearIdx, uint32_t const idx) { + if(isImageThreadCtx[idx]) { // 
cell index within the superCell - DataSpace< simDim > const cellIdx = DataSpaceOperations< simDim >::template map< SuperCellSize >( linearIdx ); + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); // cell offset to origin of the local domain - DataSpace< simDim > const realCell( supercellCellOffset + cellIdx ); + DataSpace const realCell(supercellCellOffset + cellIdx); // index in image - DataSpace< DIM2 > const imageCell( - realCell[ transpose.x( ) ], - realCell[ transpose.y( ) ] - ); + DataSpace const imageCell(realCell[transpose.x()], realCell[transpose.y()]); - DataSpace< DIM2 > const localCell( - cellIdx[ transpose.x( ) ], - cellIdx[ transpose.y( ) ] - ); + DataSpace const localCell(cellIdx[transpose.x()], cellIdx[transpose.y()]); /** Note: normally, we would multiply by particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE again. * BUT: since we are interested in a simple value between 0 and 1, @@ -646,482 +507,396 @@ struct KernelPaintParticles3D * particles) and devide by the number of typical macro particles * per cell */ - float_X value = counter( localCell ) / - float_X( particles::TYPICAL_PARTICLES_PER_CELL ); - if( value > 1.0 ) + float_X value = counter(localCell) / float_X(particles::TYPICAL_PARTICLES_PER_CELL); + if(value > 1.0) value = 1.0; visPreview::preParticleDensCol::addRGB( - image( imageCell ), + image(imageCell), value, - visPreview::preParticleDens_opacity - ); + visPreview::preParticleDens_opacity); // cut to [0, 1] - for( uint32_t d = 0; d < DIM3; ++d ) + for(uint32_t d = 0; d < DIM3; ++d) { - if( image( imageCell )[ d ] < float_X( 0.0 ) ) - image( imageCell )[ d ] = float_X( 0.0 ); - if( image( imageCell )[ d ] > float_X( 1.0 ) ) - image( imageCell )[ d ] = float_X( 1.0 ); + if(image(imageCell)[d] < float_X(0.0)) + image(imageCell)[d] = float_X(0.0); + if(image(imageCell)[d] > float_X(1.0)) + image(imageCell)[d] = float_X(1.0); } } - } - ); - } -}; - -namespace vis_kernels -{ + }); + } + }; -/** divide each cell by 
a value - * - * @tparam T_numWorkers number of workers - * @tparam T_blockSize number of elements which will be handled - * within a kernel block - */ -template< - uint32_t T_numWorkers, - uint32_t T_blockSize -> -struct DivideAnyCell -{ - /** derive particle values - * - * @tparam T_Mem pmacc::DataBox, type of the on dimensional memory - * @tparam T_Type divisor type - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param mem memory[in,out] to manipulate, must provide the `operator[](int)` - * @param n number of elements in mem - * @param divisor divisor for the division - */ - template< - typename T_Mem, - typename T_Type, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_Mem mem, - uint32_t n, - T_Type divisor - ) const + namespace vis_kernels { - using namespace mappings::threads; + /** divide each cell by a value + * + * @tparam T_numWorkers number of workers + * @tparam T_blockSize number of elements which will be handled + * within a kernel block + */ + template + struct DivideAnyCell + { + /** derive particle values + * + * @tparam T_Mem pmacc::DataBox, type of the on dimensional memory + * @tparam T_Type divisor type + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param mem memory[in,out] to manipulate, must provide the `operator[](int)` + * @param n number of elements in mem + * @param divisor divisor for the division + */ + template + DINLINE void operator()(T_Acc const& acc, T_Mem mem, uint32_t n, T_Type divisor) const + { + using namespace mappings::threads; - constexpr uint32_t numWorkers = T_numWorkers; + constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - using SupercellDomCfg = IdxConfig< - T_blockSize, - numWorkers - >; - // each virtual worker works on a cell - ForEachIdx< SupercellDomCfg > forEachCell( workerIdx ); + using SupercellDomCfg = IdxConfig; + // each 
virtual worker works on a cell + ForEachIdx forEachCell(workerIdx); - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - uint32_t tid = blockIdx.x * T_blockSize + linearIdx; - if( tid >= n ) - return; + forEachCell([&](uint32_t const linearIdx, uint32_t const) { + uint32_t tid = cupla::blockIdx(acc).x * T_blockSize + linearIdx; + if(tid >= n) + return; - float3_X const FLT3_MIN = float3_X::create( FLT_MIN ); - mem[ tid ] /= ( divisor + FLT3_MIN ); + float3_X const FLT3_MIN = float3_X::create(FLT_MIN); + mem[tid] /= (divisor + FLT3_MIN); + }); } - ); - } -}; + }; -/** convert channel value to an RGB color - * - * @tparam T_numWorkers number of workers - * @tparam T_blockSize number of elements which will be handled - * within a kernel block - */ -template< - uint32_t T_numWorkers, - uint32_t T_blockSize -> -struct ChannelsToRGB -{ - /** convert each element to an RGB color - * - * @tparam T_Mem pmacc::DataBox, type of the on dimensional memory - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param mem memory[in,out] to manipulate, must provide the `operator[](int)` - * @param n number of elements in mem - */ - template< - typename T_Mem, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_Mem mem, - uint32_t n - ) const - { - using namespace mappings::threads; + /** convert channel value to an RGB color + * + * @tparam T_numWorkers number of workers + * @tparam T_blockSize number of elements which will be handled + * within a kernel block + */ + template + struct ChannelsToRGB + { + /** convert each element to an RGB color + * + * @tparam T_Mem pmacc::DataBox, type of the on dimensional memory + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param mem memory[in,out] to manipulate, must provide the `operator[](int)` + * @param n number of elements in mem + */ + template + DINLINE void operator()(T_Acc const& acc, T_Mem mem, uint32_t n) const + { + 
using namespace mappings::threads; - constexpr uint32_t numWorkers = T_numWorkers; + constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - using SupercellDomCfg = IdxConfig< - T_blockSize, - numWorkers - >; - // each virtual worker works on a cell - ForEachIdx< SupercellDomCfg > forEachCell( workerIdx ); + using SupercellDomCfg = IdxConfig; + // each virtual worker works on a cell + ForEachIdx forEachCell(workerIdx); - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - uint32_t const tid = blockIdx.x * T_blockSize + linearIdx; - if( tid >= n ) - return; + forEachCell([&](uint32_t const linearIdx, uint32_t const) { + uint32_t const tid = cupla::blockIdx(acc).x * T_blockSize + linearIdx; + if(tid >= n) + return; + + float3_X rgb(float3_X::create(0.0)); - float3_X rgb(float3_X::create(0.0)); - - visPreview::preChannel1Col::addRGB( - rgb, - mem[ tid ].x( ), - visPreview::preChannel1_opacity - ); - visPreview::preChannel2Col::addRGB( - rgb, - mem[ tid ].y( ), - visPreview::preChannel2_opacity - ); - visPreview::preChannel3Col::addRGB( - rgb, - mem[ tid ].z( ), - visPreview::preChannel3_opacity - ); - mem[ tid ] = rgb; + visPreview::preChannel1Col::addRGB(rgb, mem[tid].x(), visPreview::preChannel1_opacity); + visPreview::preChannel2Col::addRGB(rgb, mem[tid].y(), visPreview::preChannel2_opacity); + visPreview::preChannel3Col::addRGB(rgb, mem[tid].z(), visPreview::preChannel3_opacity); + mem[tid] = rgb; + }); } - ); - } -}; + }; -} + } // namespace vis_kernels -/** - * Visualizes simulation data by writing png files. - * Visulization is performed in an additional thread. 
- */ -template -class Visualisation : public ILightweightPlugin -{ -private: - typedef MappingDesc::SuperCellSize SuperCellSize; - - -public: - using FrameType = typename ParticlesType::FrameType; - using CreatorType = Output; - - Visualisation(std::string name, Output output, std::string notifyPeriod, DataSpace transpose, float_X slicePoint) : - m_output(output), - pluginName(name), - cellDescription(nullptr), - particleTag(ParticlesType::FrameType::getName()), - m_notifyPeriod(notifyPeriod), - m_transpose(transpose), - m_slicePoint(slicePoint), - isMaster(false), - header(nullptr), - reduce(1024), - img(nullptr) + /** + * Visualizes simulation data by writing png files. + * Visulization is performed in an additional thread. + */ + template + class Visualisation : public ILightweightPlugin { - sliceDim = 0; - if (m_transpose.x() == 0 || m_transpose.y() == 0) - sliceDim = 1; - if ((m_transpose.x() == 1 || m_transpose.y() == 1) && sliceDim == 1) - sliceDim = 2; + private: + typedef MappingDesc::SuperCellSize SuperCellSize; + + + public: + using FrameType = typename ParticlesType::FrameType; + using CreatorType = Output; + + Visualisation( + std::string name, + Output output, + std::string notifyPeriod, + DataSpace transpose, + float_X slicePoint) + : m_output(output) + , pluginName(name) + , cellDescription(nullptr) + , particleTag(ParticlesType::FrameType::getName()) + , m_notifyPeriod(notifyPeriod) + , m_transpose(transpose) + , m_slicePoint(slicePoint) + , isMaster(false) + , header(nullptr) + , reduce(1024) + , img(nullptr) + { + sliceDim = 0; + if(m_transpose.x() == 0 || m_transpose.y() == 0) + sliceDim = 1; + if((m_transpose.x() == 1 || m_transpose.y() == 1) && sliceDim == 1) + sliceDim = 2; + + Environment<>::get().PluginConnector().registerPlugin(this); + Environment<>::get().PluginConnector().setNotificationPeriod(this, m_notifyPeriod); + } - Environment<>::get().PluginConnector().registerPlugin(this); - 
Environment<>::get().PluginConnector().setNotificationPeriod(this, m_notifyPeriod); - } + virtual ~Visualisation() + { + /* wait that shared buffers can destroyed */ + m_output.join(); + if(!m_notifyPeriod.empty()) + { + __delete(img); + MessageHeader::destroy(header); + } + } - virtual ~Visualisation() - { - /* wait that shared buffers can destroyed */ - m_output.join(); - if(!m_notifyPeriod.empty()) + std::string pluginGetName() const { - __delete(img); - MessageHeader::destroy(header); + return "Visualisation"; } - } - std::string pluginGetName() const - { - return "Visualisation"; - } + void notify(uint32_t currentStep) + { + PMACC_ASSERT(cellDescription != nullptr); + const DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + Window window(MovingWindow::getInstance().getWindow(currentStep)); - void notify(uint32_t currentStep) - { - PMACC_ASSERT(cellDescription != nullptr); - const DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); - Window window(MovingWindow::getInstance().getWindow(currentStep)); + /*sliceOffset is only used in 3D*/ + sliceOffset = (int) ((float_32)(window.globalDimensions.size[sliceDim]) * m_slicePoint) + + window.globalDimensions.offset[sliceDim]; - /*sliceOffset is only used in 3D*/ - sliceOffset = (int) ((float_32) (window.globalDimensions.size[sliceDim]) * m_slicePoint) + window.globalDimensions.offset[sliceDim]; + if(!doDrawing()) + { + return; + } + createImage(currentStep, window); + } - if (!doDrawing()) + void setMappingDescription(MappingDesc* cellDescription) { - return; + PMACC_ASSERT(cellDescription != nullptr); + this->cellDescription = cellDescription; } - createImage(currentStep, window); - } - void setMappingDescription(MappingDesc *cellDescription) - { - PMACC_ASSERT(cellDescription != nullptr); - this->cellDescription = cellDescription; - } + void createImage(uint32_t currentStep, Window window) + { + DataConnector& dc = 
Environment<>::get().DataConnector(); + // Data does not need to be synchronized as visualization is + // done at the device. + auto fieldB = dc.get(FieldB::getName(), true); + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldJ = dc.get(FieldJ::getName(), true); + auto particles = dc.get(particleTag, true); - void createImage(uint32_t currentStep, Window window) - { - DataConnector &dc = Environment<>::get().DataConnector(); - // Data does not need to be synchronized as visualization is - // done at the device. - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldJ = dc.get< FieldJ >( FieldJ::getName(), true ); - auto particles = dc.get< ParticlesType >( particleTag, true ); - - /* wait that shared buffers can accessed without conflicts */ - m_output.join(); - - uint32_t localDomainOffset = 0; - if( simDim == DIM3 ) - localDomainOffset = Environment::get().SubGrid().getLocalDomain().offset[ sliceDim ]; - - constexpr uint32_t cellsPerSupercell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - cellsPerSupercell - >::value; - - PMACC_ASSERT(cellDescription != nullptr); - - AreaMapping< - CORE + BORDER, - MappingDesc - > mapper( *cellDescription ); - - //create image fields - PMACC_KERNEL( KernelPaintFields< numWorkers >{} )( - mapper.getGridDim(), - numWorkers - )( - fieldE->getDeviceDataBox(), - fieldB->getDeviceDataBox(), - fieldJ->getDeviceDataBox(), - img->getDeviceBuffer().getDataBox(), - m_transpose, - sliceOffset, - localDomainOffset, - sliceDim, - mapper - ); - - // find maximum for img.x()/y and z and return it as float3_X - int elements = img->getGridLayout().getDataSpace().productOfComponents(); - - //Add one dimension access to 2d DataBox - typedef DataBoxDim1Access::DataBoxType> D1Box; - D1Box d1access(img->getDeviceBuffer().getDataBox(), img->getGridLayout().getDataSpace()); - -#if 
(EM_FIELD_SCALE_CHANNEL1 == -1 || EM_FIELD_SCALE_CHANNEL2 == -1 || EM_FIELD_SCALE_CHANNEL3 == -1) - //reduce with functor max - float3_X max = reduce(nvidia::functors::Max(), - d1access, - elements); - //reduce with functor min - //float3_X min = reduce(nvidia::functors::Min(), - // d1access, - // elements); -#if (EM_FIELD_SCALE_CHANNEL1 != -1 ) - max.x() = float_X(1.0); -#endif -#if (EM_FIELD_SCALE_CHANNEL2 != -1 ) - max.y() = float_X(1.0); -#endif -#if (EM_FIELD_SCALE_CHANNEL3 != -1 ) - max.z() = float_X(1.0); -#endif + /* wait that shared buffers can accessed without conflicts */ + m_output.join(); - /* We don't know the size of the supercell plane at compile time - * (because of the runtime dimension selection in any plugin), - * thus we must use a one dimension kernel and no mapper - */ - PMACC_KERNEL( - vis_kernels::DivideAnyCell< - numWorkers, - cellsPerSupercell - >{ } - )( - ( elements + cellsPerSupercell - 1u ) / cellsPerSupercell, - numWorkers - )( - d1access, - elements, - max - ); + uint32_t localDomainOffset = 0; + if(simDim == DIM3) + localDomainOffset = Environment::get().SubGrid().getLocalDomain().offset[sliceDim]; + + constexpr uint32_t cellsPerSupercell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers::value; + + PMACC_ASSERT(cellDescription != nullptr); + + AreaMapping mapper(*cellDescription); + + // create image fields + PMACC_KERNEL(KernelPaintFields{}) + (mapper.getGridDim(), numWorkers)( + fieldE->getDeviceDataBox(), + fieldB->getDeviceDataBox(), + fieldJ->getDeviceDataBox(), + img->getDeviceBuffer().getDataBox(), + m_transpose, + sliceOffset, + localDomainOffset, + sliceDim, + mapper); + + // find maximum for img.x()/y and z and return it as float3_X + int elements = img->getGridLayout().getDataSpace().productOfComponents(); + + // Add one dimension access to 2d DataBox + typedef DataBoxDim1Access::DataBoxType> D1Box; + D1Box d1access(img->getDeviceBuffer().getDataBox(), 
img->getGridLayout().getDataSpace()); + +#if(EM_FIELD_SCALE_CHANNEL1 == -1 || EM_FIELD_SCALE_CHANNEL2 == -1 || EM_FIELD_SCALE_CHANNEL3 == -1) + // reduce with functor max + float3_X max = reduce(nvidia::functors::Max(), d1access, elements); + // reduce with functor min + // float3_X min = reduce(nvidia::functors::Min(), + // d1access, + // elements); +# if(EM_FIELD_SCALE_CHANNEL1 != -1) + max.x() = float_X(1.0); +# endif +# if(EM_FIELD_SCALE_CHANNEL2 != -1) + max.y() = float_X(1.0); +# endif +# if(EM_FIELD_SCALE_CHANNEL3 != -1) + max.z() = float_X(1.0); +# endif + + /* We don't know the size of the supercell plane at compile time + * (because of the runtime dimension selection in any plugin), + * thus we must use a one dimension kernel and no mapper + */ + PMACC_KERNEL(vis_kernels::DivideAnyCell{}) + ((elements + cellsPerSupercell - 1u) / cellsPerSupercell, numWorkers)(d1access, elements, max); #endif - // convert channels to RGB - PMACC_KERNEL( - vis_kernels::ChannelsToRGB< - numWorkers, - cellsPerSupercell - >{ } - )( - ( elements + cellsPerSupercell - 1u ) / cellsPerSupercell, - numWorkers - )( - d1access, - elements - ); - - // add density color channel - DataSpace blockSize(MappingDesc::SuperCellSize::toRT()); - DataSpace blockSize2D(blockSize[m_transpose.x()], blockSize[m_transpose.y()]); - - //create image particles - PMACC_KERNEL( KernelPaintParticles3D< numWorkers >{} )( - mapper.getGridDim(), - numWorkers, - blockSize2D.productOfComponents() * sizeof( float_X ) - )( - particles->getDeviceParticlesBox(), - img->getDeviceBuffer().getDataBox(), - m_transpose, - sliceOffset, - localDomainOffset, - sliceDim, - mapper - ); - - // send the RGB image back to host - img->deviceToHost(); - - - header->update(*cellDescription, window, m_transpose, currentStep); - - - __getTransactionEvent().waitForFinished(); //wait for copy picture - - DataSpace size = img->getGridLayout().getDataSpace(); - - auto hostBox = img->getHostBuffer().getDataBox(); - - if 
(picongpu::white_box_per_GPU) - { - hostBox[0 ][0 ] = float3_X(1.0, 1.0, 1.0); - hostBox[size.y() - 1 ][0 ] = float3_X(1.0, 1.0, 1.0); - hostBox[0 ][size.x() - 1] = float3_X(1.0, 1.0, 1.0); - hostBox[size.y() - 1 ][size.x() - 1] = float3_X(1.0, 1.0, 1.0); - } - auto resultBox = gather(hostBox, *header); - if (isMaster) - { - m_output(resultBox.shift(header->window.offset), header->window.size, *header); - } + // convert channels to RGB + PMACC_KERNEL(vis_kernels::ChannelsToRGB{}) + ((elements + cellsPerSupercell - 1u) / cellsPerSupercell, numWorkers)(d1access, elements); - } + // add density color channel + DataSpace blockSize(MappingDesc::SuperCellSize::toRT()); + DataSpace blockSize2D(blockSize[m_transpose.x()], blockSize[m_transpose.y()]); - void init() - { - if(!m_notifyPeriod.empty()) - { - PMACC_ASSERT(cellDescription != nullptr); - const DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + // create image particles + PMACC_KERNEL(KernelPaintParticles3D{}) + (mapper.getGridDim(), numWorkers, blockSize2D.productOfComponents() * sizeof(float_X))( + particles->getDeviceParticlesBox(), + img->getDeviceBuffer().getDataBox(), + m_transpose, + sliceOffset, + localDomainOffset, + sliceDim, + mapper); + + // send the RGB image back to host + img->deviceToHost(); - Window window(MovingWindow::getInstance().getWindow(0)); - sliceOffset = (int) ((float_32) (window.globalDimensions.size[sliceDim]) * m_slicePoint) + window.globalDimensions.offset[sliceDim]; + header->update(*cellDescription, window, m_transpose, currentStep); - const DataSpace gpus = Environment::get().GridController().getGpuNodes(); - float_32 cellSizeArr[3] = {0, 0, 0}; - for (uint32_t i = 0; i < simDim; ++i) - cellSizeArr[i] = cellSize[i]; + __getTransactionEvent().waitForFinished(); // wait for copy picture - header = MessageHeader::create(); - header->update(*cellDescription, window, m_transpose, 0, cellSizeArr, gpus); + DataSpace size = 
img->getGridLayout().getDataSpace(); - bool isDrawing = doDrawing(); - isMaster = gather.init(isDrawing); - reduce.participate(isDrawing); + auto hostBox = img->getHostBuffer().getDataBox(); - /* create memory for the local picture if the gpu participate on the visualization */ - if(isDrawing) - img = new GridBuffer (header->node.maxSize); + if(picongpu::white_box_per_GPU) + { + hostBox[0][0] = float3_X(1.0, 1.0, 1.0); + hostBox[size.y() - 1][0] = float3_X(1.0, 1.0, 1.0); + hostBox[0][size.x() - 1] = float3_X(1.0, 1.0, 1.0); + hostBox[size.y() - 1][size.x() - 1] = float3_X(1.0, 1.0, 1.0); + } + auto resultBox = gather(hostBox, *header); + if(isMaster) + { + m_output(resultBox.shift(header->window.offset), header->window.size, *header); + } } - } - void pluginRegisterHelp(po::options_description& desc) - { - // nothing to do here - } + void init() + { + if(!m_notifyPeriod.empty()) + { + PMACC_ASSERT(cellDescription != nullptr); + const DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); -private: + Window window(MovingWindow::getInstance().getWindow(0)); + sliceOffset = (int) ((float_32)(window.globalDimensions.size[sliceDim]) * m_slicePoint) + + window.globalDimensions.offset[sliceDim]; - bool doDrawing() - { - PMACC_ASSERT(cellDescription != nullptr); - const DataSpace globalRootCellPos(Environment::get().SubGrid().getLocalDomain().offset); -#if(SIMDIM==DIM3) - const bool tmp = globalRootCellPos[sliceDim] + Environment::get().SubGrid().getLocalDomain().size[sliceDim] > sliceOffset && - globalRootCellPos[sliceDim] <= sliceOffset; - return tmp; -#else - return true; -#endif - } + const DataSpace gpus = Environment::get().GridController().getGpuNodes(); + + float_32 cellSizeArr[3] = {0, 0, 0}; + for(uint32_t i = 0; i < simDim; ++i) + cellSizeArr[i] = cellSize[i]; + + header = MessageHeader::create(); + header->update(*cellDescription, window, m_transpose, 0, cellSizeArr, gpus); - MappingDesc *cellDescription; - SimulationDataId 
particleTag; + bool isDrawing = doDrawing(); + isMaster = gather.init(isDrawing); + reduce.participate(isDrawing); + + /* create memory for the local picture if the gpu participate on the visualization */ + if(isDrawing) + img = new GridBuffer(header->node.maxSize); + } + } + + void pluginRegisterHelp(po::options_description& desc) + { + // nothing to do here + } + + private: + bool doDrawing() + { + PMACC_ASSERT(cellDescription != nullptr); + const DataSpace globalRootCellPos(Environment::get().SubGrid().getLocalDomain().offset); +#if(SIMDIM == DIM3) + const bool tmp + = globalRootCellPos[sliceDim] + Environment::get().SubGrid().getLocalDomain().size[sliceDim] + > sliceOffset + && globalRootCellPos[sliceDim] <= sliceOffset; + return tmp; +#else + return true; +#endif + } - GridBuffer *img; - int sliceOffset; - std::string m_notifyPeriod; - float_X m_slicePoint; + MappingDesc* cellDescription; + SimulationDataId particleTag; - std::string pluginName; + GridBuffer* img; + int sliceOffset; + std::string m_notifyPeriod; + float_X m_slicePoint; - DataSpace m_transpose; - uint32_t sliceDim; + std::string pluginName; - MessageHeader* header; - Output m_output; - GatherSlice gather; - bool isMaster; - algorithms::GlobalReduce reduce; -}; + DataSpace m_transpose; + uint32_t sliceDim; + MessageHeader* header; + Output m_output; + GatherSlice gather; + bool isMaster; + algorithms::GlobalReduce reduce; + }; -} +} // namespace picongpu diff --git a/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.hpp b/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.hpp index 97d0c40d9e..b66e70d3b9 100644 --- a/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.hpp +++ b/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau, Rene Widera +/* Copyright 2016-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -43,813 +43,701 @@ #include #include +#include #include #include #include #include +#include #include #include #include #include +#include namespace picongpu { -using namespace pmacc; + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; -/** Virtual particle calorimeter plugin. - * - * (virtually) propagates and collects particles to infinite distance. - * - */ -template -class ParticleCalorimeter : public plugins::multi::ISlave -{ - typedef pmacc::container::DeviceBuffer DBufCalorimeter; - typedef pmacc::container::HostBuffer HBufCalorimeter; + /** Virtual particle calorimeter plugin. + * + * (virtually) propagates and collects particles to infinite distance. + * + */ + template + class ParticleCalorimeter : public plugins::multi::ISlave + { + typedef pmacc::container::DeviceBuffer DBufCalorimeter; + typedef pmacc::container::HostBuffer HBufCalorimeter; - template - struct DivideInPlace - { - using Type = T_Type; - const Type divisor; + template + struct DivideInPlace + { + using Type = T_Type; + const Type divisor; - DivideInPlace( const Type& divisor ) : divisor( divisor ) {} + DivideInPlace(const Type& divisor) : divisor(divisor) + { + } - template< typename T_Acc > - HDINLINE void operator()( T_Acc const &, T_Type& val ) const + template + HDINLINE void operator()(T_Acc const&, T_Type& val) const + { + val = val / this->divisor; + } + }; + + public: + typedef CalorimeterFunctor MyCalorimeterFunctor; + + private: + typedef boost::shared_ptr MyCalorimeterFunctorPtr; + MyCalorimeterFunctorPtr calorimeterFunctor; + + typedef boost::shared_ptr> AllGPU_reduce; + AllGPU_reduce allGPU_reduce; + + public: + void restart(uint32_t restartStep, const std::string& restartDirectory) { - val = val / this->divisor; - } - }; + HBufCalorimeter hBufLeftParsCalorimeter(this->dBufLeftParsCalorimeter->size()); + + pmacc::GridController& gridCon = pmacc::Environment::get().GridController(); + pmacc::CommunicatorMPI& comm = 
gridCon.getCommunicator(); + uint32_t rank = comm.getRank(); -public: - typedef CalorimeterFunctor MyCalorimeterFunctor; -private: - typedef boost::shared_ptr MyCalorimeterFunctorPtr; - MyCalorimeterFunctorPtr calorimeterFunctor; + if(rank == 0) + { + splash::SerialDataCollector hdf5DataFile(1); + splash::DataCollector::FileCreationAttr fAttr; - typedef boost::shared_ptr > AllGPU_reduce; - AllGPU_reduce allGPU_reduce; + splash::DataCollector::initFileCreationAttr(fAttr); + fAttr.fileAccType = splash::DataCollector::FAT_READ; -public: - void restart(uint32_t restartStep, const std::string & restartDirectory) - { - HBufCalorimeter hBufLeftParsCalorimeter(this->dBufLeftParsCalorimeter->size()); + std::stringstream filename; + filename << restartDirectory << "/" << (this->foldername + "/" + filenamePrefix) << "_" << restartStep; + + hdf5DataFile.open(filename.str().c_str(), fAttr); + + splash::Dimensions dimensions; + + hdf5DataFile.read( + restartStep, + this->leftParticlesDatasetName.c_str(), + dimensions, + &(*hBufLeftParsCalorimeter.origin())); - pmacc::GridController& gridCon = pmacc::Environment::get().GridController(); - pmacc::CommunicatorMPI& comm = gridCon.getCommunicator(); - uint32_t rank = comm.getRank(); + hdf5DataFile.close(); + + /* rank 0 divides and distributes the calorimeter to all ranks in equal parts */ + uint32_t numRanks = gridCon.getGlobalSize(); + // get a host accelerator + auto hostDev = cupla::manager::Device::get().device(); + pmacc::algorithm::host::Foreach()( + hostDev, + hBufLeftParsCalorimeter.zone(), + hBufLeftParsCalorimeter.origin(), + DivideInPlace(float_X(numRanks))); + } + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_Bcast( + &(*hBufLeftParsCalorimeter.origin()), + hBufLeftParsCalorimeter.size().productOfComponents() * sizeof(float_X), + MPI_CHAR, + 0, /* rank 0 */ + comm.getMPIComm()); + + *this->dBufLeftParsCalorimeter = 
hBufLeftParsCalorimeter; + } - if(rank == 0) + + void checkpoint(uint32_t currentStep, const std::string& checkpointDirectory) { + /* create folder for hdf5 checkpoint files*/ + Environment::get().Filesystem().createDirectoryWithPermissions( + checkpointDirectory + "/" + this->foldername); + HBufCalorimeter hBufLeftParsCalorimeter(this->dBufLeftParsCalorimeter->size()); + HBufCalorimeter hBufTotal(hBufLeftParsCalorimeter.size()); + + hBufLeftParsCalorimeter = *this->dBufLeftParsCalorimeter; + + /* mpi reduce */ + (*this->allGPU_reduce)(hBufTotal, hBufLeftParsCalorimeter, pmacc::algorithm::functor::Add{}); + if(!this->allGPU_reduce->root()) + return; + splash::SerialDataCollector hdf5DataFile(1); splash::DataCollector::FileCreationAttr fAttr; splash::DataCollector::initFileCreationAttr(fAttr); - fAttr.fileAccType = splash::DataCollector::FAT_READ; std::stringstream filename; - filename << restartDirectory << "/" << ( this->foldername + "/" + filenamePrefix ) << "_" << restartStep; + filename << checkpointDirectory << "/" << (this->foldername + "/" + filenamePrefix) << "_" << currentStep; hdf5DataFile.open(filename.str().c_str(), fAttr); - splash::Dimensions dimensions; + typename PICToSplash::type SplashTypeX; - hdf5DataFile.read(restartStep, - this->leftParticlesDatasetName.c_str(), - dimensions, - &(*hBufLeftParsCalorimeter.origin())); + splash::Dimensions bufferSize(hBufTotal.size().x(), hBufTotal.size().y(), hBufTotal.size().z()); - hdf5DataFile.close(); + /* if there is only one energy bin, omit the energy axis */ + uint32_t dimension = this->numBinsEnergy == 1 ? 
DIM2 : DIM3; + hdf5DataFile.write( + currentStep, + SplashTypeX, + dimension, + splash::Selection(bufferSize), + this->leftParticlesDatasetName.c_str(), + &(*hBufTotal.origin())); - /* rank 0 divides and distributes the calorimeter to all ranks in equal parts */ - uint32_t numRanks = gridCon.getGlobalSize(); - // get a host accelerator - auto hostDev = cupla::manager::Device< cupla::AccHost >::get().device( ); - pmacc::algorithm::host::Foreach()(hostDev, - hBufLeftParsCalorimeter.zone(), - hBufLeftParsCalorimeter.origin(), - DivideInPlace(float_X(numRanks))); + hdf5DataFile.close(); } - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_Bcast(&(*hBufLeftParsCalorimeter.origin()), - hBufLeftParsCalorimeter.size().productOfComponents() * sizeof(float_X), - MPI_CHAR, - 0, /* rank 0 */ - comm.getMPIComm()); + private: + void initPlugin() + { + namespace pm = pmacc::math; - *this->dBufLeftParsCalorimeter = hBufLeftParsCalorimeter; - } + if(!(this->openingYaw_deg > float_X(0.0) && this->openingYaw_deg <= float_X(360.0))) + { + std::stringstream msg; + msg << "[Plugin] [" << m_help->getOptionPrefix() << "] openingYaw has to be within (0, 360]." + << std::endl; + throw std::runtime_error(msg.str()); + } + if(!(this->openingPitch_deg > float_X(0.0) && this->openingPitch_deg <= float_X(180.0))) + { + std::stringstream msg; + msg << "[Plugin] [" << m_help->getOptionPrefix() << "] openingPitch has to be within (0, 180]." + << std::endl; + throw std::runtime_error(msg.str()); + } + if(this->minEnergy < float_X(0.0)) + { + std::stringstream msg; + msg << "[Plugin] [" << m_help->getOptionPrefix() << "] minEnergy can not be negative." << std::endl; + throw std::runtime_error(msg.str()); + } + if(this->logScale && this->minEnergy == float_X(0.0)) + { + std::stringstream msg; + msg << "[Plugin] [" << m_help->getOptionPrefix() + << "] minEnergy can not be zero in logarithmic scaling." 
<< std::endl; + throw std::runtime_error(msg.str()); + } + if(this->numBinsEnergy > 1 && this->maxEnergy <= this->minEnergy) + { + std::stringstream msg; + msg << "[Plugin] [" << m_help->getOptionPrefix() << "] minEnergy has to be less than maxEnergy." + << std::endl; + throw std::runtime_error(msg.str()); + } + this->maxYaw_deg = float_X(0.5) * this->openingYaw_deg; + this->maxPitch_deg = float_X(0.5) * this->openingPitch_deg; + /* convert units */ + const float_64 minEnergy_SI = this->minEnergy * UNITCONV_keV_to_Joule; + const float_64 maxEnergy_SI = this->maxEnergy * UNITCONV_keV_to_Joule; + this->minEnergy = minEnergy_SI / UNIT_ENERGY; + this->maxEnergy = maxEnergy_SI / UNIT_ENERGY; + + /* allocate memory buffers */ + this->dBufCalorimeter = new DBufCalorimeter(this->numBinsYaw, this->numBinsPitch, this->numBinsEnergy); + this->dBufLeftParsCalorimeter = new DBufCalorimeter(this->dBufCalorimeter->size()); + this->hBufCalorimeter = new HBufCalorimeter(this->dBufCalorimeter->size()); + this->hBufTotalCalorimeter = new HBufCalorimeter(this->dBufCalorimeter->size()); + + /* fill calorimeter for left particles with zero */ + this->dBufLeftParsCalorimeter->assign(float_X(0.0)); + + /* create mpi reduce algorithm */ + pmacc::GridController& con = pmacc::Environment::get().GridController(); + pm::Size_t gpuDim = (pm::Size_t) con.getGpuNodes(); + zone::SphericZone zone_allGPUs(gpuDim); + this->allGPU_reduce = AllGPU_reduce(new pmacc::algorithm::mpi::Reduce(zone_allGPUs)); + + /* calculate rotated calorimeter frame from posYaw_deg and posPitch_deg */ + constexpr float_64 radsInDegree = pmacc::math::Pi::value / float_64(180.0); + const float_64 posYaw_rad = this->posYaw_deg * radsInDegree; + const float_64 posPitch_rad = this->posPitch_deg * radsInDegree; + this->calorimeterFrameVecY = float3_X( + math::sin(posYaw_rad) * math::cos(posPitch_rad), + math::cos(posYaw_rad) * math::cos(posPitch_rad), + math::sin(posPitch_rad)); + /* If the y-axis is pointing exactly up- or 
downwards we need to define the x-axis manually */ + if(math::abs(this->calorimeterFrameVecY.z()) == float_X(1.0)) + { + this->calorimeterFrameVecX = float3_X(1.0, 0.0, 0.0); + } + else + { + /* choose `calorimeterFrameVecX` so that the roll is zero. */ + const float3_X vecUp(0.0, 0.0, -1.0); + this->calorimeterFrameVecX = pmacc::math::cross(vecUp, this->calorimeterFrameVecY); + /* normalize vector */ + this->calorimeterFrameVecX /= math::abs(this->calorimeterFrameVecX); + } + this->calorimeterFrameVecZ = pmacc::math::cross(this->calorimeterFrameVecX, this->calorimeterFrameVecY); + + /* create calorimeter functor instance */ + this->calorimeterFunctor = MyCalorimeterFunctorPtr(new MyCalorimeterFunctor( + this->maxYaw_deg * radsInDegree, + this->maxPitch_deg * radsInDegree, + this->numBinsYaw, + this->numBinsPitch, + this->numBinsEnergy, + this->logScale ? pmacc::math::log10(this->minEnergy) : this->minEnergy, + this->logScale ? pmacc::math::log10(this->maxEnergy) : this->maxEnergy, + this->logScale, + this->calorimeterFrameVecX, + this->calorimeterFrameVecY, + this->calorimeterFrameVecZ)); + + /* create folder for hdf5 files*/ + Environment::get().Filesystem().createDirectoryWithPermissions(this->foldername); + + // set how often the plugin should be executed while PIConGPU is running + Environment<>::get().PluginConnector().setNotificationPeriod(this, m_help->notifyPeriod.get(m_id)); + } - void checkpoint(uint32_t currentStep, const std::string & checkpointDirectory) - { - /* create folder for hdf5 checkpoint files*/ - Environment::get().Filesystem().createDirectoryWithPermissions( checkpointDirectory + "/" + this->foldername); - HBufCalorimeter hBufLeftParsCalorimeter(this->dBufLeftParsCalorimeter->size()); - HBufCalorimeter hBufTotal(hBufLeftParsCalorimeter.size()); + void writeToHDF5File(uint32_t currentStep) + { + splash::SerialDataCollector hdf5DataFile(1); + splash::DataCollector::FileCreationAttr fAttr; - hBufLeftParsCalorimeter = 
*this->dBufLeftParsCalorimeter; + splash::DataCollector::initFileCreationAttr(fAttr); - /* mpi reduce */ - (*this->allGPU_reduce)(hBufTotal, hBufLeftParsCalorimeter, pmacc::algorithm::functor::Add{}); - if(!this->allGPU_reduce->root()) - return; + std::stringstream filename; + filename << this->foldername << "/" << filenamePrefix << "_" << currentStep; - splash::SerialDataCollector hdf5DataFile(1); - splash::DataCollector::FileCreationAttr fAttr; + hdf5DataFile.open(filename.str().c_str(), fAttr); - splash::DataCollector::initFileCreationAttr(fAttr); + typename PICToSplash::type SplashTypeX; + typename PICToSplash::type SplashType64; + typename PICToSplash::type SplashTypeBool; - std::stringstream filename; - filename << checkpointDirectory << "/" << ( this->foldername + "/" + filenamePrefix ) << "_" << currentStep; + splash::Dimensions bufferSize( + this->hBufTotalCalorimeter->size().x(), + this->hBufTotalCalorimeter->size().y(), + this->hBufTotalCalorimeter->size().z()); - hdf5DataFile.open(filename.str().c_str(), fAttr); + hdf5DataFile.write( + currentStep, + SplashTypeX, + this->numBinsEnergy == 1 ? DIM2 : DIM3, + splash::Selection(bufferSize), + "calorimeter", + &(*this->hBufTotalCalorimeter->origin())); - typename PICToSplash::type SplashTypeX; + const float_64 unitSI = particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * UNIT_ENERGY; - splash::Dimensions bufferSize(hBufTotal.size().x(), - hBufTotal.size().y(), - hBufTotal.size().z()); + hdf5DataFile.writeAttribute(currentStep, SplashType64, "calorimeter", "unitSI", &unitSI); - /* if there is only one energy bin, omit the energy axis */ - uint32_t dimension = this->numBinsEnergy == 1 ? 
DIM2 : DIM3; - hdf5DataFile.write(currentStep, - SplashTypeX, - dimension, - splash::Selection(bufferSize), - this->leftParticlesDatasetName.c_str(), - &(*hBufTotal.origin())); + hdf5DataFile.writeAttribute(currentStep, SplashType64, "calorimeter", "posYaw[deg]", &posYaw_deg); - hdf5DataFile.close(); - } + hdf5DataFile.writeAttribute(currentStep, SplashType64, "calorimeter", "posPitch[deg]", &posPitch_deg); -private: - void initPlugin() - { - namespace pm = pmacc::math; + hdf5DataFile.writeAttribute(currentStep, SplashTypeX, "calorimeter", "maxYaw[deg]", &this->maxYaw_deg); - if(!(this->openingYaw_deg > float_X(0.0) && this->openingYaw_deg <= float_X(360.0))) - { - std::stringstream msg; - msg << "[Plugin] [" << m_help->getOptionPrefix() - << "] openingYaw has to be within (0, 360]." - << std::endl; - throw std::runtime_error(msg.str()); - } - if(!(this->openingPitch_deg > float_X(0.0) && this->openingPitch_deg <= float_X(180.0))) - { - std::stringstream msg; - msg << "[Plugin] [" << m_help->getOptionPrefix() - << "] openingPitch has to be within (0, 180]." - << std::endl; - throw std::runtime_error(msg.str()); - } - if(this->minEnergy < float_X(0.0)) - { - std::stringstream msg; - msg << "[Plugin] [" << m_help->getOptionPrefix() - << "] minEnergy can not be negative." - << std::endl; - throw std::runtime_error(msg.str()); - } - if(this->logScale && this->minEnergy == float_X(0.0)) - { - std::stringstream msg; - msg << "[Plugin] [" << m_help->getOptionPrefix() - << "] minEnergy can not be zero in logarithmic scaling." - << std::endl; - throw std::runtime_error(msg.str()); - } - if(this->numBinsEnergy > 1 && this->maxEnergy <= this->minEnergy) - { - std::stringstream msg; - msg << "[Plugin] [" << m_help->getOptionPrefix() - << "] minEnergy has to be less than maxEnergy." 
- << std::endl; - throw std::runtime_error(msg.str()); - } + hdf5DataFile.writeAttribute(currentStep, SplashTypeX, "calorimeter", "maxPitch[deg]", &this->maxPitch_deg); - this->maxYaw_deg = float_X(0.5) * this->openingYaw_deg; - this->maxPitch_deg = float_X(0.5) * this->openingPitch_deg; - /* convert units */ - const float_64 minEnergy_SI = this->minEnergy * UNITCONV_keV_to_Joule; - const float_64 maxEnergy_SI = this->maxEnergy * UNITCONV_keV_to_Joule; - this->minEnergy = minEnergy_SI / UNIT_ENERGY; - this->maxEnergy = maxEnergy_SI / UNIT_ENERGY; - - /* allocate memory buffers */ - this->dBufCalorimeter = new DBufCalorimeter(this->numBinsYaw, this->numBinsPitch, this->numBinsEnergy); - this->dBufLeftParsCalorimeter = new DBufCalorimeter(this->dBufCalorimeter->size()); - this->hBufCalorimeter = new HBufCalorimeter(this->dBufCalorimeter->size()); - this->hBufTotalCalorimeter = new HBufCalorimeter(this->dBufCalorimeter->size()); - - /* fill calorimeter for left particles with zero */ - this->dBufLeftParsCalorimeter->assign(float_X(0.0)); - - /* create mpi reduce algorithm */ - pmacc::GridController& con = pmacc::Environment::get().GridController(); - pm::Size_t gpuDim = (pm::Size_t)con.getGpuNodes(); - zone::SphericZone zone_allGPUs(gpuDim); - this->allGPU_reduce = AllGPU_reduce(new pmacc::algorithm::mpi::Reduce(zone_allGPUs)); - - /* calculate rotated calorimeter frame from posYaw_deg and posPitch_deg */ - constexpr float_64 radsInDegree = pmacc::algorithms::math::Pi::value / float_64(180.0); - const float_64 posYaw_rad = this->posYaw_deg * radsInDegree; - const float_64 posPitch_rad = this->posPitch_deg * radsInDegree; - this->calorimeterFrameVecY = float3_X(math::sin(posYaw_rad) * math::cos(posPitch_rad), - math::cos(posYaw_rad) * math::cos(posPitch_rad), - math::sin(posPitch_rad)); - /* If the y-axis is pointing exactly up- or downwards we need to define the x-axis manually */ - if(math::abs(this->calorimeterFrameVecY.z()) == float_X(1.0)) - { - 
this->calorimeterFrameVecX = float3_X(1.0, 0.0, 0.0); - } - else - { - /* choose `calorimeterFrameVecX` so that the roll is zero. */ - const float3_X vecUp(0.0, 0.0, -1.0); - this->calorimeterFrameVecX = math::cross(vecUp, this->calorimeterFrameVecY); - /* normalize vector */ - this->calorimeterFrameVecX /= math::abs(this->calorimeterFrameVecX); - } - this->calorimeterFrameVecZ = math::cross(this->calorimeterFrameVecX, this->calorimeterFrameVecY); - - /* create calorimeter functor instance */ - this->calorimeterFunctor = MyCalorimeterFunctorPtr(new MyCalorimeterFunctor( - this->maxYaw_deg * radsInDegree, - this->maxPitch_deg * radsInDegree, - this->numBinsYaw, - this->numBinsPitch, - this->numBinsEnergy, - this->logScale ? math::log10(this->minEnergy) : this->minEnergy, - this->logScale ? math::log10(this->maxEnergy) : this->maxEnergy, - this->logScale, - this->calorimeterFrameVecX, - this->calorimeterFrameVecY, - this->calorimeterFrameVecZ)); - - /* create folder for hdf5 files*/ - Environment::get().Filesystem().createDirectoryWithPermissions(this->foldername); - - // set how often the plugin should be executed while PIConGPU is running - Environment<>::get( ).PluginConnector( ).setNotificationPeriod( - this, - m_help->notifyPeriod.get( m_id ) - ); - } - - void writeToHDF5File(uint32_t currentStep) - { - splash::SerialDataCollector hdf5DataFile(1); - splash::DataCollector::FileCreationAttr fAttr; - - splash::DataCollector::initFileCreationAttr(fAttr); - - std::stringstream filename; - filename << this->foldername << "/" << filenamePrefix << "_" << currentStep; - - hdf5DataFile.open(filename.str().c_str(), fAttr); - - typename PICToSplash::type SplashTypeX; - typename PICToSplash::type SplashType64; - typename PICToSplash::type SplashTypeBool; - - splash::Dimensions bufferSize(this->hBufTotalCalorimeter->size().x(), - this->hBufTotalCalorimeter->size().y(), - this->hBufTotalCalorimeter->size().z()); - - hdf5DataFile.write(currentStep, - SplashTypeX, - 
this->numBinsEnergy == 1 ? DIM2 : DIM3, - splash::Selection(bufferSize), - "calorimeter", - &(*this->hBufTotalCalorimeter->origin())); - - const float_64 unitSI = particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * UNIT_ENERGY; - - hdf5DataFile.writeAttribute(currentStep, - SplashType64, - "calorimeter", - "unitSI", - &unitSI); - - hdf5DataFile.writeAttribute(currentStep, - SplashType64, - "calorimeter", - "posYaw[deg]", - &posYaw_deg); - - hdf5DataFile.writeAttribute(currentStep, - SplashType64, - "calorimeter", - "posPitch[deg]", - &posPitch_deg); - - hdf5DataFile.writeAttribute(currentStep, - SplashTypeX, - "calorimeter", - "maxYaw[deg]", - &this->maxYaw_deg); - - hdf5DataFile.writeAttribute(currentStep, - SplashTypeX, - "calorimeter", - "maxPitch[deg]", - &this->maxPitch_deg); - - if(this->numBinsEnergy > 1) - { - const float_64 minEnergy_SI = this->minEnergy * UNIT_ENERGY; - const float_64 maxEnergy_SI = this->maxEnergy * UNIT_ENERGY; - const float_64 minEnergy_keV = minEnergy_SI * UNITCONV_Joule_to_keV; - const float_64 maxEnergy_keV = maxEnergy_SI * UNITCONV_Joule_to_keV; - - hdf5DataFile.writeAttribute(currentStep, - SplashType64, - "calorimeter", - "minEnergy[keV]", - &minEnergy_keV); - - hdf5DataFile.writeAttribute(currentStep, - SplashType64, - "calorimeter", - "maxEnergy[keV]", - &maxEnergy_keV); - - hdf5DataFile.writeAttribute(currentStep, - SplashTypeBool, - "calorimeter", - "logScale", - &this->logScale); - } + if(this->numBinsEnergy > 1) + { + const float_64 minEnergy_SI = this->minEnergy * UNIT_ENERGY; + const float_64 maxEnergy_SI = this->maxEnergy * UNIT_ENERGY; + const float_64 minEnergy_keV = minEnergy_SI * UNITCONV_Joule_to_keV; + const float_64 maxEnergy_keV = maxEnergy_SI * UNITCONV_Joule_to_keV; - hdf5DataFile.close(); - } + hdf5DataFile + .writeAttribute(currentStep, SplashType64, "calorimeter", "minEnergy[keV]", &minEnergy_keV); -public: + hdf5DataFile + .writeAttribute(currentStep, SplashType64, "calorimeter", "maxEnergy[keV]", 
&maxEnergy_keV); - struct Help : public plugins::multi::IHelp - { + hdf5DataFile.writeAttribute(currentStep, SplashTypeBool, "calorimeter", "logScale", &this->logScale); + } - /** creates an instance of ISlave - * - * @tparam T_Slave type of the interface implementation (must inherit from ISlave) - * @param help plugin defined help - * @param id index of the plugin, range: [0;help->getNumPlugins()) - */ - std::shared_ptr< ISlave > create( - std::shared_ptr< IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) - { - return std::shared_ptr< ISlave >( - new ParticleCalorimeter< ParticlesType >( - help, - id, - cellDescription - ) - ); + hdf5DataFile.close(); } - // find all valid filter for the current used species - using EligibleFilters = typename MakeSeqFromNestedSeq< - typename bmpl::transform< - particles::filter::AllParticleFilters, - particles::traits::GenerateSolversIfSpeciesEligible< - bmpl::_1, - ParticlesType - > - >::type - >::type; - - //! periodicity of computing the particle energy - plugins::multi::Option< std::string > notifyPeriod = { - "period", - "enable plugin [for each n-th step]" - }; - plugins::multi::Option< std::string > fileName = { - "file", - "output filename (prefix)" - }; - plugins::multi::Option< std::string > filter = { - "filter", - "particle filter: " - }; - plugins::multi::Option< uint32_t > numBinsYaw = { - "numBinsYaw", - "number of bins for angle yaw.", - 64 - }; - plugins::multi::Option< uint32_t > numBinsPitch = { - "numBinsPitch", - "number of bins for angle pitch.", - 64 - }; - plugins::multi::Option< uint32_t > numBinsEnergy = { - "numBinsEnergy", - "number of bins for the energy spectrum. 
Disabled by default.", - 1 - }; - plugins::multi::Option< float_X > minEnergy = { - "minEnergy", - "minimal detectable energy in keV.", - 0.0 - }; - plugins::multi::Option< float_X > maxEnergy = { - "maxEnergy", - "maximal detectable energy in keV.", - 1.0e3 - }; - plugins::multi::Option< uint32_t > logScale = { - "logScale", - "enable logarithmic energy scale.", - 0 - }; - plugins::multi::Option< float_X > openingYaw = { - "openingYaw", - "opening angle yaw in degrees. 0 <= x <= 360.", - 360.0 - }; - plugins::multi::Option< float_X > openingPitch = { - "openingPitch", - "opening angle pitch in degrees. 0 <= x <= 180.", - 180.0 - }; - plugins::multi::Option< float_64 > posYaw = { - "posYaw", - "yaw coordinate of calorimeter position in degrees. Defaults to +y direction.", - 0.0 - }; - plugins::multi::Option< float_64 > posPitch = { - "posPitch", - "pitch coordinate of calorimeter position in degrees. Defaults to +y direction.", - 0.0 - }; + void writeToOpenPMDFile(uint32_t currentStep) + { + std::stringstream filename; + filename << this->foldername << "/" << filenamePrefix << "_%T." << filenameExtension; + ::openPMD::Series series(filename.str(), ::openPMD::Access::CREATE); - //! 
string list with all possible particle filters - std::string concatenatedFilterNames; - std::vector< std::string > allowedFilters; + auto twoDimensional = [this](auto vector) -> decltype(vector) { + if(this->numBinsEnergy == 1) + { + vector.erase(vector.begin()); + } + return vector; + }; + + auto offset = twoDimensional(::openPMD::Offset{0, 0, 0}); + + auto extent = twoDimensional(::openPMD::Extent{ + this->hBufTotalCalorimeter->size().z(), + this->hBufTotalCalorimeter->size().y(), + this->hBufTotalCalorimeter->size().x()}); + + auto mesh = series.iterations[currentStep].meshes["calorimeter"]; + auto calorimeter = mesh[::openPMD::RecordComponent::SCALAR]; + calorimeter.resetDataset({::openPMD::determineDatatype(), extent}); + calorimeter.storeChunk( + std::shared_ptr{&(*this->hBufTotalCalorimeter->origin()), [](auto const*) {}}, + std::move(offset), + std::move(extent)); + + // Write attributes + + constexpr float_64 unitSI = particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE * UNIT_ENERGY; + calorimeter.setAttribute("maxPitch[deg]", maxPitch_deg); + calorimeter.setAttribute("maxYaw[deg]", maxYaw_deg); + calorimeter.setAttribute("posPitch[deg]", posPitch_deg); + calorimeter.setAttribute("posYaw[deg]", posYaw_deg); + calorimeter.setPosition(twoDimensional(std::vector{0.5, 0.5, 0.5})); + calorimeter.setUnitSI(unitSI); + mesh.setAxisLabels(twoDimensional(std::vector{"z", "y", "x"})); + mesh.setGridGlobalOffset(twoDimensional(std::vector{0., 0., 0.})); // @todo + mesh.setGridSpacing(twoDimensional(std::vector{1., 1., 1.})); // @todo + mesh.setGridUnitSI(1.); // @todo + mesh.setUnitDimension({/* @todo */}); + + if(this->numBinsEnergy > 1) + { + const float_64 minEnergy_SI = this->minEnergy * UNIT_ENERGY; + const float_64 maxEnergy_SI = this->maxEnergy * UNIT_ENERGY; + const float_64 minEnergy_keV = minEnergy_SI * UNITCONV_Joule_to_keV; + const float_64 maxEnergy_keV = maxEnergy_SI * UNITCONV_Joule_to_keV; + + calorimeter.setAttribute("minEnergy[keV]", minEnergy_keV); + 
calorimeter.setAttribute("maxEnergy[keV]", maxEnergy_keV); + calorimeter.setAttribute("logScale", this->logScale); + } - ///! method used by plugin controller to get --help description - void registerHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) - { - meta::ForEach< - EligibleFilters, - plugins::misc::AppendName< bmpl::_1 > - > getEligibleFilterNames; - getEligibleFilterNames( allowedFilters ); - - concatenatedFilterNames = plugins::misc::concatenateToString( - allowedFilters, - ", " - ); - - notifyPeriod.registerHelp( - desc, - masterPrefix + prefix - ); - fileName.registerHelp( - desc, - masterPrefix + prefix - ); - filter.registerHelp( - desc, - masterPrefix + prefix, - std::string( "[" ) + concatenatedFilterNames + "]" - ); - numBinsYaw.registerHelp( - desc, - masterPrefix + prefix - ); - numBinsPitch.registerHelp( - desc, - masterPrefix + prefix - ); - numBinsEnergy.registerHelp( - desc, - masterPrefix + prefix - ); - minEnergy.registerHelp( - desc, - masterPrefix + prefix - ); - maxEnergy.registerHelp( - desc, - masterPrefix + prefix - ); - logScale.registerHelp( - desc, - masterPrefix + prefix - ); - openingYaw.registerHelp( - desc, - masterPrefix + prefix - ); - openingPitch.registerHelp( - desc, - masterPrefix + prefix - ); - posYaw.registerHelp( - desc, - masterPrefix + prefix - ); - posPitch.registerHelp( - desc, - masterPrefix + prefix - ); + series.iterations[currentStep].close(); } - void expandHelp( - boost::program_options::options_description & desc, - std::string const & masterPrefix = std::string{ } - ) + public: + struct Help : public plugins::multi::IHelp { - } + /** creates an instance of ISlave + * + * @tparam T_Slave type of the interface implementation (must inherit from ISlave) + * @param help plugin defined help + * @param id index of the plugin, range: [0;help->getNumPlugins()) + */ + std::shared_ptr create(std::shared_ptr& help, size_t const id, MappingDesc* 
cellDescription) + { + return std::shared_ptr(new ParticleCalorimeter(help, id, cellDescription)); + } + // find all valid filter for the current used species + using EligibleFilters = typename MakeSeqFromNestedSeq>::type>::type; + + //! periodicity of computing the particle energy + plugins::multi::Option notifyPeriod = {"period", "enable plugin [for each n-th step]"}; + plugins::multi::Option fileName = {"file", "output filename (prefix)"}; + plugins::multi::Option filter = {"filter", "particle filter: "}; + plugins::multi::Option extension = {"ext", "openPMD filename extension", "h5"}; + plugins::multi::Option numBinsYaw = {"numBinsYaw", "number of bins for angle yaw.", 64}; + plugins::multi::Option numBinsPitch = {"numBinsPitch", "number of bins for angle pitch.", 64}; + plugins::multi::Option numBinsEnergy + = {"numBinsEnergy", "number of bins for the energy spectrum. Disabled by default.", 1}; + plugins::multi::Option minEnergy = {"minEnergy", "minimal detectable energy in keV.", 0.0}; + plugins::multi::Option maxEnergy = {"maxEnergy", "maximal detectable energy in keV.", 1.0e3}; + plugins::multi::Option logScale = {"logScale", "enable logarithmic energy scale.", 0}; + plugins::multi::Option openingYaw + = {"openingYaw", "opening angle yaw in degrees. 0 <= x <= 360.", 360.0}; + plugins::multi::Option openingPitch + = {"openingPitch", "opening angle pitch in degrees. 0 <= x <= 180.", 180.0}; + plugins::multi::Option posYaw + = {"posYaw", "yaw coordinate of calorimeter position in degrees. Defaults to +y direction.", 0.0}; + plugins::multi::Option posPitch + = {"posPitch", "pitch coordinate of calorimeter position in degrees. Defaults to +y direction.", 0.0}; + + //! string list with all possible particle filters + std::string concatenatedFilterNames; + std::vector allowedFilters; + + ///! 
method used by plugin controller to get --help description + void registerHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + meta::ForEach> getEligibleFilterNames; + getEligibleFilterNames(allowedFilters); + + concatenatedFilterNames = plugins::misc::concatenateToString(allowedFilters, ", "); + + notifyPeriod.registerHelp(desc, masterPrefix + prefix); + fileName.registerHelp(desc, masterPrefix + prefix); + extension.registerHelp(desc, masterPrefix + prefix); + filter.registerHelp(desc, masterPrefix + prefix, std::string("[") + concatenatedFilterNames + "]"); + numBinsYaw.registerHelp(desc, masterPrefix + prefix); + numBinsPitch.registerHelp(desc, masterPrefix + prefix); + numBinsEnergy.registerHelp(desc, masterPrefix + prefix); + minEnergy.registerHelp(desc, masterPrefix + prefix); + maxEnergy.registerHelp(desc, masterPrefix + prefix); + logScale.registerHelp(desc, masterPrefix + prefix); + openingYaw.registerHelp(desc, masterPrefix + prefix); + openingPitch.registerHelp(desc, masterPrefix + prefix); + posYaw.registerHelp(desc, masterPrefix + prefix); + posPitch.registerHelp(desc, masterPrefix + prefix); + } - void validateOptions() - { - if( notifyPeriod.size() != fileName.size() ) - throw std::runtime_error( name + ": parameter fileName and period are not used the same number of times" ); + void expandHelp( + boost::program_options::options_description& desc, + std::string const& masterPrefix = std::string{}) + { + } - if( notifyPeriod.size() != filter.size() ) - throw std::runtime_error( name + ": parameter filter and period are not used the same number of times" ); - // check if user passed filter name are valid - for( auto const & filterName : filter) + void validateOptions() { - if( - std::find( - allowedFilters.begin(), - allowedFilters.end(), - filterName - ) == allowedFilters.end() - ) + if(notifyPeriod.size() != fileName.size()) + throw std::runtime_error( + name + ": parameter fileName and 
period are not used the same number of times"); + + if(notifyPeriod.size() != filter.size()) + throw std::runtime_error( + name + ": parameter filter and period are not used the same number of times"); + + // check if user passed filter name are valid + for(auto const& filterName : filter) { - throw std::runtime_error( name + ": unknown filter '" + filterName + "'" ); + if(std::find(allowedFilters.begin(), allowedFilters.end(), filterName) == allowedFilters.end()) + { + throw std::runtime_error(name + ": unknown filter '" + filterName + "'"); + } } } + + size_t getNumPlugins() const + { + return notifyPeriod.size(); + } + + std::string getDescription() const + { + return description; + } + + std::string getOptionPrefix() const + { + return prefix; + } + + std::string getName() const + { + return name; + } + + std::string const name = "ParticleCalorimeter"; + //! short description of the plugin + std::string const description = "(virtually) propagates and collects particles to infinite distance"; + //! 
prefix used for command line arguments + std::string const prefix = ParticlesType::FrameType::getName() + std::string("_calorimeter"); + }; + + static std::shared_ptr getHelp() + { + return std::shared_ptr(new Help{}); } - size_t getNumPlugins() const + ParticleCalorimeter( + std::shared_ptr& help, + size_t const id, + MappingDesc* cellDescription) + : m_help(std::static_pointer_cast(help)) + , m_id(id) + , m_cellDescription(cellDescription) + , leftParticlesDatasetName("calorimeterLeftParticles") + , dBufCalorimeter(nullptr) + , dBufLeftParsCalorimeter(nullptr) + , hBufCalorimeter(nullptr) + , hBufTotalCalorimeter(nullptr) { - return notifyPeriod.size(); + foldername = m_help->getOptionPrefix() + "/" + m_help->filter.get(m_id); + filenamePrefix + = m_help->getOptionPrefix() + "_" + m_help->fileName.get(m_id) + "_" + m_help->filter.get(m_id); + filenameExtension = m_help->extension.get(m_id); + numBinsYaw = m_help->numBinsYaw.get(m_id); + numBinsPitch = m_help->numBinsPitch.get(m_id); + numBinsEnergy = m_help->numBinsEnergy.get(m_id); + minEnergy = m_help->minEnergy.get(m_id); + maxEnergy = m_help->maxEnergy.get(m_id); + logScale = m_help->logScale.get(m_id); + openingYaw_deg = m_help->openingYaw.get(m_id); + openingPitch_deg = m_help->openingPitch.get(m_id); + posYaw_deg = m_help->posYaw.get(m_id); + posPitch_deg = m_help->posPitch.get(m_id); + + initPlugin(); } - std::string getDescription() const + virtual ~ParticleCalorimeter() { - return description; + __delete(this->dBufCalorimeter); + __delete(this->dBufLeftParsCalorimeter); + __delete(this->hBufCalorimeter); + __delete(this->hBufTotalCalorimeter); } - std::string getOptionPrefix() const + + void notify(uint32_t currentStep) { - return prefix; + /* initialize calorimeter with already detected particles */ + *this->dBufCalorimeter = *this->dBufLeftParsCalorimeter; + + /* data is written to dBufCalorimeter */ + this->calorimeterFunctor->setCalorimeterCursor(this->dBufCalorimeter->origin()); + + /* create 
kernel functor instance */ + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); + + AreaMapping const mapper(*this->m_cellDescription); + auto const grid = mapper.getGridDim(); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + auto kernel = PMACC_KERNEL(KernelParticleCalorimeter{})(grid, numWorkers); + auto unaryKernel = std::bind( + kernel, + particles->getDeviceParticlesBox(), + *this->calorimeterFunctor, + mapper, + std::placeholders::_1); + + meta::ForEach>{}( + m_help->filter.get(m_id), + currentStep, + unaryKernel); + + dc.releaseData(ParticlesType::FrameType::getName()); + + /* copy to host */ + *this->hBufCalorimeter = *this->dBufCalorimeter; + + /* mpi reduce */ + (*this->allGPU_reduce)( + *this->hBufTotalCalorimeter, + *this->hBufCalorimeter, + pmacc::algorithm::functor::Add{}); + if(!this->allGPU_reduce->root()) + return; + + this->writeToHDF5File(currentStep); + this->writeToOpenPMDFile(currentStep); } - std::string getName() const + void onParticleLeave(const std::string& speciesName, int32_t direction) { - return name; + if(this->notifyPeriod.empty()) + return; + if(speciesName != ParticlesType::FrameType::getName()) + return; + + /* data is written to dBufLeftParsCalorimeter */ + this->calorimeterFunctor->setCalorimeterCursor(this->dBufLeftParsCalorimeter->origin()); + + ExchangeMapping mapper(*this->cellDescription, direction); + auto grid = mapper.getGridDim(); + + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(speciesName, true); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + auto kernel = PMACC_KERNEL(KernelParticleCalorimeter{})(grid, numWorkers); + auto unaryKernel = std::bind( + kernel, + particles->getDeviceParticlesBox(), + (MyCalorimeterFunctor) * this->calorimeterFunctor, + mapper, + std::placeholders::_1); + + meta::ForEach>{}( + 
m_help->filter.get(m_id), + Environment<>::get().SimulationDescription().getCurrentStep(), + unaryKernel); + + dc.releaseData(speciesName); } - std::string const name = "ParticleCalorimeter"; - //! short description of the plugin - std::string const description = "(virtually) propagates and collects particles to infinite distance"; - //! prefix used for command line arguments - std::string const prefix = ParticlesType::FrameType::getName( ) + std::string( "_calorimeter" ); + private: + std::shared_ptr m_help; + size_t m_id; + std::string foldername; + std::string filenamePrefix; + std::string filenameExtension; + MappingDesc* m_cellDescription; + std::ofstream outFile; + const std::string leftParticlesDatasetName; + + uint32_t numBinsYaw; + uint32_t numBinsPitch; + uint32_t numBinsEnergy; + float_X minEnergy; + float_X maxEnergy; + bool logScale; + float_X openingYaw_deg; + float_X openingPitch_deg; + float_X maxYaw_deg; + float_X maxPitch_deg; + + float_64 posYaw_deg; + float_64 posPitch_deg; + + //! Rotated calorimeter frame + float3_X calorimeterFrameVecX; + float3_X calorimeterFrameVecY; + float3_X calorimeterFrameVecZ; + + //! device calorimeter buffer for a single gpu + DBufCalorimeter* dBufCalorimeter; + //! device calorimeter buffer for all particles which have left the simulation volume + DBufCalorimeter* dBufLeftParsCalorimeter; + //! host calorimeter buffer for a single mpi rank + HBufCalorimeter* hBufCalorimeter; + //! 
host calorimeter buffer for summation of all mpi ranks + HBufCalorimeter* hBufTotalCalorimeter; }; - static std::shared_ptr< plugins::multi::IHelp > getHelp() - { - return std::shared_ptr< plugins::multi::IHelp >( new Help{ } ); - } - - ParticleCalorimeter( - std::shared_ptr< plugins::multi::IHelp > & help, - size_t const id, - MappingDesc* cellDescription - ) : - m_help( std::static_pointer_cast< Help >(help) ), - m_id( id ), - m_cellDescription( cellDescription ), - leftParticlesDatasetName("calorimeterLeftParticles"), - dBufCalorimeter(nullptr), - dBufLeftParsCalorimeter(nullptr), - hBufCalorimeter(nullptr), - hBufTotalCalorimeter(nullptr) + namespace particles { - foldername = m_help->getOptionPrefix() + "/" + m_help->filter.get( m_id ); - filenamePrefix = m_help->getOptionPrefix() + "_" + m_help->fileName.get( m_id ) + "_" + m_help->filter.get( m_id ); - numBinsYaw = m_help->numBinsYaw.get( m_id ); - numBinsPitch = m_help->numBinsPitch.get( m_id ); - numBinsEnergy = m_help->numBinsEnergy.get( m_id ); - minEnergy = m_help->minEnergy.get( m_id ); - maxEnergy = m_help->maxEnergy.get( m_id ); - logScale = m_help->logScale.get( m_id ); - openingYaw_deg = m_help->openingYaw.get( m_id ); - openingPitch_deg = m_help->openingPitch.get( m_id ); - posYaw_deg = m_help->posYaw.get( m_id ); - posPitch_deg = m_help->posPitch.get( m_id ); - - initPlugin(); - } - - virtual ~ParticleCalorimeter() - { - __delete(this->dBufCalorimeter); - __delete(this->dBufLeftParsCalorimeter); - __delete(this->hBufCalorimeter); - __delete(this->hBufTotalCalorimeter); - } + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + // this plugin needs at least the weighting and momentum attributes + using RequiredIdentifiers = MakeSeq_t; - void notify(uint32_t currentStep) - { - /* initialize calorimeter with already detected particles */ - *this->dBufCalorimeter = *this->dBufLeftParsCalorimeter; - - /* data is written to 
dBufCalorimeter */ - this->calorimeterFunctor->setCalorimeterCursor(this->dBufCalorimeter->origin()); - - /* create kernel functor instance */ - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); - - AreaMapping< - CORE + BORDER, - MappingDesc - > const mapper( *this->m_cellDescription ); - auto const grid = mapper.getGridDim(); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - auto kernel = PMACC_KERNEL( KernelParticleCalorimeter< numWorkers >{ } )( - grid, - numWorkers - ); - auto unaryKernel = std::bind( - kernel, - particles->getDeviceParticlesBox( ), - *this->calorimeterFunctor, - mapper, - std::placeholders::_1 - ); - - meta::ForEach< - typename Help::EligibleFilters, - plugins::misc::ExecuteIfNameIsEqual< bmpl::_1 > - >{ }( - m_help->filter.get( m_id ), - currentStep, - unaryKernel - ); - - dc.releaseData( ParticlesType::FrameType::getName() ); - - /* copy to host */ - *this->hBufCalorimeter = *this->dBufCalorimeter; - - /* mpi reduce */ - (*this->allGPU_reduce)(*this->hBufTotalCalorimeter, *this->hBufCalorimeter, pmacc::algorithm::functor::Add{}); - if(!this->allGPU_reduce->root()) - return; - - this->writeToHDF5File(currentStep); - } - - void onParticleLeave(const std::string& speciesName, int32_t direction) - { - if(this->notifyPeriod.empty()) - return; - if(speciesName != ParticlesType::FrameType::getName()) - return; - - /* data is written to dBufLeftParsCalorimeter */ - this->calorimeterFunctor->setCalorimeterCursor(this->dBufLeftParsCalorimeter->origin()); - - ExchangeMapping mapper(*this->cellDescription, direction); - auto grid = mapper.getGridDim(); - - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( speciesName, true ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< 
SuperCellSize >::type::value - >::value; - - auto kernel = PMACC_KERNEL( KernelParticleCalorimeter< numWorkers >{ } )( - grid, - numWorkers - ); - auto unaryKernel = std::bind( - kernel, - particles->getDeviceParticlesBox( ), - (MyCalorimeterFunctor)*this->calorimeterFunctor, - mapper, - std::placeholders::_1 - ); - - meta::ForEach< - typename Help::EligibleFilters, - plugins::misc::ExecuteIfNameIsEqual< bmpl::_1 > - >{ }( - m_help->filter.get( m_id ), - Environment<>::get().SimulationDescription().getCurrentStep(), - unaryKernel - ); - - dc.releaseData( speciesName ); - } - -private: - std::shared_ptr< Help > m_help; - size_t m_id; - std::string foldername; - std::string filenamePrefix; - MappingDesc* m_cellDescription; - std::ofstream outFile; - const std::string leftParticlesDatasetName; - - uint32_t numBinsYaw; - uint32_t numBinsPitch; - uint32_t numBinsEnergy; - float_X minEnergy; - float_X maxEnergy; - bool logScale; - float_X openingYaw_deg; - float_X openingPitch_deg; - float_X maxYaw_deg; - float_X maxPitch_deg; - - float_64 posYaw_deg; - float_64 posPitch_deg; - - //! Rotated calorimeter frame - float3_X calorimeterFrameVecX; - float3_X calorimeterFrameVecY; - float3_X calorimeterFrameVecZ; - - //! device calorimeter buffer for a single gpu - DBufCalorimeter* dBufCalorimeter; - //! device calorimeter buffer for all particles which have left the simulation volume - DBufCalorimeter* dBufLeftParsCalorimeter; - //! host calorimeter buffer for a single mpi rank - HBufCalorimeter* hBufCalorimeter; - //! 
host calorimeter buffer for summation of all mpi ranks - HBufCalorimeter* hBufTotalCalorimeter; -}; - -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - ParticleCalorimeter< T_UnspecifiedSpecies > - > - { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the weighting and momentum attributes - using RequiredIdentifiers = MakeSeq_t< - weighting, - momentum - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - // and also a mass ratio for energy calculation from momentum - using SpeciesHasFlags = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasFlags - >; - }; -} // namespace traits -} // namespace particles + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + // and also a mass ratio for energy calculation from momentum + using SpeciesHasFlags = typename pmacc::traits::HasFlag>::type; + + using type = typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.kernel b/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.kernel index 5c880d6bea..fe0f4e17c4 100644 --- a/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.kernel +++ b/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeter.kernel @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -24,112 +24,87 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -/** This kernel is only called for guard particles. 
- * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelParticleCalorimeter -{ - /** call functor calorimeterFunctor for each particle - * - * @tparam T_ParticlesBox pmacc::ParticlesBox, particle box type - * @tparam T_CalorimeterFunctor type of the functor - * @tparam T_Mapping supercell mapper functor type - * @tparam T_Acc alpaka accelerator type + /** This kernel is only called for guard particles. * - * @param alpaka accelerator - * @param particlesBox particle memory - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParticlesBox, - typename T_CalorimeterFunctor, - typename T_Mapper, - typename T_Acc, - typename T_Filter - > - DINLINE void operator()( - T_Acc const & acc, - T_ParticlesBox particlesBox, - T_CalorimeterFunctor calorimeterFunctor, - T_Mapper mapper, - T_Filter filter - ) const + template + struct KernelParticleCalorimeter { - using namespace mappings::threads; + /** call functor calorimeterFunctor for each particle + * + * @tparam T_ParticlesBox pmacc::ParticlesBox, particle box type + * @tparam T_CalorimeterFunctor type of the functor + * @tparam T_Mapping supercell mapper functor type + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param particlesBox particle memory + * @param mapper functor to map a block to a supercell + */ + template< + typename T_ParticlesBox, + typename T_CalorimeterFunctor, + typename T_Mapper, + typename T_Acc, + typename T_Filter> + DINLINE void operator()( + T_Acc const& acc, + T_ParticlesBox particlesBox, + T_CalorimeterFunctor calorimeterFunctor, + T_Mapper mapper, + T_Filter filter) const + { + using namespace mappings::threads; - constexpr uint32_t numWorkers = T_numWorkers; - constexpr lcellId_t maxParticlesInFrame = pmacc::math::CT::volume< SuperCellSize >::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + constexpr lcellId_t maxParticlesInFrame = 
pmacc::math::CT::volume::type::value; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */ - DataSpace< simDim > const block( mapper.getSuperCellIndex( DataSpace< simDim > ( blockIdx ) )) ; + /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */ + DataSpace const block(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); - using ParticlesFramePtr = typename T_ParticlesBox::FramePtr; + using ParticlesFramePtr = typename T_ParticlesBox::FramePtr; - ParticlesFramePtr particlesFrame; + ParticlesFramePtr particlesFrame; - particlesFrame = particlesBox.getLastFrame( block ); + particlesFrame = particlesBox.getLastFrame(block); - // end kernel if we have no frames within the supercell - if( !particlesFrame.isValid( ) ) - return; + // end kernel if we have no frames within the supercell + if(!particlesFrame.isValid()) + return; - auto accFilter = filter( - acc, - block - mapper.getGuardingSuperCells( ), - WorkerCfg< numWorkers >{ workerIdx } - ); + auto accFilter = filter(acc, block - mapper.getGuardingSuperCells(), WorkerCfg{workerIdx}); - // number of particles in the current frame - auto numParticles = particlesBox.getSuperCell( block ).getSizeLastFrame( ); + // number of particles in the current frame + auto numParticles = particlesBox.getSuperCell(block).getSizeLastFrame(); - while( particlesFrame.isValid( ) ) - { - using ParticleDomCfg = IdxConfig< - maxParticlesInFrame, - numWorkers - >; - - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg >{ workerIdx } - ( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto particle = particlesFrame[ linearIdx ]; - if( linearIdx >= numParticles ) + while(particlesFrame.isValid()) + { + using ParticleDomCfg = IdxConfig; + + // loop over all particles in the frame + ForEachIdx{workerIdx}([&](uint32_t const 
linearIdx, uint32_t const) { + auto particle = particlesFrame[linearIdx]; + if(linearIdx >= numParticles) { - particle.setHandleInvalid( ); + particle.setHandleInvalid(); } - if( - accFilter( - acc, - particle - ) - ) + if(accFilter(acc, particle)) { - calorimeterFunctor( - acc, - particlesFrame, - linearIdx - ); + calorimeterFunctor(acc, particlesFrame, linearIdx); } - } - ); + }); - // independent for each worker - particlesFrame = particlesBox.getPreviousFrame( particlesFrame ); - numParticles = maxParticlesInFrame; + // independent for each worker + particlesFrame = particlesBox.getPreviousFrame(particlesFrame); + numParticles = maxParticlesInFrame; + } } - } -}; + }; } // namespace picongpu diff --git a/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeterFunctors.hpp b/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeterFunctors.hpp index f69107f0a6..3232c86061 100644 --- a/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeterFunctors.hpp +++ b/include/picongpu/plugins/particleCalorimeter/ParticleCalorimeterFunctors.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -30,119 +30,128 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -template -struct CalorimeterFunctor -{ - CalorimeterCur calorimeterCur; - - const float_X maxYaw; - const float_X maxPitch; - const uint32_t numBinsYaw; - const uint32_t numBinsPitch; - const int32_t numBinsEnergy; - /* depending on `logScale` the energy range is initialized - * with the logarithmic or the linear value. 
*/ - const float_X minEnergy; - const float_X maxEnergy; - const bool logScale; - - const float3_X calorimeterFrameVecX; - const float3_X calorimeterFrameVecY; - const float3_X calorimeterFrameVecZ; - - CalorimeterFunctor(const float_X maxYaw, - const float_X maxPitch, - const uint32_t numBinsYaw, - const uint32_t numBinsPitch, - const uint32_t numBinsEnergy, - const float_X minEnergy, - const float_X maxEnergy, - const bool logScale, - const float3_X calorimeterFrameVecX, - const float3_X calorimeterFrameVecY, - const float3_X calorimeterFrameVecZ) : - calorimeterCur(nullptr, pmacc::math::Size_t::create(0)), - maxYaw(maxYaw), - maxPitch(maxPitch), - numBinsYaw(numBinsYaw), - numBinsPitch(numBinsPitch), - numBinsEnergy(numBinsEnergy), - minEnergy(minEnergy), - maxEnergy(maxEnergy), - logScale(logScale), - calorimeterFrameVecX(calorimeterFrameVecX), - calorimeterFrameVecY(calorimeterFrameVecY), - calorimeterFrameVecZ(calorimeterFrameVecZ) - {} - - HINLINE void setCalorimeterCursor(const CalorimeterCur& calorimeterCur) - { - this->calorimeterCur = calorimeterCur; - } - - template - DINLINE void operator()(const T_Acc& acc, ParticlesFrame& particlesFrame, const uint32_t linearThreadIdx) + template + struct CalorimeterFunctor { - const float3_X mom = particlesFrame[linearThreadIdx][momentum_]; - const float_X mom2 = math::dot(mom, mom); - float3_X dirVec = mom * math::rsqrt(mom2); - - /* rotate dirVec into the calorimeter frame. This coordinate transformation - * is performed by a matrix vector multiplication. 
*/ - using namespace pmacc::algorithms::math; - dirVec = float3_X(dot(this->calorimeterFrameVecX, dirVec), - dot(this->calorimeterFrameVecY, dirVec), - dot(this->calorimeterFrameVecZ, dirVec)); - - /* convert dirVec to yaw and pitch */ - const float_X yaw = atan2(dirVec.x(), dirVec.y()); - const float_X pitch = asin(dirVec.z()); - - if(abs(yaw) < this->maxYaw && abs(pitch) < this->maxPitch) + CalorimeterCur calorimeterCur; + + const float_X maxYaw; + const float_X maxPitch; + const uint32_t numBinsYaw; + const uint32_t numBinsPitch; + const int32_t numBinsEnergy; + /* depending on `logScale` the energy range is initialized + * with the logarithmic or the linear value. */ + const float_X minEnergy; + const float_X maxEnergy; + const bool logScale; + + const float3_X calorimeterFrameVecX; + const float3_X calorimeterFrameVecY; + const float3_X calorimeterFrameVecZ; + + CalorimeterFunctor( + const float_X maxYaw, + const float_X maxPitch, + const uint32_t numBinsYaw, + const uint32_t numBinsPitch, + const uint32_t numBinsEnergy, + const float_X minEnergy, + const float_X maxEnergy, + const bool logScale, + const float3_X calorimeterFrameVecX, + const float3_X calorimeterFrameVecY, + const float3_X calorimeterFrameVecZ) + : calorimeterCur(nullptr, pmacc::math::Size_t::create(0)) + , maxYaw(maxYaw) + , maxPitch(maxPitch) + , numBinsYaw(numBinsYaw) + , numBinsPitch(numBinsPitch) + , numBinsEnergy(numBinsEnergy) + , minEnergy(minEnergy) + , maxEnergy(maxEnergy) + , logScale(logScale) + , calorimeterFrameVecX(calorimeterFrameVecX) + , calorimeterFrameVecY(calorimeterFrameVecY) + , calorimeterFrameVecZ(calorimeterFrameVecZ) { - const float2_X calorimeterPos = particleCalorimeter::mapYawPitchToNormedRange( - yaw, pitch, this->maxYaw, this->maxPitch); - - // yaw - int32_t yawBin = calorimeterPos.x() * static_cast(numBinsYaw); - // catch out-of-range values - yawBin = yawBin >= numBinsYaw ? numBinsYaw - 1 : yawBin; - yawBin = yawBin < 0 ? 
0 : yawBin; - - // pitch - int32_t pitchBin = calorimeterPos.y() * static_cast(numBinsPitch); - // catch out-of-range values - pitchBin = pitchBin >= numBinsPitch ? numBinsPitch - 1 : pitchBin; - pitchBin = pitchBin < 0 ? 0 : pitchBin; - - // energy - const float_X weighting = particlesFrame[linearThreadIdx][weighting_]; - const float_X normedWeighting = weighting / - static_cast(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); - const auto particle = particlesFrame[linearThreadIdx]; - const float_X mass = attribute::getMass(weighting, particle); - const float_X energy = KinEnergy<>()(mom, mass) / weighting; - - int32_t energyBin = 0; - if(this->numBinsEnergy > 1) - { - const int32_t numBinsOutOfRange = 2; - energyBin = math::float2int_rd(((logScale ? log10(energy) : energy) - minEnergy) / - (maxEnergy - minEnergy) * static_cast(this->numBinsEnergy - numBinsOutOfRange)) + 1; + } - // all entries larger than maxEnergy go into last bin - energyBin = energyBin < this->numBinsEnergy ? energyBin : this->numBinsEnergy - 1; + HINLINE void setCalorimeterCursor(const CalorimeterCur& calorimeterCur) + { + this->calorimeterCur = calorimeterCur; + } - // all entries smaller than minEnergy go into bin zero - energyBin = energyBin > 0 ? energyBin : 0; + template + DINLINE void operator()(const T_Acc& acc, ParticlesFrame& particlesFrame, const uint32_t linearThreadIdx) + { + const float3_X mom = particlesFrame[linearThreadIdx][momentum_]; + const float_X mom2 = pmacc::math::dot(mom, mom); + float3_X dirVec = mom * math::rsqrt(mom2); + + /* rotate dirVec into the calorimeter frame. This coordinate transformation + * is performed by a matrix vector multiplication. 
*/ + using namespace pmacc::math; + dirVec = float3_X( + pmacc::math::dot(this->calorimeterFrameVecX, dirVec), + pmacc::math::dot(this->calorimeterFrameVecY, dirVec), + pmacc::math::dot(this->calorimeterFrameVecZ, dirVec)); + + /* convert dirVec to yaw and pitch */ + const float_X yaw = atan2(dirVec.x(), dirVec.y()); + const float_X pitch = asin(dirVec.z()); + + if(abs(yaw) < this->maxYaw && abs(pitch) < this->maxPitch) + { + const float2_X calorimeterPos + = particleCalorimeter::mapYawPitchToNormedRange(yaw, pitch, this->maxYaw, this->maxPitch); + + // yaw + int32_t yawBin = calorimeterPos.x() * static_cast(numBinsYaw); + // catch out-of-range values + yawBin = yawBin >= numBinsYaw ? numBinsYaw - 1 : yawBin; + yawBin = yawBin < 0 ? 0 : yawBin; + + // pitch + int32_t pitchBin = calorimeterPos.y() * static_cast(numBinsPitch); + // catch out-of-range values + pitchBin = pitchBin >= numBinsPitch ? numBinsPitch - 1 : pitchBin; + pitchBin = pitchBin < 0 ? 0 : pitchBin; + + // energy + const float_X weighting = particlesFrame[linearThreadIdx][weighting_]; + const float_X normedWeighting + = weighting / static_cast(particles::TYPICAL_NUM_PARTICLES_PER_MACROPARTICLE); + const auto particle = particlesFrame[linearThreadIdx]; + const float_X mass = attribute::getMass(weighting, particle); + const float_X energy = KinEnergy<>()(mom, mass) / weighting; + + int32_t energyBin = 0; + if(this->numBinsEnergy > 1) + { + const int32_t numBinsOutOfRange = 2; + energyBin + = pmacc::math::float2int_rd( + ((logScale ? pmacc::math::log10(energy) : energy) - minEnergy) / (maxEnergy - minEnergy) + * static_cast(this->numBinsEnergy - numBinsOutOfRange)) + + 1; + + // all entries larger than maxEnergy go into last bin + energyBin = energyBin < this->numBinsEnergy ? energyBin : this->numBinsEnergy - 1; + + // all entries smaller than minEnergy go into bin zero + energyBin = energyBin > 0 ? 
energyBin : 0; + } + + cupla::atomicAdd( + acc, + &(*this->calorimeterCur(yawBin, pitchBin, energyBin)), + energy * normedWeighting, + ::alpaka::hierarchy::Threads{}); } - - atomicAdd( &(*this->calorimeterCur(yawBin, pitchBin, energyBin)), - energy * normedWeighting, ::alpaka::hierarchy::Threads{}); } - } -}; + }; } // namespace picongpu diff --git a/include/picongpu/plugins/particleMerging/ParticleMerger.hpp b/include/picongpu/plugins/particleMerging/ParticleMerger.hpp index 9d48fd426a..d0073f5cdf 100644 --- a/include/picongpu/plugins/particleMerging/ParticleMerger.hpp +++ b/include/picongpu/plugins/particleMerging/ParticleMerger.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -34,290 +34,247 @@ namespace picongpu { -namespace plugins -{ -namespace particleMerging -{ - - using namespace pmacc; - namespace bmpl = boost::mpl; - - /** Implements a particle merging algorithm based on - * - * Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016). - * Voronoi particle merging algorithm for PIC codes. - * Computer Physics Communications, 202, 165-174. 
- * - * \tparam T_ParticlesType particle species - */ - template< - class T_ParticlesType, - bool hasVoronoiCellId = - pmacc::traits::HasIdentifier< - typename T_ParticlesType::FrameType, - voronoiCellId - >::type::value - > - struct ParticleMergerWrapped; - - - template< class T_ParticlesType > - struct ParticleMergerWrapped< T_ParticlesType, true > : ISimulationPlugin - { - private: - std::string name; - std::string prefix; - std::string notifyPeriod; - MappingDesc* cellDescription; - - uint32_t minParticlesToMerge; - float_X posSpreadThreshold; - float_X absMomSpreadThreshold_mc; - float_X absMomSpreadThreshold; - float_X relMomSpreadThreshold; - float_64 minMeanEnergy_keV; - float_X minMeanEnergy; - - public: - using ParticlesType = T_ParticlesType; - - ParticleMergerWrapped() : - name( - "ParticleMerger: merges several macroparticles with" - " similar position and momentum into a single one" - ), - prefix( ParticlesType::FrameType::getName() + std::string("_merger") ), - cellDescription( nullptr ) - { - Environment<>::get().PluginConnector().registerPlugin( this ); - } - - void notify(uint32_t currentStep) - { - using SuperCellSize = MappingDesc::SuperCellSize; - - const pmacc::math::Int coreBorderGuardSuperCells = - this->cellDescription->getGridSuperCells(); - const pmacc::math::Int guardSuperCells = - this->cellDescription->getGuardingSuperCells(); - const pmacc::math::Int coreBorderSuperCells = - coreBorderGuardSuperCells - 2 * guardSuperCells; - - /* this zone represents the core+border area with guard offset in unit of cells */ - const zone::SphericZone< simDim > zone( - static_cast< pmacc::math::Size_t< simDim > >( - coreBorderSuperCells * SuperCellSize::toRT() - ), - guardSuperCells * SuperCellSize::toRT() - ); - - /* get particles instance */ - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( - ParticlesType::FrameType::getName(), - true - ); - - /* create `ParticleMergerKernel` instance */ - 
ParticleMergerKernel< typename ParticlesType::ParticlesBoxType > - particleMergerKernel( - particles->getDeviceParticlesBox(), - this->minParticlesToMerge, - this->posSpreadThreshold, - this->absMomSpreadThreshold, - this->relMomSpreadThreshold, - this->minMeanEnergy - ); - - /* execute particle merging alorithm */ - algorithm::kernel::Foreach< SuperCellSize > foreach; - foreach( - zone, - cursor::make_MultiIndexCursor< simDim >(), - particleMergerKernel - ); - - /* close all gaps caused by removal of particles */ - particles->fillAllGaps(); - } - - - void setMappingDescription(MappingDesc* cellDescription) - { - this->cellDescription = cellDescription; - } - - - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ( - ( this->prefix + ".period" ).c_str(), - po::value< std::string > ( - &this->notifyPeriod - ), - "enable plugin [for each n-th step]" - ) - ( - ( this->prefix + ".minParticlesToMerge" ).c_str(), - po::value< uint32_t > ( - &this->minParticlesToMerge - )->default_value( 8 ), - "minimal number of macroparticles needed to merge" - " the macroparticle collection into a single macroparticle." - ) - ( - ( this->prefix + ".posSpreadThreshold" ).c_str(), - po::value< float_X > ( - &this->posSpreadThreshold - )->default_value( 0.5 ), - "Below this threshold of spread in position macroparticles" - " can be merged [unit: cell edge length]." - ) - ( - ( this->prefix + ".absMomSpreadThreshold" ).c_str(), - po::value< float_X > ( - &this->absMomSpreadThreshold_mc - )->default_value( -1.0 ), - "Below this absolute threshold of spread in momentum" - " macroparticles can be merged [unit: m_el * c]." - " Disabled for -1 (default)." - ) - ( - ( this->prefix + ".relMomSpreadThreshold" ).c_str(), - po::value< float_X > ( - &this->relMomSpreadThreshold - )->default_value( -1.0 ), - "Below this relative (to mean momentum) threshold of spread in" - " momentum macroparticles can be merged [unit: none]." - " Disabled for -1 (default)." 
- ) - ( - ( this->prefix + ".minMeanEnergy" ).c_str(), - po::value< float_64 > ( - &this->minMeanEnergy_keV - )->default_value( 511.0 ), - "minimal mean kinetic energy needed to merge the macroparticle" - " collection into a single macroparticle [unit: keV]." - ); - } - - std::string pluginGetName() const - { - return this->name; - } - - protected: - - void pluginLoad() - { - if( notifyPeriod.empty() ) - return; - - Environment<>::get().PluginConnector().setNotificationPeriod( - this, - notifyPeriod - ); - - // clean user parameters - PMACC_VERIFY_MSG( - this->minParticlesToMerge > 1, - std::string("[Plugin: ") + this->prefix + "] minParticlesToMerge" - " has to be greater than one." - ); - PMACC_VERIFY_MSG( - this->posSpreadThreshold >= float_X(0.0), - std::string("[Plugin: ") + this->prefix + "] posSpreadThreshold" - " has to be non-negative." - ); - PMACC_VERIFY_MSG( - this->absMomSpreadThreshold_mc * this->relMomSpreadThreshold < float( 0.0 ), - std::string("[Plugin: ") + this->prefix + "] either" - " absMomSpreadThreshold or relMomSpreadThreshold has to be given" - ); - PMACC_VERIFY_MSG( - this->minMeanEnergy >= float_X(0.0), - std::string("[Plugin: ") + this->prefix + "] minMeanEnergy" - " has to be non-negative." 
- ); - - // convert units of user parameters - this->absMomSpreadThreshold = this->absMomSpreadThreshold_mc * - ELECTRON_MASS * SPEED_OF_LIGHT; - - const float_64 minMeanEnergy_SI = this->minMeanEnergy_keV * - UNITCONV_keV_to_Joule; - this->minMeanEnergy = static_cast< float_X >( - minMeanEnergy_SI / UNIT_ENERGY - ); - } - - void pluginUnload() - {} - - void restart( uint32_t, const std::string ) - {} - - void checkpoint( uint32_t, const std::string ) - {} - }; - - - template< class T_ParticlesType > - struct ParticleMergerWrapped< T_ParticlesType, false > : ISimulationPlugin + namespace plugins { - private: - std::string name; - std::string prefix; - std::string notifyPeriod; - MappingDesc* cellDescription; - - public: - using ParticlesType = T_ParticlesType; - - ParticleMergerWrapped() : - name( - "ParticleMerger: merges several macroparticles with" - " similar position and momentum into a single one.\n" - "plugin disabled. Enable plugin by adding the `voronoiCellId`" - " attribute to the particle attribute list." - ), - prefix( ParticlesType::FrameType::getName() + std::string("_merger") ), - cellDescription( nullptr ) - { - Environment<>::get().PluginConnector().registerPlugin( this ); - } - - std::string pluginGetName() const + namespace particleMerging { - return this->name; - } - - protected: - void setMappingDescription( MappingDesc* ) - {} - - void pluginRegisterHelp( po::options_description& ) - {} - - void pluginUnload() - {} - - void restart( uint32_t, const std::string ) - {} - - void checkpoint( uint32_t, const std::string ) - {} - - void notify( uint32_t ) - {} - }; - - - template< typename T_ParticlesType > - struct ParticleMerger : ParticleMergerWrapped< T_ParticlesType > - {}; - -} // namespace particleMerging -} // namespace plugins + using namespace pmacc; + namespace bmpl = boost::mpl; + + /** Implements a particle merging algorithm based on + * + * Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016). 
+ * Voronoi particle merging algorithm for PIC codes. + * Computer Physics Communications, 202, 165-174. + * + * \tparam T_ParticlesType particle species + */ + template< + class T_ParticlesType, + bool hasVoronoiCellId + = pmacc::traits::HasIdentifier::type::value> + struct ParticleMergerWrapped; + + + template + struct ParticleMergerWrapped : ISimulationPlugin + { + private: + std::string name; + std::string prefix; + std::string notifyPeriod; + MappingDesc* cellDescription; + + uint32_t minParticlesToMerge; + float_X posSpreadThreshold; + float_X absMomSpreadThreshold_mc; + float_X absMomSpreadThreshold; + float_X relMomSpreadThreshold; + float_64 minMeanEnergy_keV; + float_X minMeanEnergy; + + public: + using ParticlesType = T_ParticlesType; + + ParticleMergerWrapped() + : name("ParticleMerger: merges several macroparticles with" + " similar position and momentum into a single one") + , prefix(ParticlesType::FrameType::getName() + std::string("_merger")) + , cellDescription(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } + + void notify(uint32_t currentStep) + { + using SuperCellSize = MappingDesc::SuperCellSize; + + const pmacc::math::Int coreBorderGuardSuperCells + = this->cellDescription->getGridSuperCells(); + const pmacc::math::Int guardSuperCells = this->cellDescription->getGuardingSuperCells(); + const pmacc::math::Int coreBorderSuperCells + = coreBorderGuardSuperCells - 2 * guardSuperCells; + + /* this zone represents the core+border area with guard offset in unit of cells */ + const zone::SphericZone zone( + static_cast>(coreBorderSuperCells * SuperCellSize::toRT()), + guardSuperCells * SuperCellSize::toRT()); + + /* get particles instance */ + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); + + /* create `ParticleMergerKernel` instance */ + ParticleMergerKernel particleMergerKernel( + particles->getDeviceParticlesBox(), + 
this->minParticlesToMerge, + this->posSpreadThreshold, + this->absMomSpreadThreshold, + this->relMomSpreadThreshold, + this->minMeanEnergy); + + /* execute particle merging alorithm */ + algorithm::kernel::Foreach foreach; + foreach(zone, cursor::make_MultiIndexCursor(), particleMergerKernel) + ; + + /* close all gaps caused by removal of particles */ + particles->fillAllGaps(); + } + + + void setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; + } + + + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (this->prefix + ".period").c_str(), + po::value(&this->notifyPeriod), + "enable plugin [for each n-th step]")( + (this->prefix + ".minParticlesToMerge").c_str(), + po::value(&this->minParticlesToMerge)->default_value(8), + "minimal number of macroparticles needed to merge" + " the macroparticle collection into a single macroparticle.")( + (this->prefix + ".posSpreadThreshold").c_str(), + po::value(&this->posSpreadThreshold)->default_value(0.5), + "Below this threshold of spread in position macroparticles" + " can be merged [unit: cell edge length].")( + (this->prefix + ".absMomSpreadThreshold").c_str(), + po::value(&this->absMomSpreadThreshold_mc)->default_value(-1.0), + "Below this absolute threshold of spread in momentum" + " macroparticles can be merged [unit: m_el * c]." + " Disabled for -1 (default).")( + (this->prefix + ".relMomSpreadThreshold").c_str(), + po::value(&this->relMomSpreadThreshold)->default_value(-1.0), + "Below this relative (to mean momentum) threshold of spread in" + " momentum macroparticles can be merged [unit: none]." 
+ " Disabled for -1 (default).")( + (this->prefix + ".minMeanEnergy").c_str(), + po::value(&this->minMeanEnergy_keV)->default_value(511.0), + "minimal mean kinetic energy needed to merge the macroparticle" + " collection into a single macroparticle [unit: keV]."); + } + + std::string pluginGetName() const + { + return this->name; + } + + protected: + void pluginLoad() + { + if(notifyPeriod.empty()) + return; + + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + + // clean user parameters + PMACC_VERIFY_MSG( + this->minParticlesToMerge > 1, + std::string("[Plugin: ") + this->prefix + + "] minParticlesToMerge" + " has to be greater than one."); + PMACC_VERIFY_MSG( + this->posSpreadThreshold >= float_X(0.0), + std::string("[Plugin: ") + this->prefix + + "] posSpreadThreshold" + " has to be non-negative."); + PMACC_VERIFY_MSG( + this->absMomSpreadThreshold_mc * this->relMomSpreadThreshold < float(0.0), + std::string("[Plugin: ") + this->prefix + + "] either" + " absMomSpreadThreshold or relMomSpreadThreshold has to be given"); + PMACC_VERIFY_MSG( + this->minMeanEnergy >= float_X(0.0), + std::string("[Plugin: ") + this->prefix + + "] minMeanEnergy" + " has to be non-negative."); + + // convert units of user parameters + this->absMomSpreadThreshold = this->absMomSpreadThreshold_mc * ELECTRON_MASS * SPEED_OF_LIGHT; + + const float_64 minMeanEnergy_SI = this->minMeanEnergy_keV * UNITCONV_keV_to_Joule; + this->minMeanEnergy = static_cast(minMeanEnergy_SI / UNIT_ENERGY); + } + + void pluginUnload() + { + } + + void restart(uint32_t, const std::string) + { + } + + void checkpoint(uint32_t, const std::string) + { + } + }; + + + template + struct ParticleMergerWrapped : ISimulationPlugin + { + private: + std::string name; + std::string prefix; + std::string notifyPeriod; + MappingDesc* cellDescription; + + public: + using ParticlesType = T_ParticlesType; + + ParticleMergerWrapped() + : name("ParticleMerger: merges several macroparticles with" + 
" similar position and momentum into a single one.\n" + "plugin disabled. Enable plugin by adding the `voronoiCellId`" + " attribute to the particle attribute list.") + , prefix(ParticlesType::FrameType::getName() + std::string("_merger")) + , cellDescription(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } + + std::string pluginGetName() const + { + return this->name; + } + + protected: + void setMappingDescription(MappingDesc*) + { + } + + void pluginRegisterHelp(po::options_description&) + { + } + + void pluginUnload() + { + } + + void restart(uint32_t, const std::string) + { + } + + void checkpoint(uint32_t, const std::string) + { + } + + void notify(uint32_t) + { + } + }; + + + template + struct ParticleMerger : ParticleMergerWrapped + { + }; + + } // namespace particleMerging + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/particleMerging/ParticleMerger.kernel b/include/picongpu/plugins/particleMerging/ParticleMerger.kernel index 83fc430b10..25b082d143 100644 --- a/include/picongpu/plugins/particleMerging/ParticleMerger.kernel +++ b/include/picongpu/plugins/particleMerging/ParticleMerger.kernel @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -28,444 +28,387 @@ namespace picongpu { -namespace plugins -{ -namespace particleMerging -{ - - /** Implements a particle merging algorithm based on - * - * Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016). - * Voronoi particle merging algorithm for PIC codes. - * Computer Physics Communications, 202, 165-174. 
- * - * \tparam T_ParticlesBox container of the particle species - */ - template< class T_ParticlesBox > - struct ParticleMergerKernel + namespace plugins { - using ParticlesBox = T_ParticlesBox; - using FramePtr = typename ParticlesBox::FramePtr; - using FrameType = typename ParticlesBox::FrameType; - using ArrayVoronoiCells = memory::Array< - VoronoiCell, - MAX_VORONOI_CELLS - >; - using VoronoiIndexPool = memory::IndexPool< - voronoiCellId::type, - MAX_VORONOI_CELLS - >; - - - ParticlesBox particlesBox; - /** minimal number of macroparticles needed to merge - the macroparticle collection into a single macroparticle */ - uint32_t minParticlesToMerge; - /** Below this threshold of spread in position (squared) macroparticles - can be merged [unit: cell edge length] */ - float_X posSpreadThreshold2; - /** Below this absolute threshold of spread in momentum - macroparticles can be merged [unit: m_el * c]. */ - float_X absMomSpreadThreshold; - /** Below this relative (to mean momentum) threshold of spread in - momentum macroparticles can be merged [unit: none]. */ - float_X relMomSpreadThreshold; - /** minimal mean kinetic energy needed to merge the macroparticle - collection into a single macroparticle [unit: keV] */ - float_X minMeanEnergy; - - ParticleMergerKernel( - ParticlesBox particlesBox, - uint32_t minParticlesToMerge, - float_X posSpreadThreshold, - float_X absMomSpreadThreshold, - float_X relMomSpreadThreshold, - float_X minMeanEnergy - ) : - particlesBox( particlesBox ), - minParticlesToMerge( minParticlesToMerge ), - posSpreadThreshold2( posSpreadThreshold * posSpreadThreshold ), - absMomSpreadThreshold( absMomSpreadThreshold ), - relMomSpreadThreshold( relMomSpreadThreshold ), - minMeanEnergy ( minMeanEnergy ) - {} - - /** map cell index to the initial Voronoi cell by aggregating N^simDim 'normal' - * cells to a single Voronoi cell. 
- * - * @param cellIdx cell index - */ - DINLINE voronoiCellId::type mapCellIdxToInitialVoronoiCell( const uint32_t cellIdx ) const + namespace particleMerging { - const DataSpace< simDim > cellIdxDim = DataSpaceOperations< simDim >::template map< - SuperCellSize - >( cellIdx ); - - const DataSpace< simDim > voronoiCellDim = cellIdxDim / 2; - - return static_cast< voronoiCellId::type >( - pmacc::math::linearize( - pmacc::math::CT::shrinkTo< SuperCellSize, simDim - 1 >::type::toRT() / 2, - voronoiCellDim - ) - ); - } - - - /** init the Voronoi cell id attribute for each particle in the super cell. - * - * The initial Voronoi cell is chosen by aggregating N^simDim 'normal' cells - * to a single Voronoi cell. - * - * @param cellIdx cell index - */ - template< typename T_Acc > - DINLINE void initVoronoiCellIdAttribute( - T_Acc const & acc, - const pmacc::math::Int& cellIdx - ) - { - //! \todo change this as soon as the kernel support lock step programming - constexpr uint32_t numWorkers = pmacc::math::CT::volume< SuperCellSize >::type::value; - const uint32_t workerIdx = DataSpaceOperations< simDim >::template map< - SuperCellSize - >( cellIdx % SuperCellSize::toRT() ); - particleAccess::Cell2Particle< SuperCellSize, numWorkers > forEachFrame; - forEachFrame( - acc, - this->particlesBox, - workerIdx, - cellIdx, - [this]( const T_Acc & acc, FramePtr frame, const int linearThreadIdx ) + /** Implements a particle merging algorithm based on + * + * Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016). + * Voronoi particle merging algorithm for PIC codes. + * Computer Physics Communications, 202, 165-174. 
+ * + * \tparam T_ParticlesBox container of the particle species + */ + template + struct ParticleMergerKernel + { + using ParticlesBox = T_ParticlesBox; + using FramePtr = typename ParticlesBox::FramePtr; + using FrameType = typename ParticlesBox::FrameType; + using ArrayVoronoiCells = memory::Array; + using VoronoiIndexPool = memory::IndexPool; + + + ParticlesBox particlesBox; + /** minimal number of macroparticles needed to merge + the macroparticle collection into a single macroparticle */ + uint32_t minParticlesToMerge; + /** Below this threshold of spread in position (squared) macroparticles + can be merged [unit: cell edge length] */ + float_X posSpreadThreshold2; + /** Below this absolute threshold of spread in momentum + macroparticles can be merged [unit: m_el * c]. */ + float_X absMomSpreadThreshold; + /** Below this relative (to mean momentum) threshold of spread in + momentum macroparticles can be merged [unit: none]. */ + float_X relMomSpreadThreshold; + /** minimal mean kinetic energy needed to merge the macroparticle + collection into a single macroparticle [unit: keV] */ + float_X minMeanEnergy; + + ParticleMergerKernel( + ParticlesBox particlesBox, + uint32_t minParticlesToMerge, + float_X posSpreadThreshold, + float_X absMomSpreadThreshold, + float_X relMomSpreadThreshold, + float_X minMeanEnergy) + : particlesBox(particlesBox) + , minParticlesToMerge(minParticlesToMerge) + , posSpreadThreshold2(posSpreadThreshold * posSpreadThreshold) + , absMomSpreadThreshold(absMomSpreadThreshold) + , relMomSpreadThreshold(relMomSpreadThreshold) + , minMeanEnergy(minMeanEnergy) { - auto particle = frame[linearThreadIdx]; - - const lcellId_t particleCellIdx = particle[localCellIdx_]; - - particle[voronoiCellId_] = this->mapCellIdxToInitialVoronoiCell( particleCellIdx ); - }, - particles::filter::All{} - ); - } - - /** calculate position of particle within a super cell. 
- * - * @param particleCellIdx local particle cell index - * @param positionWithinCell position within cell - * @return position of particle with respect to its super cell's origin - */ - DINLINE floatD_X getParticlePosWithinSuperCell( - const lcellId_t particleCellIdx, - const floatD_X positionWithinCell - ) const - { - const DataSpace< simDim > particleCellIdxDim = DataSpaceOperations< simDim >::template map< - SuperCellSize - >( particleCellIdx ); + } - floatD_X result; - for( int i = 0; i < simDim; i++ ) - { - result[i] = static_cast< float_X >( particleCellIdxDim[i] ) + positionWithinCell[i]; - } - - return result; - } - - /** This method handles the merging process on the single-particle level. - * - * It is called in the main loop of the merging algorithm. - * Depending on the state of the Voronoi cell where the particle belongs - * to the execution is forked into distinct sub-processes. - * - * @param cellIdx n-dim. cell index from the origin of the local domain - * @param listVoronoiCells fixed-sized array of Voronoi cells - */ - template< typename T_Acc > - DINLINE void processParticles( - T_Acc const & acc, - const pmacc::math::Int& cellIdx, - ArrayVoronoiCells& listVoronoiCells - ) - { - //! \todo change this as soon as the kernel support lock step programming - constexpr uint32_t numWorkers = pmacc::math::CT::volume< SuperCellSize >::type::value; - const uint32_t workerIdx = DataSpaceOperations< simDim >::template map< - SuperCellSize - >( cellIdx % SuperCellSize::toRT() ); - particleAccess::Cell2Particle< SuperCellSize, numWorkers > forEachFrame; - forEachFrame( - acc, - this->particlesBox, - workerIdx, - cellIdx, - [&]( const T_Acc & acc, FramePtr frame, const int linearThreadIdx ) + /** map cell index to the initial Voronoi cell by aggregating N^simDim 'normal' + * cells to a single Voronoi cell. 
+ * + * @param cellIdx cell index + */ + DINLINE voronoiCellId::type mapCellIdxToInitialVoronoiCell(const uint32_t cellIdx) const { - auto particle = frame[linearThreadIdx]; + const DataSpace cellIdxDim + = DataSpaceOperations::template map(cellIdx); + + const DataSpace voronoiCellDim = cellIdxDim / 2; - const voronoiCellId::type voronoiCellId = particle[voronoiCellId_]; - if( voronoiCellId == -1 ) - return; + return static_cast(pmacc::math::linearize( + pmacc::math::CT::shrinkTo::type::toRT() / 2, + voronoiCellDim)); + } - VoronoiCell& voronoiCell = listVoronoiCells[voronoiCellId]; - const floatD_X position = this->getParticlePosWithinSuperCell( - particle[localCellIdx_], - particle[position_] - ); - const float_X weighting = particle[weighting_]; - const float3_X momentum = particle[momentum_] / weighting; + /** init the Voronoi cell id attribute for each particle in the super cell. + * + * The initial Voronoi cell is chosen by aggregating N^simDim 'normal' cells + * to a single Voronoi cell. + * + * @param cellIdx cell index + */ + template + DINLINE void initVoronoiCellIdAttribute(T_Acc const& acc, const pmacc::math::Int& cellIdx) + { + //! \todo change this as soon as the kernel support lock step programming + constexpr uint32_t numWorkers = pmacc::math::CT::volume::type::value; + const uint32_t workerIdx + = DataSpaceOperations::template map(cellIdx % SuperCellSize::toRT()); + particleAccess::Cell2Particle forEachFrame; + forEachFrame( + acc, + this->particlesBox, + workerIdx, + cellIdx, + [this](const T_Acc& acc, FramePtr frame, const int linearThreadIdx) { + auto particle = frame[linearThreadIdx]; + + const lcellId_t particleCellIdx = particle[localCellIdx_]; + + particle[voronoiCellId_] = this->mapCellIdxToInitialVoronoiCell(particleCellIdx); + }, + particles::filter::All{}); + } - switch( voronoiCell.status ) + /** calculate position of particle within a super cell. 
+ * + * @param particleCellIdx local particle cell index + * @param positionWithinCell position within cell + * @return position of particle with respect to its super cell's origin + */ + DINLINE floatD_X + getParticlePosWithinSuperCell(const lcellId_t particleCellIdx, const floatD_X positionWithinCell) const + { + const DataSpace particleCellIdxDim + = DataSpaceOperations::template map(particleCellIdx); + + floatD_X result; + for(int i = 0; i < simDim; i++) { - case VoronoiStatus::collecting: - voronoiCell.addParticle( - acc, - position, - momentum, - weighting - ); + result[i] = static_cast(particleCellIdxDim[i]) + positionWithinCell[i]; + } - break; + return result; + } - case VoronoiStatus::splitting: - { - /* determine in what sub-Voronoi cell the particle falls */ - const voronoiCellId::type subVoronoiCellId = voronoiCell.getSubVoronoiCell( - position, - momentum - ); - - particle[voronoiCellId_] = subVoronoiCellId; - - /* place particle into one of the two sub-Voronoi cells */ - listVoronoiCells[subVoronoiCellId].addParticle( - acc, - position, - momentum, - weighting - ); - } + /** This method handles the merging process on the single-particle level. + * + * It is called in the main loop of the merging algorithm. + * Depending on the state of the Voronoi cell where the particle belongs + * to the execution is forked into distinct sub-processes. + * + * @param cellIdx n-dim. cell index from the origin of the local domain + * @param listVoronoiCells fixed-sized array of Voronoi cells + */ + template + DINLINE void processParticles( + T_Acc const& acc, + const pmacc::math::Int& cellIdx, + ArrayVoronoiCells& listVoronoiCells) + { + //! 
\todo change this as soon as the kernel support lock step programming + constexpr uint32_t numWorkers = pmacc::math::CT::volume::type::value; + const uint32_t workerIdx + = DataSpaceOperations::template map(cellIdx % SuperCellSize::toRT()); + particleAccess::Cell2Particle forEachFrame; + forEachFrame( + acc, + this->particlesBox, + workerIdx, + cellIdx, + [&](const T_Acc& acc, FramePtr frame, const int linearThreadIdx) { + auto particle = frame[linearThreadIdx]; + + const voronoiCellId::type voronoiCellId = particle[voronoiCellId_]; + if(voronoiCellId == -1) + return; + + VoronoiCell& voronoiCell = listVoronoiCells[voronoiCellId]; + + const floatD_X position + = this->getParticlePosWithinSuperCell(particle[localCellIdx_], particle[position_]); + const float_X weighting = particle[weighting_]; + const float3_X momentum = particle[momentum_] / weighting; + + switch(voronoiCell.status) + { + case VoronoiStatus::collecting: + voronoiCell.addParticle(acc, position, momentum, weighting); - break; + break; - case VoronoiStatus::abort: - /* check out of the Voronoi cell */ - particle[voronoiCellId_] = -1; + case VoronoiStatus::splitting: + { + /* determine in what sub-Voronoi cell the particle falls */ + const voronoiCellId::type subVoronoiCellId + = voronoiCell.getSubVoronoiCell(position, momentum); - break; + particle[voronoiCellId_] = subVoronoiCellId; - case VoronoiStatus::readyForMerging: - /* merge all particles of this Voronoi cell */ - if( voronoiCell.isFirstParticle( acc ) ) - { - /* I am the first particle in the Voronoi cell - * => get dressed with Voronoi cell's attributes */ - particle[momentum_] = voronoiCell.meanValue * voronoiCell.numRealParticles; - particle[weighting_] = voronoiCell.numRealParticles; - } - else - { - /* I am not the first particle in the Voronoi cell - * => remove me */ - particle[multiMask_] = 0; - } + /* place particle into one of the two sub-Voronoi cells */ + listVoronoiCells[subVoronoiCellId].addParticle(acc, position, momentum, 
weighting); + } - /* check out of the Voronoi cell */ - particle[voronoiCellId_] = -1; - } - }, - particles::filter::All{} - ); - } - - /** This method handles the merging process on the Voronoi cell level. - * - * It is called in the main loop of the merging algorithm. - * It does the transition of the distinct states of each Voronoi cell. - * - * @param listVoronoiCells fixed-sized array of Voronoi cells - * @param voronoiIndexPool holds indices of active Voronoi cells within `listVoronoiCells` - */ - DINLINE void processVoronoiCells( - ArrayVoronoiCells& listVoronoiCells, - VoronoiIndexPool& voronoiIndexPool - ) const - { - for( voronoiCellId::type voronoiCellId : voronoiIndexPool ) - { - VoronoiCell& voronoiCell = listVoronoiCells[voronoiCellId]; + break; + + case VoronoiStatus::abort: + /* check out of the Voronoi cell */ + particle[voronoiCellId_] = -1; + + break; - switch( voronoiCell.status ) + case VoronoiStatus::readyForMerging: + /* merge all particles of this Voronoi cell */ + if(voronoiCell.isFirstParticle(acc)) + { + /* I am the first particle in the Voronoi cell + * => get dressed with Voronoi cell's attributes */ + particle[momentum_] = voronoiCell.meanValue * voronoiCell.numRealParticles; + particle[weighting_] = voronoiCell.numRealParticles; + } + else + { + /* I am not the first particle in the Voronoi cell + * => remove me */ + particle[multiMask_] = 0; + } + + /* check out of the Voronoi cell */ + particle[voronoiCellId_] = -1; + } + }, + particles::filter::All{}); + } + + /** This method handles the merging process on the Voronoi cell level. + * + * It is called in the main loop of the merging algorithm. + * It does the transition of the distinct states of each Voronoi cell. 
+ * + * @param listVoronoiCells fixed-sized array of Voronoi cells + * @param voronoiIndexPool holds indices of active Voronoi cells within `listVoronoiCells` + */ + DINLINE void processVoronoiCells( + ArrayVoronoiCells& listVoronoiCells, + VoronoiIndexPool& voronoiIndexPool) const { - case VoronoiStatus::collecting: + for(voronoiCellId::type voronoiCellId : voronoiIndexPool) { - /* check if Voronoi cell is too small of count */ - if( voronoiCell.numMacroParticles < this->minParticlesToMerge ) + VoronoiCell& voronoiCell = listVoronoiCells[voronoiCellId]; + + switch(voronoiCell.status) + { + case VoronoiStatus::collecting: { - voronoiCell.setToAbort(); + /* check if Voronoi cell is too small of count */ + if(voronoiCell.numMacroParticles < this->minParticlesToMerge) + { + voronoiCell.setToAbort(); - break; - } + break; + } - /* finalize mean value calculation */ - voronoiCell.finalizeMeanValues(); + /* finalize mean value calculation */ + voronoiCell.finalizeMeanValues(); - /* abort if mean energy of Voronoi cell is below limit */ - if( voronoiCell.getMeanEnergy( frame::getMass() ) < this->minMeanEnergy ) - { - voronoiCell.setToAbort(); + /* abort if mean energy of Voronoi cell is below limit */ + if(voronoiCell.getMeanEnergy(frame::getMass()) < this->minMeanEnergy) + { + voronoiCell.setToAbort(); - break; - } + break; + } - /* choose threshold of spread of momentum */ - const float_X momSpreadThreshold2 = - this->relMomSpreadThreshold != float_X( -1.0 ) ? 
- this->relMomSpreadThreshold * this->relMomSpreadThreshold * voronoiCell.getMeanMomentum2() : - this->absMomSpreadThreshold * this->absMomSpreadThreshold; - - /* check if Voronoi cell is too large in spread of position or momentum */ - uint8_t splittingComponent; - if( - ( - voronoiCell.splittingStage == VoronoiSplittingStage::position && - voronoiCell.getMaxPositionSpread2( splittingComponent ) > this->posSpreadThreshold2 - ) || - ( - voronoiCell.splittingStage == VoronoiSplittingStage::momentum && - voronoiCell.getMaxMomentumSpread2( splittingComponent ) > momSpreadThreshold2 - ) - ) - { - /* create two new sub Voronoi cells */ - voronoiCell.setToSplitting( - splittingComponent, - voronoiIndexPool.get(), /* lower Voronoi cell id */ - voronoiIndexPool.get() /* higher Voronoi cell id */ - ); - - /* abort if Voronoi index pool is full */ - if( voronoiCell.lowerCellId == -1 || voronoiCell.higherCellId == -1 ) + /* choose threshold of spread of momentum */ + const float_X momSpreadThreshold2 = this->relMomSpreadThreshold != float_X(-1.0) + ? 
this->relMomSpreadThreshold * this->relMomSpreadThreshold + * voronoiCell.getMeanMomentum2() + : this->absMomSpreadThreshold * this->absMomSpreadThreshold; + + /* check if Voronoi cell is too large in spread of position or momentum */ + uint8_t splittingComponent; + if((voronoiCell.splittingStage == VoronoiSplittingStage::position + && voronoiCell.getMaxPositionSpread2(splittingComponent) > this->posSpreadThreshold2) + || (voronoiCell.splittingStage == VoronoiSplittingStage::momentum + && voronoiCell.getMaxMomentumSpread2(splittingComponent) > momSpreadThreshold2)) { - voronoiCell.setToAbort(); + /* create two new sub Voronoi cells */ + voronoiCell.setToSplitting( + splittingComponent, + voronoiIndexPool.get(), /* lower Voronoi cell id */ + voronoiIndexPool.get() /* higher Voronoi cell id */ + ); + + /* abort if Voronoi index pool is full */ + if(voronoiCell.lowerCellId == -1 || voronoiCell.higherCellId == -1) + { + voronoiCell.setToAbort(); + + break; + } + + /* initialize the two new sub Voronoi cells in `collecting` state */ + listVoronoiCells[voronoiCell.lowerCellId] = VoronoiCell(voronoiCell.splittingStage); + listVoronoiCells[voronoiCell.higherCellId] = VoronoiCell(voronoiCell.splittingStage); break; } - /* initialize the two new sub Voronoi cells in `collecting` state */ - listVoronoiCells[voronoiCell.lowerCellId] = VoronoiCell( voronoiCell.splittingStage ); - listVoronoiCells[voronoiCell.higherCellId] = VoronoiCell( voronoiCell.splittingStage ); + /* switch to momentum-splitting-stage after position-splitting-stage */ + if(voronoiCell.splittingStage == VoronoiSplittingStage::position) + { + voronoiCell = VoronoiCell(VoronoiSplittingStage::momentum); - break; - } + break; + } - /* switch to momentum-splitting-stage after position-splitting-stage */ - if( voronoiCell.splittingStage == VoronoiSplittingStage::position ) - { - voronoiCell = VoronoiCell( VoronoiSplittingStage::momentum ); + /* if the Voronoi cell is neither too small in count + * nor too large in 
spread of position or momentum + * nor too low in mean energy it is ready to be merged + */ + voronoiCell.setToReadyForMerging(); break; } - /* if the Voronoi cell is neither too small in count - * nor too large in spread of position or momentum - * nor too low in mean energy it is ready to be merged - */ - voronoiCell.setToReadyForMerging(); + default: + /* delete Voronoi cell */ + voronoiIndexPool.release(voronoiCellId); - break; + break; + } } + } - default: - /* delete Voronoi cell */ - voronoiIndexPool.release( voronoiCellId ); - break; - } - } - } - - - /** Entry point of the particle merging algorithm - * - * @param cellIndex n-dim. cell index from the origin of the local domain - */ - template< typename T_Acc> - DINLINE void operator()( - T_Acc const & acc, - const pmacc::math::Int& cellIndex - ) - { - /* multi-dim vector from origin of the super cell to a cell in units of cells */ - const pmacc::math::Int threadIndex = cellIndex % SuperCellSize::toRT(); - - /* conversion from a multi-dim cell coordinate to a linear coordinate - * of the cell in its super cell */ - const int linearThreadIdx = pmacc::math::linearize( - pmacc::math::CT::shrinkTo::type::toRT(), - threadIndex - ); - - /* fixed-sized array of Voronoi cells */ - PMACC_SMEM( acc, listVoronoiCells, ArrayVoronoiCells ); - /* holds indices of active Voronoi cells within `listVoronoiCells` */ - PMACC_SMEM( acc, voronoiIndexPool, VoronoiIndexPool ); - - /* number of initial Voronoi cells - * - * `1u << simDim` is equivalent to `pow(2, simDim)` but can be - * calculated at compile-time to save a shared variable. - */ - constexpr uint16_t numInitialVoronoiCells = pmacc::math::CT::volume< - SuperCellSize - >::type::value / ( 1u << simDim ); + /** Entry point of the particle merging algorithm + * + * @param cellIndex n-dim. 
cell index from the origin of the local domain + */ + template + DINLINE void operator()(T_Acc const& acc, const pmacc::math::Int& cellIndex) + { + /* multi-dim vector from origin of the super cell to a cell in units of cells */ + const pmacc::math::Int threadIndex = cellIndex % SuperCellSize::toRT(); + + /* conversion from a multi-dim cell coordinate to a linear coordinate + * of the cell in its super cell */ + const int linearThreadIdx = pmacc::math::linearize( + pmacc::math::CT::shrinkTo::type::toRT(), + threadIndex); + + /* fixed-sized array of Voronoi cells */ + PMACC_SMEM(acc, listVoronoiCells, ArrayVoronoiCells); + /* holds indices of active Voronoi cells within `listVoronoiCells` */ + PMACC_SMEM(acc, voronoiIndexPool, VoronoiIndexPool); + + /* number of initial Voronoi cells + * + * `1u << simDim` is equivalent to `pow(2, simDim)` but can be + * calculated at compile-time to save a shared variable. + */ + constexpr uint16_t numInitialVoronoiCells + = pmacc::math::CT::volume::type::value / (1u << simDim); + + if(linearThreadIdx == 0) + { + /* init index pool of Voronoi Cells */ + voronoiIndexPool = VoronoiIndexPool(numInitialVoronoiCells); + } - if( linearThreadIdx == 0 ) - { - /* init index pool of Voronoi Cells */ - voronoiIndexPool = VoronoiIndexPool( numInitialVoronoiCells ); - } + cupla::__syncthreads(acc); - __syncthreads(); + /* set initial Voronoi cells into `collecting` state */ + if(linearThreadIdx < numInitialVoronoiCells) + listVoronoiCells[linearThreadIdx] = VoronoiCell(); - /* set initial Voronoi cells into `collecting` state */ - if( linearThreadIdx < numInitialVoronoiCells ) - listVoronoiCells[linearThreadIdx] = VoronoiCell(); + cupla::__syncthreads(acc); - __syncthreads(); + /* init the voronoiCellId attribute for each particle */ + this->initVoronoiCellIdAttribute(acc, cellIndex); - /* init the voronoiCellId attribute for each particle */ - this->initVoronoiCellIdAttribute( acc, cellIndex ); + cupla::__syncthreads(acc); - __syncthreads(); + 
/* main loop of the merging algorithm */ + while(voronoiIndexPool.size() > 0) + { + this->processParticles(acc, cellIndex, listVoronoiCells); - /* main loop of the merging algorithm */ - while( voronoiIndexPool.size() > 0 ) - { - this->processParticles( - acc, - cellIndex, - listVoronoiCells - ); + cupla::__syncthreads(acc); - __syncthreads(); + /* TODO: parallelize */ + if(linearThreadIdx == 0) + { + this->processVoronoiCells(listVoronoiCells, voronoiIndexPool); + } - /* TODO: parallelize */ - if( linearThreadIdx == 0 ) - { - this->processVoronoiCells( - listVoronoiCells, - voronoiIndexPool - ); + cupla::__syncthreads(acc); + } } - - __syncthreads(); - } - } - }; + }; -} // namespace particleMerging -} // namespace plugins + } // namespace particleMerging + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/particleMerging/VoronoiCell.hpp b/include/picongpu/plugins/particleMerging/VoronoiCell.hpp index 75bbb56b90..e6699dd192 100644 --- a/include/picongpu/plugins/particleMerging/VoronoiCell.hpp +++ b/include/picongpu/plugins/particleMerging/VoronoiCell.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -24,241 +24,243 @@ namespace picongpu { -namespace plugins -{ -namespace particleMerging -{ - - /** Status of a Voronoi cell */ - enum struct VoronoiStatus : uint8_t - { - /* !< a Voronoi cell is collecting particles (first state) */ - collecting, - /* !< the Voronoi cell is splitting thus all its particles have - * to move to one of two sub-Voronoi cells */ - splitting, - /* !< the cell needs to be destroyed. Before this can happen - * all its particles need to clear their voronoiCellId attribute. */ - abort, - /* !< the Voronoi cell is ready for merging. After merging it is destroyed. 
*/ - readyForMerging, - }; - - - /** Stage of a Voronoi cell - * - * The spliiting process is two-fold: at first, the splitting is done regarding - * only the spread in position and then by looking at the spread of momentum. - */ - enum struct VoronoiSplittingStage : bool - { - /* !< the spatial distribution is splitted */ - position, - /* !< the momentum distribution is splitted */ - momentum - }; - - - /** Represents a Voronoi cell */ - struct VoronoiCell + namespace plugins { - VoronoiStatus status; - VoronoiSplittingStage splittingStage; - uint32_t numMacroParticles; - float_X numRealParticles; - - float3_X meanValue; - float3_X meanSquaredValue; - - uint8_t splittingComponent; - int32_t lowerCellId; - int32_t higherCellId; - int firstParticleFlag; - - HDINLINE - VoronoiCell( VoronoiSplittingStage splittingStage = VoronoiSplittingStage::position ) : - status( VoronoiStatus::collecting ), - splittingStage( splittingStage ), - numMacroParticles( 0 ), - numRealParticles( float_X( 0.0 ) ), - meanValue( float3_X::create( 0.0 ) ), - meanSquaredValue( float3_X::create( 0.0 ) ), - firstParticleFlag( 0 ) - {} - - /** status setter */ - HDINLINE - void setToAbort() + namespace particleMerging { - this->status = VoronoiStatus::abort; - } + /** Status of a Voronoi cell */ + enum struct VoronoiStatus : uint8_t + { + /* !< a Voronoi cell is collecting particles (first state) */ + collecting, + /* !< the Voronoi cell is splitting thus all its particles have + * to move to one of two sub-Voronoi cells */ + splitting, + /* !< the cell needs to be destroyed. Before this can happen + * all its particles need to clear their voronoiCellId attribute. */ + abort, + /* !< the Voronoi cell is ready for merging. After merging it is destroyed. */ + readyForMerging, + }; + + + /** Stage of a Voronoi cell + * + * The spliiting process is two-fold: at first, the splitting is done regarding + * only the spread in position and then by looking at the spread of momentum. 
+ */ + enum struct VoronoiSplittingStage : bool + { + /* !< the spatial distribution is splitted */ + position, + /* !< the momentum distribution is splitted */ + momentum + }; - /** status setter */ - HDINLINE - void setToSplitting( - const uint8_t splittingComponent, - const int32_t lowerCellId, - const int32_t higherCellId) - { - this->status = VoronoiStatus::splitting; - this->splittingComponent = splittingComponent; - this->lowerCellId = lowerCellId; - this->higherCellId = higherCellId; - } + /** Represents a Voronoi cell */ + struct VoronoiCell + { + VoronoiStatus status; + VoronoiSplittingStage splittingStage; + uint32_t numMacroParticles; + float_X numRealParticles; + + float3_X meanValue; + float3_X meanSquaredValue; + + uint8_t splittingComponent; + int32_t lowerCellId; + int32_t higherCellId; + int firstParticleFlag; + + HDINLINE + VoronoiCell(VoronoiSplittingStage splittingStage = VoronoiSplittingStage::position) + : status(VoronoiStatus::collecting) + , splittingStage(splittingStage) + , numMacroParticles(0) + , numRealParticles(float_X(0.0)) + , meanValue(float3_X::create(0.0)) + , meanSquaredValue(float3_X::create(0.0)) + , firstParticleFlag(0) + { + } + /** status setter */ + HDINLINE + void setToAbort() + { + this->status = VoronoiStatus::abort; + } - /** status setter */ - HDINLINE - void setToReadyForMerging() - { - this->status = VoronoiStatus::readyForMerging; - } - /** check if the current thread is associated to the first particle */ - template< typename T_Acc > - DINLINE - bool isFirstParticle(T_Acc const & acc) - { - return atomicExch( &this->firstParticleFlag, 1 ) == 0; - } - - - /** add a particle to this Voronoi cell */ - template< typename T_Acc > - DINLINE - void addParticle( - T_Acc const & acc, - const floatD_X position, - const float3_X momentum, - const float_X weighting - ) - { - atomicAdd( &this->numMacroParticles, static_cast(1), ::alpaka::hierarchy::Threads{} ); - atomicAdd( &this->numRealParticles, weighting, 
::alpaka::hierarchy::Threads{} ); + /** status setter */ + HDINLINE + void setToSplitting( + const uint8_t splittingComponent, + const int32_t lowerCellId, + const int32_t higherCellId) + { + this->status = VoronoiStatus::splitting; + this->splittingComponent = splittingComponent; + this->lowerCellId = lowerCellId; + this->higherCellId = higherCellId; + } - if( this->splittingStage == VoronoiSplittingStage::position ) - { - const floatD_X position2 = position * position; - for( int i = 0; i < simDim; i++ ) + /** status setter */ + HDINLINE + void setToReadyForMerging() { - atomicAdd( &this->meanValue[i], weighting * position[i], ::alpaka::hierarchy::Threads{} ); - atomicAdd( &this->meanSquaredValue[i], weighting * position2[i], ::alpaka::hierarchy::Threads{} ); + this->status = VoronoiStatus::readyForMerging; } - } - else - { - const float3_X momentum2 = momentum * momentum; - for( int i = 0; i < DIM3; i++ ) + /** check if the current thread is associated to the first particle */ + template + DINLINE bool isFirstParticle(T_Acc const& acc) { - atomicAdd( &this->meanValue[i], weighting * momentum[i], ::alpaka::hierarchy::Threads{} ); - atomicAdd( &this->meanSquaredValue[i], weighting * momentum2[i], ::alpaka::hierarchy::Threads{} ); + return cupla::atomicExch(acc, &this->firstParticleFlag, 1) == 0; } - } - } - /** finalize mean value calculation */ - HDINLINE - void finalizeMeanValues() - { - this->meanValue /= this->numRealParticles; - this->meanSquaredValue /= this->numRealParticles; - } + /** add a particle to this Voronoi cell */ + template + DINLINE void addParticle( + T_Acc const& acc, + const floatD_X position, + const float3_X momentum, + const float_X weighting) + { + cupla::atomicAdd( + acc, + &this->numMacroParticles, + static_cast(1), + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd(acc, &this->numRealParticles, weighting, ::alpaka::hierarchy::Threads{}); + + if(this->splittingStage == VoronoiSplittingStage::position) + { + const floatD_X position2 = 
position * position; + + for(int i = 0; i < simDim; i++) + { + cupla::atomicAdd( + acc, + &this->meanValue[i], + weighting * position[i], + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd( + acc, + &this->meanSquaredValue[i], + weighting * position2[i], + ::alpaka::hierarchy::Threads{}); + } + } + else + { + const float3_X momentum2 = momentum * momentum; + + for(int i = 0; i < DIM3; i++) + { + cupla::atomicAdd( + acc, + &this->meanValue[i], + weighting * momentum[i], + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd( + acc, + &this->meanSquaredValue[i], + weighting * momentum2[i], + ::alpaka::hierarchy::Threads{}); + } + } + } - /** get the mean energy of this Voronoi cell if called in momentum stage */ - HDINLINE - float_X getMeanEnergy( const float_X mass ) const - { - return KinEnergy<>()( - this->meanValue, - mass - ); - } - - /** get the mean momentum squared of this Voronoi cell if called in momentum stage */ - HDINLINE - float_X getMeanMomentum2() const - { - return math::abs2( this->meanValue ); - } + /** finalize mean value calculation */ + HDINLINE + void finalizeMeanValues() + { + this->meanValue /= this->numRealParticles; + this->meanSquaredValue /= this->numRealParticles; + } - /** determine in which of the two sub-Voronoi cells a particle falls */ - HDINLINE - int32_t getSubVoronoiCell( - const floatD_X position, - const float3_X momentum - ) const - { - const float_X valParticle = - this->splittingStage == VoronoiSplittingStage::position ? - position[this->splittingComponent] : - momentum[this->splittingComponent] - ; - - const float_X meanVoronoi = this->meanValue[this->splittingComponent]; - - return - valParticle < meanVoronoi ? 
- this->lowerCellId : - this->higherCellId - ; - } - - - /** auxillary function for getting the mean squared deviation in position or momentum */ - HDINLINE - float_X getMaxValueSpread2( - uint8_t& component, - const uint8_t dimension - ) const - { - const float3_X meanValue2 = this->meanValue * this->meanValue; - const float3_X valueSpread2 = this->meanSquaredValue - meanValue2; + /** get the mean energy of this Voronoi cell if called in momentum stage */ + HDINLINE + float_X getMeanEnergy(const float_X mass) const + { + return KinEnergy<>()(this->meanValue, mass); + } - /* find component of most spread in position */ - component = 0; - float_X maxValueSpread2 = valueSpread2[0]; - for( uint8_t i = 1; i < dimension; i++ ) - { - if( valueSpread2[i] > maxValueSpread2 ) + /** get the mean momentum squared of this Voronoi cell if called in momentum stage */ + HDINLINE + float_X getMeanMomentum2() const { - maxValueSpread2 = valueSpread2[i]; - component = i; + return pmacc::math::abs2(this->meanValue); } - } - return maxValueSpread2; - } + /** determine in which of the two sub-Voronoi cells a particle falls */ + HDINLINE + int32_t getSubVoronoiCell(const floatD_X position, const float3_X momentum) const + { + const float_X valParticle = this->splittingStage == VoronoiSplittingStage::position + ? position[this->splittingComponent] + : momentum[this->splittingComponent]; - /** calculate the maxmimum squared spread in position - * - * @param component index of position component of maxmimum spread - * @return maxmimum squared spread in position - */ - HDINLINE - float_X getMaxPositionSpread2( uint8_t& component ) const - { - return this->getMaxValueSpread2( component, simDim ); - } + const float_X meanVoronoi = this->meanValue[this->splittingComponent]; + + return valParticle < meanVoronoi ? 
this->lowerCellId : this->higherCellId; + } - /** calculate the maxmimum squared spread in momentum - * - * @param component index of momentum component of maxmimum spread - * @return maxmimum squared spread in momentum - */ - HDINLINE - float_X getMaxMomentumSpread2( uint8_t& component ) const - { - return this->getMaxValueSpread2( component, DIM3 ); - } - }; + /** auxillary function for getting the mean squared deviation in position or momentum */ + HDINLINE + float_X getMaxValueSpread2(uint8_t& component, const uint8_t dimension) const + { + const float3_X meanValue2 = this->meanValue * this->meanValue; + const float3_X valueSpread2 = this->meanSquaredValue - meanValue2; + + /* find component of most spread in position */ + component = 0; + float_X maxValueSpread2 = valueSpread2[0]; + for(uint8_t i = 1; i < dimension; i++) + { + if(valueSpread2[i] > maxValueSpread2) + { + maxValueSpread2 = valueSpread2[i]; + component = i; + } + } + + return maxValueSpread2; + } + + + /** calculate the maxmimum squared spread in position + * + * @param component index of position component of maxmimum spread + * @return maxmimum squared spread in position + */ + HDINLINE + float_X getMaxPositionSpread2(uint8_t& component) const + { + return this->getMaxValueSpread2(component, simDim); + } + + + /** calculate the maxmimum squared spread in momentum + * + * @param component index of momentum component of maxmimum spread + * @return maxmimum squared spread in momentum + */ + HDINLINE + float_X getMaxMomentumSpread2(uint8_t& component) const + { + return this->getMaxValueSpread2(component, DIM3); + } + }; -} // namespace particleMerging -} // namespace plugins + } // namespace particleMerging + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/ExecuteParticleFilter.hpp b/include/picongpu/plugins/radiation/ExecuteParticleFilter.hpp index e2d10111d3..678920f2f0 100644 --- a/include/picongpu/plugins/radiation/ExecuteParticleFilter.hpp +++ 
b/include/picongpu/plugins/radiation/ExecuteParticleFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. * @@ -29,70 +29,65 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - - /** read the `radiationMask` of a species */ - template< bool hasFilter > - struct ExecuteParticleFilter + namespace plugins { - /** get the attribute value of `radiationMask` - * - * @param species buffer - * @param currentStep current simulation time step - * @return value of the attribute `radiationMask` - */ - template< typename T_Species > - void operator()( std::shared_ptr const &, const uint32_t currentStep ) + namespace radiation { - particles::Manipulate< - picongpu::plugins::radiation::RadiationParticleFilter, - T_Species - >{}( currentStep ); - } - }; + /** read the `radiationMask` of a species */ + template + struct ExecuteParticleFilter + { + /** get the attribute value of `radiationMask` + * + * @param species buffer + * @param currentStep current simulation time step + * @return value of the attribute `radiationMask` + */ + template + void operator()(std::shared_ptr const&, const uint32_t currentStep) + { + particles::Manipulate{}( + currentStep); + } + }; - /** specialization - * - * specialization for the case that the species not owns the attribute - * `radiationMask` - */ - template< > - struct ExecuteParticleFilter< false > - { - /** get the attribute value of `radiationMask` - * - * @param particle to be used - * @return always true - */ - template< typename T_Species > - void operator()( const std::shared_ptr, const uint32_t currentStep ) - { - } - }; + /** specialization + * + * specialization for the case that the species not owns the attribute + * `radiationMask` + */ + template<> + struct ExecuteParticleFilter + { + /** get the attribute value of `radiationMask` + * + * @param particle to be used + * @return always true + */ + template + void operator()(const 
std::shared_ptr, const uint32_t currentStep) + { + } + }; - /** execute the particle filter on a species - * - * It is **allowed** to call this function even if the species does not contain - * the attribute `radiationMask`. - * The filter is **not** executed if the species does not contain the attribute `radiationMask`. - * - * @tparam T_Species species type - * @param species species to be filtered - */ - template< typename T_Species > - void executeParticleFilter( std::shared_ptr& species, const uint32_t currentStep ) - { - constexpr bool hasRadiationFilter = pmacc::traits::HasIdentifier< - typename T_Species::FrameType, - radiationMask - >::type::value; + /** execute the particle filter on a species + * + * It is **allowed** to call this function even if the species does not contain + * the attribute `radiationMask`. + * The filter is **not** executed if the species does not contain the attribute `radiationMask`. + * + * @tparam T_Species species type + * @param species species to be filtered + */ + template + void executeParticleFilter(std::shared_ptr& species, const uint32_t currentStep) + { + constexpr bool hasRadiationFilter + = pmacc::traits::HasIdentifier::type::value; - return ExecuteParticleFilter< hasRadiationFilter >{}( species, currentStep ); - } + return ExecuteParticleFilter{}(species, currentStep); + } -} // namespace radiation -} // namespace plugins + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/GetRadiationMask.hpp b/include/picongpu/plugins/radiation/GetRadiationMask.hpp index 5ca1c58661..ab8db86968 100644 --- a/include/picongpu/plugins/radiation/GetRadiationMask.hpp +++ b/include/picongpu/plugins/radiation/GetRadiationMask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -26,64 +26,63 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - /** read the `radiationMask` of a species */ - template< bool hasRadiationMask > - struct GetRadiationMask + namespace plugins { - /** get the attribute value of `radiationMask` - * - * @param particle particle to be used - * @return value of the attribute `radiationMask` - */ - template< typename T_Particle > - HDINLINE bool operator()( const T_Particle& particle ) const + namespace radiation { - return particle[ picongpu::radiationMask_ ]; - } - }; + /** read the `radiationMask` of a species */ + template + struct GetRadiationMask + { + /** get the attribute value of `radiationMask` + * + * @param particle particle to be used + * @return value of the attribute `radiationMask` + */ + template + HDINLINE bool operator()(const T_Particle& particle) const + { + return particle[picongpu::radiationMask_]; + } + }; - /** specialization - * - * specialization for the case that the species not owns the attribute - * `radiationMask` - */ - template< > - struct GetRadiationMask< false > - { - /** get the attribute value of `radiationMask` - * - * @param particle to be used - * @return always true - */ - template< typename T_Particle > - HDINLINE bool operator()( const T_Particle& ) const - { - return true; - } - }; + /** specialization + * + * specialization for the case that the species not owns the attribute + * `radiationMask` + */ + template<> + struct GetRadiationMask + { + /** get the attribute value of `radiationMask` + * + * @param particle to be used + * @return always true + */ + template + HDINLINE bool operator()(const T_Particle&) const + { + return true; + } + }; - /** get the value of the particle attribute `radiationMask` - * - * Allow to read out the value of the attribute `radiationMask` also if - * it is not defined for the particle. 
- * - * @tparam T_Particle particle type - * @param particle valid particle - * @return particle attribute value `radiationMask`, always `true` if attribute `radiationMask` is not defined - */ - template< typename T_Particle > - HDINLINE bool getRadiationMask( const T_Particle& particle ) - { - constexpr bool hasRadiationMask = pmacc::traits::HasIdentifier< - typename T_Particle::FrameType, - radiationMask - >::type::value; - return GetRadiationMask< hasRadiationMask >{}( particle ); - } -} // namespace radiation -} // namespace plugins + /** get the value of the particle attribute `radiationMask` + * + * Allow to read out the value of the attribute `radiationMask` also if + * it is not defined for the particle. + * + * @tparam T_Particle particle type + * @param particle valid particle + * @return particle attribute value `radiationMask`, always `true` if attribute `radiationMask` is not + * defined + */ + template + HDINLINE bool getRadiationMask(const T_Particle& particle) + { + constexpr bool hasRadiationMask + = pmacc::traits::HasIdentifier::type::value; + return GetRadiationMask{}(particle); + } + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/Radiation.hpp b/include/picongpu/plugins/radiation/Radiation.hpp index fe8c23ac1f..e62abb3292 100644 --- a/include/picongpu/plugins/radiation/Radiation.hpp +++ b/include/picongpu/plugins/radiation/Radiation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Klaus Steiniger, Felix Schmitt, Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -21,7 +21,7 @@ #pragma once #if(ENABLE_HDF5 != 1) -#error The activated radiation plugin (radiation.param) requires HDF5 +# error The activated radiation plugin (radiation.param) requires HDF5 #endif #include "picongpu/simulation_defines.hpp" @@ -51,1226 +51,1214 @@ #include #include #include - +#include namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -using namespace pmacc; + namespace plugins + { + namespace radiation + { + using namespace pmacc; -namespace po = boost::program_options; + namespace po = boost::program_options; + namespace idLabels + { + enum meshRecordLabelsEnum + { + Amplitude = 0, + Detector = 1, + Frequency = 2 + }; + } // end namespace idLabels -namespace idLabels -{ - enum meshRecordLabelsEnum - { - Amplitude = 0, - Detector = 1, - Frequency = 2 - }; -}// end namespace idLabels + /////////////////////////////////////////////////////////////////////////////////////////////// + /////////////////////////////// Radiation Plugin Class //////////////////////////////////// + /////////////////////////////////////////////////////////////////////////////////////////////// + template + class Radiation : public ISimulationPlugin + { + using Amplitude = picongpu::plugins::radiation::Amplitude<>; + private: + typedef MappingDesc::SuperCellSize SuperCellSize; -/////////////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////// Radiation Plugin Class //////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////////////// + typedef PIConGPUVerboseRadiation radLog; -template -class Radiation : public ISimulationPlugin -{ -private: - - typedef MappingDesc::SuperCellSize SuperCellSize; - - typedef PIConGPUVerboseRadiation radLog; - - /** - * Object that stores the complex radiated amplitude on host and device. - * Radiated amplitude is a function of theta (looking direction) and - * frequency. 
Layout of the radiation array is: - * [omega_1(theta_1),omega_2(theta_1),...,omega_N-omega(theta_1), - * omega_1(theta_2),omega_2(theta_2),...,omega_N-omega(theta_N-theta)] - */ - GridBuffer *radiation; - radiation_frequencies::InitFreqFunctor freqInit; - radiation_frequencies::FreqFunctor freqFkt; - - MappingDesc *cellDescription; - std::string notifyPeriod; - uint32_t dumpPeriod; - uint32_t radStart; - uint32_t radEnd; - - std::string speciesName; - std::string pluginName; - std::string pluginPrefix; - std::string filename_prefix; - bool totalRad; - bool lastRad; - std::string folderLastRad; - std::string folderTotalRad; - bool radPerGPU; - std::string folderRadPerGPU; - DataSpace lastGPUpos; - - /** - * Data structure for storage and summation of the intermediate values of - * the calculated Amplitude from every host for every direction and - * frequency. - */ - Amplitude* timeSumArray; - Amplitude *tmp_result; - vector_64* detectorPositions; - float_64* detectorFrequencies; - - bool isMaster; - - uint32_t currentStep; - uint32_t lastStep; - - std::string pathRestart; - std::string meshesPathName; - std::string particlesPathName; - - mpi::MPIReduce reduce; - bool compressionOn; - static const int numberMeshRecords = 3; - -public: - - Radiation() : - pluginName("Radiation: calculate the radiation of a species"), - speciesName(ParticlesType::FrameType::getName()), - pluginPrefix(speciesName + std::string("_radiation")), - filename_prefix(pluginPrefix), - radiation(nullptr), - cellDescription(nullptr), - dumpPeriod(0), - totalRad(false), - lastRad(false), - timeSumArray(nullptr), - tmp_result(nullptr), - detectorPositions(nullptr), - detectorFrequencies(nullptr), - isMaster(false), - currentStep(0), - radPerGPU(false), - lastStep(0), - meshesPathName("DetectorMesh/"), - particlesPathName("DetectorParticle/"), - compressionOn(false) - { - Environment<>::get().PluginConnector().registerPlugin(this); - } + /** + * Object that stores the complex radiated amplitude on 
host and device. + * Radiated amplitude is a function of theta (looking direction) and + * frequency. Layout of the radiation array is: + * [omega_1(theta_1),omega_2(theta_1),...,omega_N-omega(theta_1), + * omega_1(theta_2),omega_2(theta_2),...,omega_N-omega(theta_N-theta)] + * The second dimension is used to store intermediate results if command + * line option numJobs is > 1. + */ + GridBuffer* radiation; + radiation_frequencies::InitFreqFunctor freqInit; + radiation_frequencies::FreqFunctor freqFkt; + + MappingDesc* cellDescription; + std::string notifyPeriod; + uint32_t dumpPeriod; + uint32_t radStart; + uint32_t radEnd; + + std::string speciesName; + std::string pluginName; + std::string pluginPrefix; + std::string filename_prefix; + bool totalRad; + bool lastRad; + std::string folderLastRad; + std::string folderTotalRad; + bool radPerGPU; + std::string folderRadPerGPU; + DataSpace lastGPUpos; + int numJobs; + + /** + * Data structure for storage and summation of the intermediate values of + * the calculated Amplitude from every host for every direction and + * frequency. 
+ */ + std::vector timeSumArray; + std::vector tmp_result; + std::vector detectorPositions; + std::vector detectorFrequencies; + + bool isMaster; + + uint32_t currentStep; + uint32_t lastStep; + + std::string pathRestart; + std::string meshesPathName; + std::string particlesPathName; + + mpi::MPIReduce reduce; + bool compressionOn; + static const int numberMeshRecords = 3; + + public: + Radiation() + : pluginName("Radiation: calculate the radiation of a species") + , speciesName(ParticlesType::FrameType::getName()) + , pluginPrefix(speciesName + std::string("_radiation")) + , filename_prefix(pluginPrefix) + , radiation(nullptr) + , cellDescription(nullptr) + , dumpPeriod(0) + , totalRad(false) + , lastRad(false) + , isMaster(false) + , currentStep(0) + , radPerGPU(false) + , lastStep(0) + , meshesPathName("DetectorMesh/") + , particlesPathName("DetectorParticle/") + , compressionOn(false) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - virtual ~Radiation() - { - } - - /** - * This function represents what is actually calculated if the plugin - * is called. Here, one only sets the particles pointer to the data of - * the latest time step and calls the 'calculateRadiationParticles' - * function if for the actual time step radiation is to be calculated. - * @param currentStep - */ - void notify(uint32_t currentStep) - { - if (currentStep >= radStart) - { - // radEnd = 0 is default, calculates radiation until simulation - // end - if (currentStep <= radEnd || radEnd == 0) - { - log ("Radiation (%1%): calculate time step %2% ") % speciesName % currentStep; + virtual ~Radiation() + { + } - /* CORE + BORDER is PIC black magic, currently not needed - * + /** + * This function represents what is actually calculated if the plugin + * is called. Here, one only sets the particles pointer to the data of + * the latest time step and calls the 'calculateRadiationParticles' + * function if for the actual time step radiation is to be calculated. 
+ * @param currentStep */ - calculateRadiationParticles < CORE + BORDER > (currentStep); + void notify(uint32_t currentStep) + { + if(currentStep >= radStart) + { + // radEnd = 0 is default, calculates radiation until simulation + // end + if(currentStep <= radEnd || radEnd == 0) + { + log("Radiation (%1%): calculate time step %2% ") % speciesName + % currentStep; + + /* CORE + BORDER is PIC black magic, currently not needed + * + */ + calculateRadiationParticles(currentStep); + + log("Radiation (%1%): finished time step %2% ") % speciesName + % currentStep; + } + } + } - log ("Radiation (%1%): finished time step %2% ") % speciesName % currentStep; - } - } - } + void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(&notifyPeriod), + "enable plugin [for each n-th step]")( + (pluginPrefix + ".dump").c_str(), + po::value(&dumpPeriod)->default_value(0), + "dump integrated radiation from last dumped step [for each n-th step] (0 = only print data at " + "end of simulation)")( + (pluginPrefix + ".lastRadiation").c_str(), + po::bool_switch(&lastRad), + "enable calculation of integrated radiation from last dumped step")( + (pluginPrefix + ".folderLastRad").c_str(), + po::value(&folderLastRad)->default_value("lastRad"), + "folder in which the integrated radiation from last dumped step is written")( + (pluginPrefix + ".totalRadiation").c_str(), + po::bool_switch(&totalRad), + "enable calculation of integrated radiation from start of simulation")( + (pluginPrefix + ".folderTotalRad").c_str(), + po::value(&folderTotalRad)->default_value("totalRad"), + "folder in which the integrated radiation from start of simulation is written")( + (pluginPrefix + ".start").c_str(), + po::value(&radStart)->default_value(2), + "time index when radiation should start with calculation")( + (pluginPrefix + ".end").c_str(), + po::value(&radEnd)->default_value(0), + "time index when radiation should end with calculation")( + 
(pluginPrefix + ".radPerGPU").c_str(), + po::bool_switch(&radPerGPU), + "enable radiation output from each GPU individually")( + (pluginPrefix + ".folderRadPerGPU").c_str(), + po::value(&folderRadPerGPU)->default_value("radPerGPU"), + "folder in which the radiation of each GPU is written")( + (pluginPrefix + ".compression").c_str(), + po::bool_switch(&compressionOn), + "enable compression of hdf5 output")( + (pluginPrefix + ".numJobs").c_str(), + po::value(&numJobs)->default_value(2), + "Number of independent jobs used for the radiation calculation."); + } - void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ((pluginPrefix + ".period").c_str(), po::value (&notifyPeriod), "enable plugin [for each n-th step]") - ((pluginPrefix + ".dump").c_str(), po::value (&dumpPeriod)->default_value(0), "dump integrated radiation from last dumped step [for each n-th step] (0 = only print data at end of simulation)") - ((pluginPrefix + ".lastRadiation").c_str(), po::bool_switch(&lastRad), "enable calculation of integrated radiation from last dumped step") - ((pluginPrefix + ".folderLastRad").c_str(), po::value (&folderLastRad)->default_value("lastRad"), "folder in which the integrated radiation from last dumped step is written") - ((pluginPrefix + ".totalRadiation").c_str(), po::bool_switch(&totalRad), "enable calculation of integrated radiation from start of simulation") - ((pluginPrefix + ".folderTotalRad").c_str(), po::value (&folderTotalRad)->default_value("totalRad"), "folder in which the integrated radiation from start of simulation is written") - ((pluginPrefix + ".start").c_str(), po::value (&radStart)->default_value(2), "time index when radiation should start with calculation") - ((pluginPrefix + ".end").c_str(), po::value (&radEnd)->default_value(0), "time index when radiation should end with calculation") - ((pluginPrefix + ".radPerGPU").c_str(), po::bool_switch(&radPerGPU), "enable radiation output from each GPU individually") - ((pluginPrefix 
+ ".folderRadPerGPU").c_str(), po::value (&folderRadPerGPU)->default_value("radPerGPU"), "folder in which the radiation of each GPU is written") - ((pluginPrefix + ".compression").c_str(), po::bool_switch(&compressionOn), "enable compression of hdf5 output"); - } - - - std::string pluginGetName() const - { - return pluginName; - } + std::string pluginGetName() const + { + return pluginName; + } - void setMappingDescription(MappingDesc *cellDescription) - { - this->cellDescription = cellDescription; - } + void setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; + } - void restart(uint32_t timeStep, const std::string restartDirectory) - { - // only load backup if radiation is calculated: - if(notifyPeriod.empty()) - return; - if(isMaster) - { - // this will lead to wrong lastRad output right after the checkpoint if the restart point is - // not a dump point. The correct lastRad data can be reconstructed from hdf5 data - // since text based lastRad output will be obsolete soon, this is not a problem - readHDF5file(timeSumArray, restartDirectory + "/" + speciesName + std::string("_radRestart_"), timeStep); - log ("Radiation (%1%): restart finished") % speciesName; - } - } + void restart(uint32_t timeStep, const std::string restartDirectory) + { + // only load backup if radiation is calculated: + if(notifyPeriod.empty()) + return; + + if(isMaster) + { + // this will lead to wrong lastRad output right after the checkpoint if the restart point is + // not a dump point. 
The correct lastRad data can be reconstructed from hdf5 data + // since text based lastRad output will be obsolete soon, this is not a problem + readHDF5file( + timeSumArray, + restartDirectory + "/" + speciesName + std::string("_radRestart_"), + timeStep); + log("Radiation (%1%): restart finished") % speciesName; + } + } - void checkpoint(uint32_t timeStep, const std::string restartDirectory) - { - // only write backup if radiation is calculated: - if(notifyPeriod.empty()) - return; + void checkpoint(uint32_t timeStep, const std::string restartDirectory) + { + // only write backup if radiation is calculated: + if(notifyPeriod.empty()) + return; + + // collect data GPU -> CPU -> Master + copyRadiationDeviceToHost(); + collectRadiationOnMaster(); + sumAmplitudesOverTime(tmp_result, timeSumArray); + + // write backup file + if(isMaster) + { + writeHDF5file(tmp_result, restartDirectory + "/" + speciesName + std::string("_radRestart_")); + } + } - // collect data GPU -> CPU -> Master - copyRadiationDeviceToHost(); - collectRadiationOnMaster(); - sumAmplitudesOverTime(tmp_result, timeSumArray); - // write backup file - if (isMaster) - { - writeHDF5file(tmp_result, restartDirectory + "/" + speciesName + std::string("_radRestart_")); - } - } - - -private: - - /** - * The plugin is loaded on every MPI rank, and therefor this function is - * executed on every MPI rank. - * One host with MPI rank 0 is defined to be the master. - * It creates a folder where all the - * results are saved and, depending on the type of radiation calculation, - * creates an additional data structure for the summation of all - * intermediate values. - * On every host data structure for storage of the calculated radiation - * is created. 
*/ - void pluginLoad() - { - if(!notifyPeriod.empty()) - { - // allocate memory for all amplitudes for temporal data collection - tmp_result = new Amplitude[elements_amplitude()]; + private: + /** + * The plugin is loaded on every MPI rank, and therefor this function is + * executed on every MPI rank. + * One host with MPI rank 0 is defined to be the master. + * It creates a folder where all the + * results are saved and, depending on the type of radiation calculation, + * creates an additional data structure for the summation of all + * intermediate values. + * On every host data structure for storage of the calculated radiation + * is created. */ + void pluginLoad() + { + if(!notifyPeriod.empty()) + { + if(numJobs <= 0) + { + std::cerr << "'numJobs' must be '>=1' value is adjusted from" << numJobs << " to '1'." + << std::endl; + numJobs = 1; + } + /* allocate memory for all amplitudes for temporal data collection + * ACCUMULATOR! Should be in double precision for numerical stability. + */ + tmp_result.resize(elements_amplitude(), Amplitude::zero()); + + /*only rank 0 creates a file*/ + isMaster = reduce.hasResult(mpi::reduceMethods::Reduce()); + + /* Buffer for GPU results. + * The second dimension is used to store intermediate results if command + * line option numJobs is > 1. 
+ */ + radiation = new GridBuffer(DataSpace<2>(elements_amplitude(), numJobs)); + + freqInit.Init(frequencies_from_list::listLocation); + freqFkt = freqInit.getFunctor(); + + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + pmacc::Filesystem& fs = Environment::get().Filesystem(); + + if(isMaster) + { + timeSumArray.resize(elements_amplitude(), Amplitude::zero()); + + /* save detector position / observation direction */ + detectorPositions.resize(parameters::N_observer); + for(uint32_t detectorIndex = 0; detectorIndex < parameters::N_observer; ++detectorIndex) + { + detectorPositions[detectorIndex] + = radiation_observer::observation_direction(detectorIndex); + } + + /* save detector frequencies */ + detectorFrequencies.resize(radiation_frequencies::N_omega); + for(uint32_t detectorIndex = 0; detectorIndex < radiation_frequencies::N_omega; + ++detectorIndex) + { + detectorFrequencies[detectorIndex] = freqFkt.get(detectorIndex); + } + } + + if(isMaster) + { + fs.createDirectory("radiationHDF5"); + fs.setDirectoryPermissions("radiationHDF5"); + } + + + if(isMaster && radPerGPU) + { + fs.createDirectory(folderRadPerGPU); + fs.setDirectoryPermissions(folderRadPerGPU); + } + + if(isMaster && totalRad) + { + // create folder for total output + fs.createDirectory(folderTotalRad); + fs.setDirectoryPermissions(folderTotalRad); + } + if(isMaster && lastRad) + { + // create folder for total output + fs.createDirectory(folderLastRad); + fs.setDirectoryPermissions(folderLastRad); + } + } + } - /*only rank 0 create a file*/ - isMaster = reduce.hasResult(mpi::reduceMethods::Reduce()); - radiation = new GridBuffer (DataSpace (elements_amplitude())); //create one int on GPU and host + void pluginUnload() + { + if(!notifyPeriod.empty()) + { + // Some funny things that make it possible for the kernel to calculate + // the absolute position of the particles + const SubGrid& subGrid = Environment::get().SubGrid(); + DataSpace 
localSize(subGrid.getLocalDomain().size); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + DataSpace globalOffset(subGrid.getLocalDomain().offset); + globalOffset.y() += (localSize.y() * numSlides); + + // only print data at end of simulation if no dump period was set + if(dumpPeriod == 0) + { + collectDataGPUToMaster(); + writeAllFiles(globalOffset); + } + + + __delete(radiation); + CUDA_CHECK(cuplaGetLastError()); + } + } - freqInit.Init(frequencies_from_list::listLocation); - freqFkt = freqInit.getFunctor(); - Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); - pmacc::Filesystem& fs = Environment::get().Filesystem(); + /** Method to copy data from GPU to CPU */ + void copyRadiationDeviceToHost() + { + radiation->deviceToHost(); + __getTransactionEvent().waitForFinished(); + + auto dbox = radiation->getHostBuffer().getDataBox(); + int numAmp = elements_amplitude(); + // update the main result matrix (y index zero) + for(int resultIdx = 1; resultIdx < numJobs; ++resultIdx) + for(int ampIdx = 0; ampIdx < numAmp; ++ampIdx) + { + dbox(DataSpace<2>(ampIdx, 0)) += dbox(DataSpace<2>(ampIdx, resultIdx)); + } + } - if (isMaster) - { - timeSumArray = new Amplitude[elements_amplitude()]; - for (unsigned int i = 0; i < elements_amplitude(); ++i) - timeSumArray[i] = Amplitude::zero(); - /* save detector position / observation direction */ - detectorPositions = new vector_64[parameters::N_observer]; - for(uint32_t detectorIndex=0; detectorIndex < parameters::N_observer; ++detectorIndex) + /** write radiation from each GPU to file individually + * requires call of copyRadiationDeviceToHost() before */ + void saveRadPerGPU(const DataSpace currentGPUpos) { - detectorPositions[detectorIndex] = radiation_observer::observation_direction(detectorIndex); + if(radPerGPU) + { + // only print lastGPUrad if full time period was covered + if(lastGPUpos == currentGPUpos) + { + std::stringstream last_time_step_str; + 
std::stringstream current_time_step_str; + std::stringstream GPUpos_str; + + last_time_step_str << lastStep; + current_time_step_str << currentStep; + + for(uint32_t dimIndex = 0; dimIndex < simDim; ++dimIndex) + GPUpos_str << "_" << currentGPUpos[dimIndex]; + + writeFile( + radiation->getHostBuffer().getBasePointer(), + folderRadPerGPU + "/" + speciesName + "_radPerGPU_pos" + GPUpos_str.str() + "_time_" + + last_time_step_str.str() + "-" + current_time_step_str.str() + ".dat"); + } + lastGPUpos = currentGPUpos; + } } - /* save detector frequencies */ - detectorFrequencies = new float_64[radiation_frequencies::N_omega]; - for(uint32_t detectorIndex=0; detectorIndex < radiation_frequencies::N_omega; ++detectorIndex) + + /** returns number of observers (radiation detectors) */ + static unsigned int elements_amplitude() { - detectorFrequencies[detectorIndex] = freqFkt.get(detectorIndex); + return radiation_frequencies::N_omega + * parameters::N_observer; // storage for amplitude results on GPU } - } - if (isMaster) - { - fs.createDirectory("radiationHDF5"); - fs.setDirectoryPermissions("radiationHDF5"); - } + /** combine radiation data from each CPU and store result on master + * copyRadiationDeviceToHost() should be called before */ + void collectRadiationOnMaster() + { + reduce( + nvidia::functors::Add(), + tmp_result.data(), + radiation->getHostBuffer().getBasePointer(), + elements_amplitude(), + mpi::reduceMethods::Reduce()); + } - if (isMaster && radPerGPU) - { - fs.createDirectory(folderRadPerGPU); - fs.setDirectoryPermissions(folderRadPerGPU); - } - if (isMaster && totalRad) - { - //create folder for total output - fs.createDirectory(folderTotalRad); - fs.setDirectoryPermissions(folderTotalRad); - } - if (isMaster && lastRad) - { - //create folder for total output - fs.createDirectory(folderLastRad); - fs.setDirectoryPermissions(folderLastRad); - } + /** add collected radiation data to previously stored data + * should be called after collectRadiationOnMaster() 
*/ + void sumAmplitudesOverTime(std::vector& targetArray, std::vector& summandArray) + { + if(isMaster) + { + // add last amplitudes to previous amplitudes + for(unsigned int i = 0; i < elements_amplitude(); ++i) + targetArray[i] += summandArray[i]; + } + } - } - } + /** writes to file the emitted radiation only from the current + * time step. Radiation from previous time steps is neglected. */ + void writeLastRadToText() + { + // only the master rank writes data + if(isMaster) + { + // write file only if lastRad flag was selected + if(lastRad) + { + // get time step as string + std::stringstream o_step; + o_step << currentStep; + + // write lastRad data to txt + writeFile( + tmp_result.data(), + folderLastRad + "/" + filename_prefix + "_" + o_step.str() + ".dat"); + } + } + } - void pluginUnload() - { - if(!notifyPeriod.empty()) - { - // Some funny things that make it possible for the kernel to calculate - // the absolute position of the particles - const SubGrid& subGrid = Environment::get().SubGrid(); - DataSpace localSize(subGrid.getLocalDomain().size); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - DataSpace globalOffset(subGrid.getLocalDomain().offset); - globalOffset.y() += (localSize.y() * numSlides); + /** writes the total radiation (over entire simulation time) to file */ + void writeTotalRadToText() + { + // only the master rank writes data + if(isMaster) + { + // write file only if totalRad flag was selected + if(totalRad) + { + // get time step as string + std::stringstream o_step; + o_step << currentStep; + + // write totalRad data to txt + writeFile( + timeSumArray.data(), + folderTotalRad + "/" + filename_prefix + "_" + o_step.str() + ".dat"); + } + } + } - // only print data at end of simulation if no dump period was set - if (dumpPeriod == 0) - { - collectDataGPUToMaster(); - writeAllFiles(globalOffset); - } - if (isMaster) - { - __deleteArray(timeSumArray); - delete[] detectorPositions; - delete[] 
detectorFrequencies; - } - - __delete(radiation); - CUDA_CHECK(cudaGetLastError()); - - __deleteArray(tmp_result); - } - } - - - /** Method to copy data from GPU to CPU */ - void copyRadiationDeviceToHost() - { - radiation->deviceToHost(); - __getTransactionEvent().waitForFinished(); - } - - - /** write radiation from each GPU to file individually - * requires call of copyRadiationDeviceToHost() before */ - void saveRadPerGPU(const DataSpace currentGPUpos) - { - if (radPerGPU) - { - // only print lastGPUrad if full time period was covered - if (lastGPUpos == currentGPUpos) - { - std::stringstream last_time_step_str; - std::stringstream current_time_step_str; - std::stringstream GPUpos_str; - - last_time_step_str << lastStep; - current_time_step_str << currentStep; - - for(uint32_t dimIndex=0; dimIndexgetHostBuffer().getBasePointer(), folderRadPerGPU + "/" + speciesName - + "_radPerGPU_pos" + GPUpos_str.str() - + "_time_" + last_time_step_str.str() - + "-" + current_time_step_str.str() + ".dat"); - } - lastGPUpos = currentGPUpos; - } - - } - - - /** returns number of observers (radiation detectors) */ - static unsigned int elements_amplitude() - { - return radiation_frequencies::N_omega * parameters::N_observer; // storage for amplitude results on GPU - } - - - /** combine radiation data from each CPU and store result on master - * copyRadiationDeviceToHost() should be called before */ - void collectRadiationOnMaster() - { - reduce(nvidia::functors::Add(), - tmp_result, - radiation->getHostBuffer().getBasePointer(), - elements_amplitude(), - mpi::reduceMethods::Reduce() - ); - } - - - /** add collected radiation data to previously stored data - * should be called after collectRadiationOnMaster() */ - void sumAmplitudesOverTime(Amplitude* targetArray, Amplitude* summandArray) - { - if (isMaster) - { - // add last amplitudes to previous amplitudes - for (unsigned int i = 0; i < elements_amplitude(); ++i) - targetArray[i] += summandArray[i]; - } - } - - - - /** writes 
to file the emitted radiation only from the current - * time step. Radiation from previous time steps is neglected. */ - void writeLastRadToText() - { - // only the master rank writes data - if (isMaster) - { - // write file only if lastRad flag was selected - if (lastRad) - { - // get time step as string - std::stringstream o_step; - o_step << currentStep; - - // write lastRad data to txt - writeFile(tmp_result, folderLastRad + "/" + filename_prefix + "_" + o_step.str() + ".dat"); - } - } - } - - - /** writes the total radiation (over entire simulation time) to file */ - void writeTotalRadToText() - { - // only the master rank writes data - if (isMaster) - { - // write file only if totalRad flag was selected - if (totalRad) - { - // get time step as string - std::stringstream o_step; - o_step << currentStep; - - // write totalRad data to txt - writeFile(timeSumArray, folderTotalRad + "/" + filename_prefix + "_" + o_step.str() + ".dat"); - } - } - } - - - /** write total radiation data as HDF5 file */ - void writeAmplitudesToHDF5() - { - if (isMaster) - { - writeHDF5file(timeSumArray, std::string("radiationHDF5/") + speciesName + std::string("_radAmplitudes_")); - } - } - - - /** perform all operations to get data from GPU to master */ - void collectDataGPUToMaster() - { - // collect data GPU -> CPU -> Master - copyRadiationDeviceToHost(); - collectRadiationOnMaster(); - sumAmplitudesOverTime(timeSumArray, tmp_result); - } - - - /** write all possible/selected output */ - void writeAllFiles(const DataSpace currentGPUpos) - { - // write data to files - saveRadPerGPU(currentGPUpos); - writeLastRadToText(); - writeTotalRadToText(); - writeAmplitudesToHDF5(); - } - - - /** This method returns hdf5 data structure names for amplitudes - * - * Arguments: - * int index - index of Amplitude - * "-1" return record name - * - * Return: - * std::string - name - * - * This method avoids initializing static constexpr string arrays. 
- */ - static const std::string dataLabels(int index) - { - const std::string path("Amplitude/"); - - /* return record name if handed -1 */ - if(index == -1) - return path; - - const std::string dataLabelsList[] = {"x_Re", - "x_Im", - "y_Re", - "y_Im", - "z_Re", - "z_Im"}; - - return path + dataLabelsList[index]; - } - - /** This method returns hdf5 data structure names for detector directions - * - * Arguments: - * int index - index of detector - * "-1" return record name - * - * Return: - * std::string - name - * - * This method avoids initializing static const string arrays. - */ - static const std::string dataLabelsDetectorDirection(int index) - { - const std::string path("DetectorDirection/"); - - /* return record name if handed -1 */ - if(index == -1) - return path; - - const std::string dataLabelsList[] = {"x", - "y", - "z"}; - - return path + dataLabelsList[index]; - } - - - /** This method returns hdf5 data structure names for detector frequencies - * - * Arguments: - * int index - index of detector - * "-1" return record name - * - * Return: - * std::string - name - * - * This method avoids initializing static const string arrays. - */ - static const std::string dataLabelsDetectorFrequency(int index) - { - const std::string path("DetectorFrequency/"); - - /* return record name if handed -1 */ - if(index == -1) - return path; - - const std::string dataLabelsList[] = {"omega"}; - - return path + dataLabelsList[index]; - } - - /** This method returns hdf5 data structure names for all mesh records - * - * Arguments: - * int index - index of record - * "-1" return number of mesh records - * - * Return: - * std::string - name - * - * This method avoids initializing static const string arrays. 
- */ - static const std::string meshRecordLabels(int index) - { - if(index == idLabels::Amplitude) - return dataLabels(-1); - else if (index == idLabels::Detector) - return dataLabelsDetectorDirection(-1); - else if (index == idLabels::Frequency) - return dataLabelsDetectorFrequency(-1); - else - return std::string("this-record-does-not-exist"); - } - - - - - - /** Write Amplitude data to HDF5 file - * - * Arguments: - * Amplitude* values - array of complex amplitude values - * std::string name - path and beginning of file name to store data to - */ - void writeHDF5file(Amplitude* values, std::string name) - { - splash::SerialDataCollector hdf5DataFile(1); - splash::DataCollector::FileCreationAttr fAttr; - - splash::DataCollector::initFileCreationAttr(fAttr); - fAttr.enableCompression = compressionOn; - - std::ostringstream filename; - filename << name << currentStep; - - hdf5DataFile.open(filename.str().c_str(), fAttr); - - typename PICToSplash::type radSplashType; - - - splash::Dimensions bufferSize(Amplitude::numComponents, - radiation_frequencies::N_omega, - parameters::N_observer); - - splash::Dimensions componentSize(1, - radiation_frequencies::N_omega, - parameters::N_observer); - - splash::Dimensions stride(Amplitude::numComponents,1,1); - - /* get the radiation amplitude unit */ - Amplitude UnityAmplitude(1., 0., 0., 0., 0., 0.); - const picongpu::float_64 factor = UnityAmplitude.calc_radiation() * UNIT_ENERGY * UNIT_TIME ; - - typedef PICToSplash::type SplashFloatXType; - SplashFloatXType splashFloatXType; - - for(uint32_t ampIndex=0; ampIndex < Amplitude::numComponents; ++ampIndex) - { - splash::Dimensions offset(ampIndex,0,0); - splash::Selection dataSelection(bufferSize, - componentSize, - offset, - stride); - - /* save data for each x/y/z * Re/Im amplitude */ - hdf5DataFile.write(currentStep, - radSplashType, - 3, - dataSelection, - (meshesPathName + dataLabels(ampIndex)).c_str(), - values); - - /* save SI unit as attribute together with data set */ - 
hdf5DataFile.writeAttribute(currentStep, - radSplashType, - (meshesPathName + dataLabels(ampIndex)).c_str(), - "unitSI", - &factor); - - /* position */ - std::vector positionMesh(simDim, 0.0); /* there is no offset - zero */ - hdf5DataFile.writeAttribute(currentStep, - splashFloatXType, - (meshesPathName + dataLabels(ampIndex)).c_str(), - "position", - 1u, - splash::Dimensions(simDim,0,0), - &(*positionMesh.begin())); - } - - /* save SI unit as attribute in the Amplitude group (for convenience) */ - hdf5DataFile.writeAttribute(currentStep, - radSplashType, - (meshesPathName + std::string("Amplitude")).c_str(), - "unitSI", - &factor); - - /* save detector position / observation direction */ - splash::Dimensions bufferSizeDetector(3, - 1, - parameters::N_observer); - - splash::Dimensions componentSizeDetector(1, - 1, - parameters::N_observer); - - splash::Dimensions strideDetector(3,1,1); - - for(uint32_t detectorDim=0; detectorDim < 3; ++detectorDim) - { - splash::Dimensions offset(detectorDim,0,0); - splash::Selection dataSelection(bufferSizeDetector, - componentSizeDetector, - offset, - strideDetector); - - hdf5DataFile.write(currentStep, - radSplashType, - 3, - dataSelection, - (meshesPathName + dataLabelsDetectorDirection(detectorDim)).c_str(), - detectorPositions); - - /* save SI unit as attribute together with data set */ - const picongpu::float_64 factorDirection = 1.0 ; - hdf5DataFile.writeAttribute(currentStep, - radSplashType, - (meshesPathName + dataLabelsDetectorDirection(detectorDim)).c_str(), - "unitSI", - &factorDirection); - - /* position */ - std::vector positionMesh(simDim, 0.0); /* there is no offset - zero */ - hdf5DataFile.writeAttribute(currentStep, - splashFloatXType, - (meshesPathName + dataLabelsDetectorDirection(detectorDim)).c_str(), - "position", - 1u, - splash::Dimensions(simDim,0,0), - &(*positionMesh.begin())); - - } - - - - /* save detector frequencies */ - splash::Dimensions bufferSizeOmega(1, - radiation_frequencies::N_omega, - 1); 
- - splash::Dimensions strideOmega(1,1,1); - - splash::Dimensions offset(0,0,0); - splash::Selection dataSelection(bufferSizeOmega, - bufferSizeOmega, - offset, - strideOmega); - - hdf5DataFile.write(currentStep, - radSplashType, - 3, - dataSelection, - (meshesPathName + dataLabelsDetectorFrequency(0)).c_str(), - detectorFrequencies); - - /* save SI unit as attribute together with data set */ - const picongpu::float_64 factorOmega = 1.0 / UNIT_TIME ; - hdf5DataFile.writeAttribute(currentStep, - radSplashType, - (meshesPathName + dataLabelsDetectorFrequency(0)).c_str(), - "unitSI", - &factorOmega); - - /* position */ - std::vector positionMesh(simDim, 0.0); /* there is no offset - zero */ - hdf5DataFile.writeAttribute(currentStep, - splashFloatXType, - (meshesPathName + dataLabelsDetectorFrequency(0)).c_str(), - "position", - 1u, - splash::Dimensions(simDim,0,0), - &(*positionMesh.begin())); - - - /* begin openPMD attributes */ - /* begin required openPMD global attributes */ - std::string openPMDversion("1.0.0"); - splash::ColTypeString ctOpenPMDversion(openPMDversion.length()); - hdf5DataFile.writeGlobalAttribute( ctOpenPMDversion, - "openPMD", - openPMDversion.c_str() ); - - const uint32_t openPMDextension = 0; // no extension - splash::ColTypeUInt32 ctUInt32; - hdf5DataFile.writeGlobalAttribute( ctUInt32, - "openPMDextension", - &openPMDextension ); - - std::string basePath("/data/%T/"); - splash::ColTypeString ctBasePath(basePath.length()); - hdf5DataFile.writeGlobalAttribute(ctBasePath, - "basePath", - basePath.c_str() ); - - splash::ColTypeString ctMeshesPath(meshesPathName.length()); - hdf5DataFile.writeGlobalAttribute(ctMeshesPath, - "meshesPath", - meshesPathName.c_str() ); - - - splash::ColTypeString ctParticlesPath(particlesPathName.length()); - hdf5DataFile.writeGlobalAttribute( ctParticlesPath, - "particlesPath", - particlesPathName.c_str() ); - - std::string iterationEncoding("fileBased"); - splash::ColTypeString 
ctIterationEncoding(iterationEncoding.length()); - hdf5DataFile.writeGlobalAttribute( ctIterationEncoding, - "iterationEncoding", - iterationEncoding.c_str() ); - - /* the ..._0_0_0... extension comes from the current filename - formating of the serial data colector in libSplash */ - const int indexCutDirectory = name.rfind('/'); - std::string iterationFormat(name.substr(indexCutDirectory + 1) + std::string("%T_0_0_0.h5")); - splash::ColTypeString ctIterationFormat(iterationFormat.length()); - hdf5DataFile.writeGlobalAttribute( ctIterationFormat, - "iterationFormat", - iterationFormat.c_str() ); - - hdf5DataFile.writeAttribute(currentStep, splashFloatXType, nullptr, "dt", &DELTA_T); - const float_X time = float_X(currentStep) * DELTA_T; - hdf5DataFile.writeAttribute(currentStep, splashFloatXType, nullptr, "time", &time); - splash::ColTypeDouble ctDouble; - hdf5DataFile.writeAttribute(currentStep, ctDouble, nullptr, "timeUnitSI", &UNIT_TIME); - - /* end required openPMD global attributes */ - - /* begin recommended openPMD global attributes */ - - std::string author = Environment<>::get().SimulationDescription().getAuthor(); - if( author.length() > 0 ) - { - splash::ColTypeString ctAuthor(author.length()); - hdf5DataFile.writeGlobalAttribute( ctAuthor, - "author", - author.c_str() ); - } - - std::string software("PIConGPU"); - splash::ColTypeString ctSoftware(software.length()); - hdf5DataFile.writeGlobalAttribute( ctSoftware, - "software", - software.c_str() ); - - std::stringstream softwareVersion; - softwareVersion << PICONGPU_VERSION_MAJOR << "." - << PICONGPU_VERSION_MINOR << "." - << PICONGPU_VERSION_PATCH; - if( ! 
std::string(PICONGPU_VERSION_LABEL).empty() ) - softwareVersion << "-" << PICONGPU_VERSION_LABEL; - splash::ColTypeString ctSoftwareVersion(softwareVersion.str().length()); - hdf5DataFile.writeGlobalAttribute( ctSoftwareVersion, - "softwareVersion", - softwareVersion.str().c_str() ); - - std::string date = helper::getDateString("%F %T %z"); - splash::ColTypeString ctDate(date.length()); - hdf5DataFile.writeGlobalAttribute( ctDate, - "date", - date.c_str() ); - - /* end recommended openPMD global attributes */ - - /* begin required openPMD attributes for meshes records */ - - for(int i = 0; i gridGlobalOffset(simDim, 0.0); /* there is no offset - zero */ - hdf5DataFile.writeAttribute(currentStep, - ctDouble, - (meshesPathName + meshRecordLabels(i)).c_str(), - "gridGlobalOffset", - 1u, - splash::Dimensions(simDim,0,0), - &(*gridGlobalOffset.begin())); - - /* gridUnit */ - /* ALL grids have indices as axises - thus no unit conversion */ - const double unitNone = 1.0; - hdf5DataFile.writeAttribute(currentStep, - ctDouble, - (meshesPathName + meshRecordLabels(i)).c_str(), - "gridUnitSI", - &unitNone); - - /* geometry */ - const std::string geometry("cartesian"); - splash::ColTypeString ctGeometry(geometry.length()); - hdf5DataFile.writeAttribute(currentStep, - ctGeometry, - (meshesPathName + meshRecordLabels(i)).c_str(), - "geometry", - geometry.c_str()); - - /* dataOrder */ - const std::string dataOrder("C"); - splash::ColTypeString ctDataOrder(dataOrder.length()); - hdf5DataFile.writeAttribute(currentStep, - ctDataOrder, - (meshesPathName + meshRecordLabels(i)).c_str(), - "dataOrder", - dataOrder.c_str()); - - std::vector gridSpacing(simDim, 0.0); - for( uint32_t d = 0; d < simDim; ++d ) - gridSpacing.at(d) = float_X(1.0); - hdf5DataFile.writeAttribute(currentStep, - splashFloatXType, - (meshesPathName + meshRecordLabels(i)).c_str(), - "gridSpacing", - 1u, - splash::Dimensions(simDim,0,0), - &(*gridSpacing.begin())); - - /* axisLabels */ - std::list myListOfStr; - if( 
i == idLabels::Amplitude ) /* amplitude record */ - { - myListOfStr.push_back("detector direction index"); - myListOfStr.push_back("detector frequency index"); - } - else if( i == idLabels::Detector ) /* detector direction record */ - { - myListOfStr.push_back("detector direction index"); - myListOfStr.push_back("None"); - } - else if( i == idLabels::Frequency ) /* detector frequency record */ - { - myListOfStr.push_back("None"); - myListOfStr.push_back("detector frequency index"); - } - myListOfStr.push_back("None"); - - // convert to splash format - helper::GetSplashArrayOfString getSplashArrayOfString; - helper::GetSplashArrayOfString::Result myArrOfStr; - myArrOfStr = getSplashArrayOfString( myListOfStr ); - splash::ColTypeString ctSomeListOfStr( myArrOfStr.maxLen ); - - hdf5DataFile.writeAttribute(currentStep, - ctSomeListOfStr, - (meshesPathName + meshRecordLabels(i)).c_str(), - "axisLabels", - 1u, /* ndims: 1D array */ - splash::Dimensions(myListOfStr.size(),0,0), /* size of 1D array */ - &(myArrOfStr.buffers.at(0))); - - - /* unitDimension */ - std::vector unitDimension( traits::NUnitDimension, 0.0 ); - if( i == idLabels::Amplitude ) /* amplitude record */ - { - /* units Joule seconds -> Length^2 * Time^-1 * Mass^1 */ - unitDimension[traits::SIBaseUnits::length] = 2.0; - unitDimension[traits::SIBaseUnits::time] = -1.0; - unitDimension[traits::SIBaseUnits::mass] = 1.0; - } - else if( i == idLabels::Detector ) /* detector direction record */ - { - /* units none */ - } - else if( i == idLabels::Frequency ) /* detector frequency record */ - { - /* units 1./second -> Time^-1 */ - unitDimension[traits::SIBaseUnits::time] = -1.0; - } - hdf5DataFile.writeAttribute(currentStep, - ctDouble, - (meshesPathName + meshRecordLabels(i)).c_str(), - "unitDimension", - 1u, - splash::Dimensions(traits::NUnitDimension,0,0), - &(*unitDimension.begin())); - - - } - /* end required openPMD attributes for meshes */ - /* end openPMD attributes */ - - hdf5DataFile.close(); - } - - - 
- /** Read Amplitude data from HDF5 file - * - * Arguments: - * Amplitude* values - array of complex amplitudes to store data in - * std::string name - path and beginning of file name with data stored in - * const int timeStep - time step to read - */ - void readHDF5file(Amplitude* values, std::string name, const int timeStep) - { - splash::SerialDataCollector hdf5DataFile(1); - splash::DataCollector::FileCreationAttr fAttr; - - splash::DataCollector::initFileCreationAttr(fAttr); - - fAttr.fileAccType = splash::DataCollector::FAT_READ; - - std::ostringstream filename; - /* add to standard ending added by libSplash for SerialDataCollector */ - filename << name << timeStep << "_0_0_0.h5"; - - /* check if restart file exists */ - if( !boost::filesystem::exists(filename.str()) ) - { - log ("Radiation (%1%): restart file not found (%2%) - start with zero values") % - speciesName % filename.str(); - } - else - { - hdf5DataFile.open(filename.str().c_str(), fAttr); - - typename PICToSplash::type radSplashType; - - splash::Dimensions componentSize(1, - radiation_frequencies::N_omega, - parameters::N_observer); - - const int N_tmpBuffer = radiation_frequencies::N_omega * parameters::N_observer; - picongpu::float_64* tmpBuffer = new picongpu::float_64[N_tmpBuffer]; - - for(uint32_t ampIndex=0; ampIndex < Amplitude::numComponents; ++ampIndex) - { - hdf5DataFile.read(timeStep, + /** write total radiation data as HDF5 file */ + void writeAmplitudesToHDF5() + { + if(isMaster) + { + writeHDF5file( + timeSumArray, + std::string("radiationHDF5/") + speciesName + std::string("_radAmplitudes_")); + } + } + + + /** perform all operations to get data from GPU to master */ + void collectDataGPUToMaster() + { + // collect data GPU -> CPU -> Master + copyRadiationDeviceToHost(); + collectRadiationOnMaster(); + sumAmplitudesOverTime(timeSumArray, tmp_result); + } + + + /** write all possible/selected output */ + void writeAllFiles(const DataSpace currentGPUpos) + { + // write data to files 
+ saveRadPerGPU(currentGPUpos); + writeLastRadToText(); + writeTotalRadToText(); + writeAmplitudesToHDF5(); + } + + + /** This method returns hdf5 data structure names for amplitudes + * + * Arguments: + * int index - index of Amplitude + * "-1" return record name + * + * Return: + * std::string - name + * + * This method avoids initializing static constexpr string arrays. + */ + static const std::string dataLabels(int index) + { + const std::string path("Amplitude/"); + + /* return record name if handed -1 */ + if(index == -1) + return path; + + const std::string dataLabelsList[] = {"x_Re", "x_Im", "y_Re", "y_Im", "z_Re", "z_Im"}; + + return path + dataLabelsList[index]; + } + + /** This method returns hdf5 data structure names for detector directions + * + * Arguments: + * int index - index of detector + * "-1" return record name + * + * Return: + * std::string - name + * + * This method avoids initializing static const string arrays. + */ + static const std::string dataLabelsDetectorDirection(int index) + { + const std::string path("DetectorDirection/"); + + /* return record name if handed -1 */ + if(index == -1) + return path; + + const std::string dataLabelsList[] = {"x", "y", "z"}; + + return path + dataLabelsList[index]; + } + + + /** This method returns hdf5 data structure names for detector frequencies + * + * Arguments: + * int index - index of detector + * "-1" return record name + * + * Return: + * std::string - name + * + * This method avoids initializing static const string arrays. 
+ */ + static const std::string dataLabelsDetectorFrequency(int index) + { + const std::string path("DetectorFrequency/"); + + /* return record name if handed -1 */ + if(index == -1) + return path; + + const std::string dataLabelsList[] = {"omega"}; + + return path + dataLabelsList[index]; + } + + /** This method returns hdf5 data structure names for all mesh records + * + * Arguments: + * int index - index of record + * "-1" return number of mesh records + * + * Return: + * std::string - name + * + * This method avoids initializing static const string arrays. + */ + static const std::string meshRecordLabels(int index) + { + if(index == idLabels::Amplitude) + return dataLabels(-1); + else if(index == idLabels::Detector) + return dataLabelsDetectorDirection(-1); + else if(index == idLabels::Frequency) + return dataLabelsDetectorFrequency(-1); + else + return std::string("this-record-does-not-exist"); + } + + + /** Write Amplitude data to HDF5 file + * + * Arguments: + * Amplitude* values - array of complex amplitude values + * std::string name - path and beginning of file name to store data to + */ + void writeHDF5file(std::vector& values, std::string name) + { + splash::SerialDataCollector hdf5DataFile(1); + splash::DataCollector::FileCreationAttr fAttr; + + splash::DataCollector::initFileCreationAttr(fAttr); + fAttr.enableCompression = compressionOn; + + std::ostringstream filename; + filename << name << currentStep; + + hdf5DataFile.open(filename.str().c_str(), fAttr); + + typename PICToSplash::type radSplashType; + + + splash::Dimensions bufferSize( + Amplitude::numComponents, + radiation_frequencies::N_omega, + parameters::N_observer); + + splash::Dimensions componentSize(1, radiation_frequencies::N_omega, parameters::N_observer); + + splash::Dimensions stride(Amplitude::numComponents, 1, 1); + + /* get the radiation amplitude unit */ + Amplitude UnityAmplitude(1., 0., 0., 0., 0., 0.); + const picongpu::float_64 factor = UnityAmplitude.calc_radiation() * 
UNIT_ENERGY * UNIT_TIME; + + typedef PICToSplash::type SplashFloatXType; + SplashFloatXType splashFloatXType; + + for(uint32_t ampIndex = 0; ampIndex < Amplitude::numComponents; ++ampIndex) + { + splash::Dimensions offset(ampIndex, 0, 0); + splash::Selection dataSelection(bufferSize, componentSize, offset, stride); + + /* save data for each x/y/z * Re/Im amplitude */ + hdf5DataFile.write( + currentStep, + radSplashType, + 3, + dataSelection, + (meshesPathName + dataLabels(ampIndex)).c_str(), + values.data()); + + /* save SI unit as attribute together with data set */ + hdf5DataFile.writeAttribute( + currentStep, + radSplashType, + (meshesPathName + dataLabels(ampIndex)).c_str(), + "unitSI", + &factor); + + /* position */ + std::vector positionMesh(simDim, 0.0); /* there is no offset - zero */ + hdf5DataFile.writeAttribute( + currentStep, + splashFloatXType, + (meshesPathName + dataLabels(ampIndex)).c_str(), + "position", + 1u, + splash::Dimensions(simDim, 0, 0), + &(*positionMesh.begin())); + } + + /* save SI unit as attribute in the Amplitude group (for convenience) */ + hdf5DataFile.writeAttribute( + currentStep, + radSplashType, + (meshesPathName + std::string("Amplitude")).c_str(), + "unitSI", + &factor); + + /* save detector position / observation direction */ + splash::Dimensions bufferSizeDetector(3, 1, parameters::N_observer); + + splash::Dimensions componentSizeDetector(1, 1, parameters::N_observer); + + splash::Dimensions strideDetector(3, 1, 1); + + for(uint32_t detectorDim = 0; detectorDim < 3; ++detectorDim) + { + splash::Dimensions offset(detectorDim, 0, 0); + splash::Selection dataSelection( + bufferSizeDetector, + componentSizeDetector, + offset, + strideDetector); + + hdf5DataFile.write( + currentStep, + radSplashType, + 3, + dataSelection, + (meshesPathName + dataLabelsDetectorDirection(detectorDim)).c_str(), + detectorPositions.data()); + + /* save SI unit as attribute together with data set */ + const picongpu::float_64 factorDirection = 1.0; + 
hdf5DataFile.writeAttribute( + currentStep, + radSplashType, + (meshesPathName + dataLabelsDetectorDirection(detectorDim)).c_str(), + "unitSI", + &factorDirection); + + /* position */ + std::vector positionMesh(simDim, 0.0); /* there is no offset - zero */ + hdf5DataFile.writeAttribute( + currentStep, + splashFloatXType, + (meshesPathName + dataLabelsDetectorDirection(detectorDim)).c_str(), + "position", + 1u, + splash::Dimensions(simDim, 0, 0), + &(*positionMesh.begin())); + } + + + /* save detector frequencies */ + splash::Dimensions bufferSizeOmega(1, radiation_frequencies::N_omega, 1); + + splash::Dimensions strideOmega(1, 1, 1); + + splash::Dimensions offset(0, 0, 0); + splash::Selection dataSelection(bufferSizeOmega, bufferSizeOmega, offset, strideOmega); + + hdf5DataFile.write( + currentStep, + radSplashType, + 3, + dataSelection, + (meshesPathName + dataLabelsDetectorFrequency(0)).c_str(), + detectorFrequencies.data()); + + /* save SI unit as attribute together with data set */ + const picongpu::float_64 factorOmega = 1.0 / UNIT_TIME; + hdf5DataFile.writeAttribute( + currentStep, + radSplashType, + (meshesPathName + dataLabelsDetectorFrequency(0)).c_str(), + "unitSI", + &factorOmega); + + /* position */ + std::vector positionMesh(simDim, 0.0); /* there is no offset - zero */ + hdf5DataFile.writeAttribute( + currentStep, + splashFloatXType, + (meshesPathName + dataLabelsDetectorFrequency(0)).c_str(), + "position", + 1u, + splash::Dimensions(simDim, 0, 0), + &(*positionMesh.begin())); + + + /* begin openPMD attributes */ + /* begin required openPMD global attributes */ + std::string openPMDversion("1.0.0"); + splash::ColTypeString ctOpenPMDversion(openPMDversion.length()); + hdf5DataFile.writeGlobalAttribute(ctOpenPMDversion, "openPMD", openPMDversion.c_str()); + + const uint32_t openPMDextension = 0; // no extension + splash::ColTypeUInt32 ctUInt32; + hdf5DataFile.writeGlobalAttribute(ctUInt32, "openPMDextension", &openPMDextension); + + std::string 
basePath("/data/%T/"); + splash::ColTypeString ctBasePath(basePath.length()); + hdf5DataFile.writeGlobalAttribute(ctBasePath, "basePath", basePath.c_str()); + + splash::ColTypeString ctMeshesPath(meshesPathName.length()); + hdf5DataFile.writeGlobalAttribute(ctMeshesPath, "meshesPath", meshesPathName.c_str()); + + + splash::ColTypeString ctParticlesPath(particlesPathName.length()); + hdf5DataFile.writeGlobalAttribute(ctParticlesPath, "particlesPath", particlesPathName.c_str()); + + std::string iterationEncoding("fileBased"); + splash::ColTypeString ctIterationEncoding(iterationEncoding.length()); + hdf5DataFile.writeGlobalAttribute( + ctIterationEncoding, + "iterationEncoding", + iterationEncoding.c_str()); + + /* the ..._0_0_0... extension comes from the current filename + formating of the serial data colector in libSplash */ + const int indexCutDirectory = name.rfind('/'); + std::string iterationFormat(name.substr(indexCutDirectory + 1) + std::string("%T_0_0_0.h5")); + splash::ColTypeString ctIterationFormat(iterationFormat.length()); + hdf5DataFile.writeGlobalAttribute(ctIterationFormat, "iterationFormat", iterationFormat.c_str()); + + hdf5DataFile.writeAttribute(currentStep, splashFloatXType, nullptr, "dt", &DELTA_T); + const float_X time = float_X(currentStep) * DELTA_T; + hdf5DataFile.writeAttribute(currentStep, splashFloatXType, nullptr, "time", &time); + splash::ColTypeDouble ctDouble; + hdf5DataFile.writeAttribute(currentStep, ctDouble, nullptr, "timeUnitSI", &UNIT_TIME); + + /* end required openPMD global attributes */ + + /* begin recommended openPMD global attributes */ + + std::string author = Environment<>::get().SimulationDescription().getAuthor(); + if(author.length() > 0) + { + splash::ColTypeString ctAuthor(author.length()); + hdf5DataFile.writeGlobalAttribute(ctAuthor, "author", author.c_str()); + } + + std::string software("PIConGPU"); + splash::ColTypeString ctSoftware(software.length()); + hdf5DataFile.writeGlobalAttribute(ctSoftware, 
"software", software.c_str()); + + std::stringstream softwareVersion; + softwareVersion << PICONGPU_VERSION_MAJOR << "." << PICONGPU_VERSION_MINOR << "." + << PICONGPU_VERSION_PATCH; + if(!std::string(PICONGPU_VERSION_LABEL).empty()) + softwareVersion << "-" << PICONGPU_VERSION_LABEL; + splash::ColTypeString ctSoftwareVersion(softwareVersion.str().length()); + hdf5DataFile.writeGlobalAttribute( + ctSoftwareVersion, + "softwareVersion", + softwareVersion.str().c_str()); + + std::string date = helper::getDateString("%F %T %z"); + splash::ColTypeString ctDate(date.length()); + hdf5DataFile.writeGlobalAttribute(ctDate, "date", date.c_str()); + + /* end recommended openPMD global attributes */ + + /* begin required openPMD attributes for meshes records */ + + for(int i = 0; i < numberMeshRecords; ++i) + { + /* timeOffset */ + const float_X timeOffset = 0.0; + hdf5DataFile.writeAttribute( + currentStep, + splashFloatXType, + (meshesPathName + meshRecordLabels(i)).c_str(), + "timeOffset", + &timeOffset); + + /* gridGlobalOffset */ + std::vector gridGlobalOffset(simDim, 0.0); /* there is no offset - zero */ + hdf5DataFile.writeAttribute( + currentStep, + ctDouble, + (meshesPathName + meshRecordLabels(i)).c_str(), + "gridGlobalOffset", + 1u, + splash::Dimensions(simDim, 0, 0), + &(*gridGlobalOffset.begin())); + + /* gridUnit */ + /* ALL grids have indices as axises - thus no unit conversion */ + const double unitNone = 1.0; + hdf5DataFile.writeAttribute( + currentStep, + ctDouble, + (meshesPathName + meshRecordLabels(i)).c_str(), + "gridUnitSI", + &unitNone); + + /* geometry */ + const std::string geometry("cartesian"); + splash::ColTypeString ctGeometry(geometry.length()); + hdf5DataFile.writeAttribute( + currentStep, + ctGeometry, + (meshesPathName + meshRecordLabels(i)).c_str(), + "geometry", + geometry.c_str()); + + /* dataOrder */ + const std::string dataOrder("C"); + splash::ColTypeString ctDataOrder(dataOrder.length()); + hdf5DataFile.writeAttribute( + currentStep, + 
ctDataOrder, + (meshesPathName + meshRecordLabels(i)).c_str(), + "dataOrder", + dataOrder.c_str()); + + std::vector gridSpacing(simDim, 0.0); + for(uint32_t d = 0; d < simDim; ++d) + gridSpacing.at(d) = float_X(1.0); + hdf5DataFile.writeAttribute( + currentStep, + splashFloatXType, + (meshesPathName + meshRecordLabels(i)).c_str(), + "gridSpacing", + 1u, + splash::Dimensions(simDim, 0, 0), + &(*gridSpacing.begin())); + + /* axisLabels */ + std::list myListOfStr; + if(i == idLabels::Amplitude) /* amplitude record */ + { + myListOfStr.push_back("detector direction index"); + myListOfStr.push_back("detector frequency index"); + } + else if(i == idLabels::Detector) /* detector direction record */ + { + myListOfStr.push_back("detector direction index"); + myListOfStr.push_back("None"); + } + else if(i == idLabels::Frequency) /* detector frequency record */ + { + myListOfStr.push_back("None"); + myListOfStr.push_back("detector frequency index"); + } + myListOfStr.push_back("None"); + + // convert to splash format + helper::GetSplashArrayOfString getSplashArrayOfString; + helper::GetSplashArrayOfString::Result myArrOfStr; + myArrOfStr = getSplashArrayOfString(myListOfStr); + splash::ColTypeString ctSomeListOfStr(myArrOfStr.maxLen); + + hdf5DataFile.writeAttribute( + currentStep, + ctSomeListOfStr, + (meshesPathName + meshRecordLabels(i)).c_str(), + "axisLabels", + 1u, /* ndims: 1D array */ + splash::Dimensions(myListOfStr.size(), 0, 0), /* size of 1D array */ + &(myArrOfStr.buffers.at(0))); + + + /* unitDimension */ + std::vector unitDimension(traits::NUnitDimension, 0.0); + if(i == idLabels::Amplitude) /* amplitude record */ + { + /* units Joule seconds -> Length^2 * Time^-1 * Mass^1 */ + unitDimension[traits::SIBaseUnits::length] = 2.0; + unitDimension[traits::SIBaseUnits::time] = -1.0; + unitDimension[traits::SIBaseUnits::mass] = 1.0; + } + else if(i == idLabels::Detector) /* detector direction record */ + { + /* units none */ + } + else if(i == idLabels::Frequency) /* 
detector frequency record */ + { + /* units 1./second -> Time^-1 */ + unitDimension[traits::SIBaseUnits::time] = -1.0; + } + hdf5DataFile.writeAttribute( + currentStep, + ctDouble, + (meshesPathName + meshRecordLabels(i)).c_str(), + "unitDimension", + 1u, + splash::Dimensions(traits::NUnitDimension, 0, 0), + &(*unitDimension.begin())); + } + /* end required openPMD attributes for meshes */ + /* end openPMD attributes */ + + hdf5DataFile.close(); + } + + + /** Read Amplitude data from HDF5 file + * + * Arguments: + * Amplitude* values - array of complex amplitudes to store data in + * std::string name - path and beginning of file name with data stored in + * const int timeStep - time step to read + */ + void readHDF5file(std::vector& values, std::string name, const int timeStep) + { + splash::SerialDataCollector hdf5DataFile(1); + splash::DataCollector::FileCreationAttr fAttr; + + splash::DataCollector::initFileCreationAttr(fAttr); + + fAttr.fileAccType = splash::DataCollector::FAT_READ; + + std::ostringstream filename; + /* add to standard ending added by libSplash for SerialDataCollector */ + filename << name << timeStep << "_0_0_0.h5"; + + /* check if restart file exists */ + if(!boost::filesystem::exists(filename.str())) + { + log( + "Radiation (%1%): restart file not found (%2%) - start with zero values") + % speciesName % filename.str(); + } + else + { + hdf5DataFile.open(filename.str().c_str(), fAttr); + + typename PICToSplash::type radSplashType; + + splash::Dimensions componentSize(1, radiation_frequencies::N_omega, parameters::N_observer); + + const int N_tmpBuffer = radiation_frequencies::N_omega * parameters::N_observer; + picongpu::float_64* tmpBuffer = new picongpu::float_64[N_tmpBuffer]; + + for(uint32_t ampIndex = 0; ampIndex < Amplitude::numComponents; ++ampIndex) + { + hdf5DataFile.read( + timeStep, (meshesPathName + dataLabels(ampIndex)).c_str(), componentSize, tmpBuffer); - for(int copyIndex = 0; copyIndex < N_tmpBuffer; ++copyIndex) - { - /* 
convert data directly because Amplitude is just 6 float_64 */ - ((picongpu::float_64*)values)[ampIndex + Amplitude::numComponents*copyIndex] = tmpBuffer[copyIndex]; - } - - } - - delete[] tmpBuffer; - hdf5DataFile.close(); - - log ("Radiation (%1%): read radiation data from HDF5") % speciesName; - } - } - - - /** - * From the collected data from all hosts the radiated intensity is - * calculated by calculating the absolute value squared and multiplying - * this with with the appropriate physics constants. - * @param values - * @param name - */ - void writeFile(Amplitude* values, std::string name) - { - std::ofstream outFile; - outFile.open(name.c_str(), std::ofstream::out | std::ostream::trunc); - if (!outFile) - { - std::cerr << "Can't open file [" << name << "] for output, disable plugin output. " << std::endl; - isMaster = false; // no Master anymore -> no process is able to write - } - else - { - for (unsigned int index_direction = 0; index_direction < parameters::N_observer; ++index_direction) // over all directions - { - for (unsigned index_omega = 0; index_omega < radiation_frequencies::N_omega; ++index_omega) // over all frequencies - { - // Take Amplitude for one direction and frequency, - // calculate the square of the absolute value - // and write to file. - outFile << - values[index_omega + index_direction * radiation_frequencies::N_omega].calc_radiation() * UNIT_ENERGY * UNIT_TIME << "\t"; - - } - outFile << std::endl; - } - outFile.flush(); - outFile << std::endl; //now all data are written to file - - if (outFile.fail()) - std::cerr << "Error on flushing file [" << name << "]. " << std::endl; - - outFile.close(); - } - } - - /** - * This functions calls the radiation kernel. It specifies how the - * calculation is parallelized. 
- * gridDim_rad is the number of Thread-Blocks in a grid - * blockDim_rad is the number of threads per block - * - * ----------------------------------------------------------- - * | Grid | - * | -------------- -------------- | - * | | Block 0 | | Block 1 | | - * | |o o | |o o | | - * | |o o | |o o | | - * | |th1 th2 | |th1 th2 | | - * | -------------- -------------- | - * ----------------------------------------------------------- - * - * !!! The TEMPLATE parameter is not used anymore. - * !!! But the calculations it is supposed to do is hard coded in the - * kernel. - * !!! THIS NEEDS TO BE CHANGED !!! - * - * @param currentStep - */ - template< uint32_t AREA> /*This Template Parameter is not used anymore*/ - void calculateRadiationParticles(uint32_t currentStep) - { - this->currentStep = currentStep; - - DataConnector &dc = Environment<>::get().DataConnector(); - auto particles = dc.get< ParticlesType >( ParticlesType::FrameType::getName(), true ); - - /* execute the particle filter */ - radiation::executeParticleFilter( particles, currentStep ); - - /* the parallelization is ONLY over directions: - * (a combined parallelization over direction AND frequencies - * turned out to be slower on GPUs of the Fermi generation (sm_2x) (couple - * percent) and definitely slower on Kepler GPUs (sm_3x, tested on K20)) - */ - const int N_observer = parameters::N_observer; - const auto gridDim_rad = N_observer; - - /* number of threads per block = number of cells in a super cell - * = number of particles in a Frame - * (THIS IS PIConGPU SPECIFIC) - * A Frame is the entity that stores particles. - * A super cell can have many Frames. - * Particles in a Frame can be accessed in parallel. 
- */ - - // Some funny things that make it possible for the kernel to calculate - // the absolute position of the particles - DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); - const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); - const SubGrid& subGrid = Environment::get().SubGrid(); - DataSpace globalOffset(subGrid.getLocalDomain().offset); - globalOffset.y() += (localSize.y() * numSlides); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - - // PIC-like kernel call of the radiation kernel - PMACC_KERNEL( KernelRadiationParticles< - numWorkers - >{} )( - gridDim_rad, - numWorkers - )( - /*Pointer to particles memory on the device*/ - particles->getDeviceParticlesBox(), - - /*Pointer to memory of radiated amplitude on the device*/ - radiation->getDeviceBuffer().getDataBox(), - globalOffset, - currentStep, *cellDescription, - freqFkt, - subGrid.getGlobalDomain().size - ); - - dc.releaseData( ParticlesType::FrameType::getName() ); - - if (dumpPeriod != 0 && currentStep % dumpPeriod == 0) - { - collectDataGPUToMaster(); - writeAllFiles(globalOffset); - - // update time steps - lastStep = currentStep; - - // reset amplitudes on GPU back to zero - radiation->getDeviceBuffer().reset(false); - } - - } - -}; - -} // namespace radiation -} // namespace plugins - -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - plugins::radiation::Radiation< T_UnspecifiedSpecies > - > + for(int copyIndex = 0; copyIndex < N_tmpBuffer; ++copyIndex) + { + /* convert data directly because Amplitude is just 6 float_32 */ + ((picongpu::float_64*) values.data())[ampIndex + Amplitude::numComponents * copyIndex] + = tmpBuffer[copyIndex]; + } + } + + delete[] tmpBuffer; + hdf5DataFile.close(); + + log("Radiation (%1%): read radiation 
data from HDF5") % speciesName; + } + } + + + /** + * From the collected data from all hosts the radiated intensity is + * calculated by calculating the absolute value squared and multiplying + * this with with the appropriate physics constants. + * @param values + * @param name + */ + void writeFile(Amplitude* values, std::string name) + { + std::ofstream outFile; + outFile.open(name.c_str(), std::ofstream::out | std::ostream::trunc); + if(!outFile) + { + std::cerr << "Can't open file [" << name << "] for output, disable plugin output. " + << std::endl; + isMaster = false; // no Master anymore -> no process is able to write + } + else + { + for(unsigned int index_direction = 0; index_direction < parameters::N_observer; + ++index_direction) // over all directions + { + for(unsigned index_omega = 0; index_omega < radiation_frequencies::N_omega; + ++index_omega) // over all frequencies + { + // Take Amplitude for one direction and frequency, + // calculate the square of the absolute value + // and write to file. + outFile << values[index_omega + index_direction * radiation_frequencies::N_omega] + .calc_radiation() + * UNIT_ENERGY * UNIT_TIME + << "\t"; + } + outFile << std::endl; + } + outFile.flush(); + outFile << std::endl; // now all data are written to file + + if(outFile.fail()) + std::cerr << "Error on flushing file [" << name << "]. " << std::endl; + + outFile.close(); + } + } + + /** + * This functions calls the radiation kernel. It specifies how the + * calculation is parallelized. + * gridDim_rad is the number of Thread-Blocks in a grid + * blockDim_rad is the number of threads per block + * + * ----------------------------------------------------------- + * | Grid | + * | -------------- -------------- | + * | | Block 0 | | Block 1 | | + * | |o o | |o o | | + * | |o o | |o o | | + * | |th1 th2 | |th1 th2 | | + * | -------------- -------------- | + * ----------------------------------------------------------- + * + * !!! 
The TEMPLATE parameter is not used anymore. + * !!! But the calculations it is supposed to do is hard coded in the + * kernel. + * !!! THIS NEEDS TO BE CHANGED !!! + * + * @param currentStep + */ + template /*This Template Parameter is not used anymore*/ + void calculateRadiationParticles(uint32_t currentStep) + { + this->currentStep = currentStep; + + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); + + /* execute the particle filter */ + radiation::executeParticleFilter(particles, currentStep); + + /* the parallelization is ONLY over directions: + * (a combined parallelization over direction AND frequencies + * turned out to be slower on GPUs of the Fermi generation (sm_2x) (couple + * percent) and definitely slower on Kepler GPUs (sm_3x, tested on K20)) + */ + const int N_observer = parameters::N_observer; + const auto gridDim_rad = N_observer; + + /* number of threads per block = number of cells in a super cell + * = number of particles in a Frame + * (THIS IS PIConGPU SPECIFIC) + * A Frame is the entity that stores particles. + * A super cell can have many Frames. + * Particles in a Frame can be accessed in parallel. 
+ */ + + // Some funny things that make it possible for the kernel to calculate + // the absolute position of the particles + DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + const SubGrid& subGrid = Environment::get().SubGrid(); + DataSpace globalOffset(subGrid.getLocalDomain().offset); + globalOffset.y() += (localSize.y() * numSlides); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + + // PIC-like kernel call of the radiation kernel + PMACC_KERNEL(KernelRadiationParticles{}) + (DataSpace<2>(gridDim_rad, numJobs), DataSpace<2>(numWorkers, 1))( + /*Pointer to particles memory on the device*/ + particles->getDeviceParticlesBox(), + + /*Pointer to memory of radiated amplitude on the device*/ + radiation->getDeviceBuffer().getDataBox(), + globalOffset, + currentStep, + *cellDescription, + freqFkt, + subGrid.getGlobalDomain().size); + + dc.releaseData(ParticlesType::FrameType::getName()); + + if(dumpPeriod != 0 && currentStep % dumpPeriod == 0) + { + collectDataGPUToMaster(); + writeAllFiles(globalOffset); + + // update time steps + lastStep = currentStep; + + // reset amplitudes on GPU back to zero + radiation->getDeviceBuffer().reset(false); + } + } + }; + + } // namespace radiation + } // namespace plugins + + namespace particles { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the position, a weighting, momentum and momentumPrev1 to run - using RequiredIdentifiers = MakeSeq_t< - position<>, - weighting, - momentum, - momentumPrev1 - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - using SpeciesHasMass = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - using SpeciesHasCharge = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - using 
type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasMass, - SpeciesHasCharge - >; - }; - -} // namespace traits -} // namespace particles -} // namespace picongpu + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + + // this plugin needs at least the position, a weighting, momentum and momentumPrev1 to run + using RequiredIdentifiers = MakeSeq_t, weighting, momentum, momentumPrev1>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + using SpeciesHasMass = typename pmacc::traits::HasFlag>::type; + using SpeciesHasCharge = typename pmacc::traits::HasFlag>::type; + using type = typename bmpl::and_; + }; + + } // namespace traits + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/plugins/radiation/Radiation.kernel b/include/picongpu/plugins/radiation/Radiation.kernel index c6fd10db60..b0f4173901 100644 --- a/include/picongpu/plugins/radiation/Radiation.kernel +++ b/include/picongpu/plugins/radiation/Radiation.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Klaus Steiniger, Felix Schmitt, Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -21,10 +21,6 @@ #pragma once -#include -#include -#include -#include #include "picongpu/simulation_defines.hpp" @@ -55,459 +51,393 @@ #include "picongpu/plugins/radiation/radFormFactor.hpp" #include "sys/stat.h" +#include +#include +#include +#include namespace picongpu { -namespace plugins -{ -namespace radiation -{ - - /** calculate the radiation of a species - * - * If \p T_dependenciesFulfilled is false a dummy kernel without functionality is created - * - * @tparam T_numWorkers number of workers - */ - template< - uint32_t T_numWorkers - > - struct KernelRadiationParticles + namespace plugins { - /** - * The radiation kernel calculates for all particles on the device the - * emitted radiation for every direction and every frequency. - * The parallelization is as follows: - * - There are as many Blocks of threads as there are directions for which - * radiation needs to be calculated. (A block of threads shares - * shared memory) - * - The number of threads per block is equal to the number of cells per - * super cells which is also equal to the number of particles per frame - * - * The procedure starts with calculating unique ids for the threads and - * initializing the shared memory. - * Then a loop over all super cells starts. - * Every thread loads a particle from that super cell and calculates its - * retarded time and its real amplitude (both is dependent of the direction). - * For every Particle - * exists therefor a unique space within the shared memory. - * After that, a thread calculates for a specific frequency the emitted - * radiation of all particles. 
- * @param pb - * @param radiation - * @param globalOffset - * @param currentStep - * @param mapper - * @param freqFkt - * @param simBoxSize - */ - template< - typename ParBox, - typename DBox, - typename Mapping, - typename T_Acc - > - DINLINE - /*__launch_bounds__(256, 4)*/ - void operator()( - T_Acc const & acc, - ParBox pb, - DBox radiation, - DataSpace globalOffset, - uint32_t currentStep, - Mapping mapper, - radiation_frequencies::FreqFunctor freqFkt, - DataSpace simBoxSize - ) const + namespace radiation { - using namespace mappings::threads; - namespace po = boost::program_options; - - constexpr uint32_t frameSize = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorker = T_numWorkers; + /** calculate the radiation of a species + * + * If \p T_dependenciesFulfilled is false a dummy kernel without functionality is created + * + * @tparam T_numWorkers number of workers + */ + template + struct KernelRadiationParticles + { + /** + * The radiation kernel calculates for all particles on the device the + * emitted radiation for every direction and every frequency. + * The parallelization is as follows: + * - There are as many Blocks of threads as there are directions for which + * radiation needs to be calculated. (A block of threads shares + * shared memory) + * - The number of threads per block is equal to the number of cells per + * super cells which is also equal to the number of particles per frame + * + * The procedure starts with calculating unique ids for the threads and + * initializing the shared memory. + * Then a loop over all super cells starts. + * Every thread loads a particle from that super cell and calculates its + * retarded time and its real amplitude (both is dependent of the direction). + * For every Particle + * exists therefor a unique space within the shared memory. + * After that, a thread calculates for a specific frequency the emitted + * radiation of all particles. 
+ * @param pb + * @param radiation + * @param globalOffset + * @param currentStep + * @param mapper + * @param freqFkt + * @param simBoxSize + */ + template + DINLINE + /*__launch_bounds__(256, 4)*/ + void + operator()( + T_Acc const& acc, + ParBox pb, + DBox radiation, + DataSpace globalOffset, + uint32_t currentStep, + Mapping mapper, + radiation_frequencies::FreqFunctor freqFkt, + DataSpace simBoxSize) const + { + using namespace mappings::threads; + namespace po = boost::program_options; + using Amplitude = picongpu::plugins::radiation::Amplitude<>; + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorker = T_numWorkers; + + using FrameType = typename ParBox::FrameType; + using FramePtr = typename ParBox::FramePtr; + + using namespace parameters; // parameters of radiation + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + /// calculate radiated Amplitude + /* parallelized in 1 dimensions: + * looking direction (theta) + * (not anymore data handling) + * create shared memory for particle data to reduce global memory calls + * every thread in a block loads one particle and every thread runs + * through all particles and calculates the radiation for one direction + * for all frequencies + */ + constexpr int blockSize = pmacc::math::CT::volume::type::value; - using FrameType = typename ParBox::FrameType ; - using FramePtr = typename ParBox::FramePtr; + // vectorial part of the integrand in the Jackson formula + PMACC_SMEM(acc, real_amplitude_s, memory::Array); - using namespace parameters; // parameters of radiation + // retarded time + PMACC_SMEM(acc, t_ret_s, memory::Array); - uint32_t const workerIdx = threadIdx.x; + // storage for macro particle weighting needed if + // the coherent and incoherent radiation of a single + // macro-particle needs to be considered + PMACC_SMEM(acc, radWeighting_s, memory::Array); - /// calculate radiated Amplitude - /* parallelized in 1 dimensions: - * looking direction 
(theta) - * (not anymore data handling) - * create shared memory for particle data to reduce global memory calls - * every thread in a block loads one particle and every thread runs - * through all particles and calculates the radiation for one direction - * for all frequencies - */ - constexpr int blockSize = pmacc::math::CT::volume::type::value; + // particle counter used if not all particles are considered for + // radiation calculation + PMACC_SMEM(acc, counter_s, int); - // vectorial part of the integrand in the Jackson formula - PMACC_SMEM( acc, real_amplitude_s, memory::Array< vector_64, blockSize > ); + PMACC_SMEM(acc, lowpass_s, memory::Array); - // retarded time - PMACC_SMEM( acc, t_ret_s, memory::Array< picongpu::float_64, blockSize > ); - // storage for macro particle weighting needed if - // the coherent and incoherent radiation of a single - // macro-particle needs to be considered - PMACC_SMEM( acc, radWeighting_s, memory::Array< float_X, blockSize > ); + int const theta_idx = cupla::blockIdx(acc).x; // cupla::blockIdx(acc).x is used to determine theta - // particle counter used if not all particles are considered for - // radiation calculation - PMACC_SMEM( acc, counter_s, int ); + // simulation time (needed for retarded time) + picongpu::float_64 const t(picongpu::float_64(currentStep) * picongpu::float_64(DELTA_T)); - PMACC_SMEM( acc, lowpass_s, memory::Array< NyquistLowPass, blockSize > ); + // looking direction (needed for observer) used in the thread + vector_64 const look = radiation_observer::observation_direction(theta_idx); + // get extent of guarding super cells (needed to ignore them) + DataSpace const guardingSuperCells = mapper.getGuardingSuperCells(); - int const theta_idx = blockIdx.x; //blockIdx.x is used to determine theta + /* number of super cells on GPU per dimension (still including guard cells) + * remove both guards from count [later one sided guard needs to be added again] + */ + DataSpace const 
superCellsCount(mapper.getGridSuperCells() - 2 * guardingSuperCells); - // simulation time (needed for retarded time) - picongpu::float_64 const t( - picongpu::float_64( currentStep ) * picongpu::float_64( DELTA_T) - ); + // get absolute number of relevant super cells + int const numSuperCells = superCellsCount.productOfComponents(); - // looking direction (needed for observer) used in the thread - vector_64 const look = radiation_observer::observation_direction( theta_idx ); + int const numJobs = cupla::gridDim(acc).y; + int const jobIdx = cupla::blockIdx(acc).y; - // get extent of guarding super cells (needed to ignore them) - DataSpace< simDim > const guardingSuperCells = mapper.getGuardingSuperCells(); + /* go over all super cells on GPU with a stride depending on number of temporary results + * but ignore all guarding supercells + */ + for(int super_cell_index = jobIdx; super_cell_index <= numSuperCells; super_cell_index += numJobs) + { + // select SuperCell and add one sided guard again + DataSpace const superCell + = DataSpaceOperations::map(superCellsCount, super_cell_index) + guardingSuperCells; - /* number of super cells on GPU per dimension (still including guard cells) - * remove both guards from count [later one sided guard needs to be added again] - */ - DataSpace< simDim > const superCellsCount( mapper.getGridSuperCells( ) - 2 * guardingSuperCells ); + // -guardingSuperCells remove guarding block + DataSpace const superCellOffset( + globalOffset + ((superCell - guardingSuperCells) * SuperCellSize::toRT())); - // get absolute number of relevant super cells - int const numSuperCells = superCellsCount.productOfComponents(); + // pointer to frame storing particles + FramePtr frame = pb.getLastFrame(superCell); + // number of particles in current frame + lcellId_t particlesInFrame = pb.getSuperCell(superCell).getSizeLastFrame(); - /* go over all super cells on GPU - * but ignore all guarding supercells - */ - for( int super_cell_index = 0; 
super_cell_index <= numSuperCells; ++super_cell_index ) - { - // select SuperCell and add one sided guard again - DataSpace< simDim > const superCell = - DataSpaceOperations::map( - superCellsCount, - super_cell_index - ) + - guardingSuperCells; - - // -guardingSuperCells remove guarding block - DataSpace< simDim > const superCellOffset( - globalOffset + - ( - ( superCell - guardingSuperCells ) * - SuperCellSize::toRT() - ) - ); - - // pointer to frame storing particles - FramePtr frame = pb.getLastFrame( superCell ); - - // number of particles in current frame - lcellId_t particlesInFrame = pb.getSuperCell( superCell ).getSizeLastFrame(); - - /* go to next supercell - * - * if "isValid" is false then there is no frame - * inside the superCell (anymore) - */ - while( frame.isValid() ) - { - /* since a race condition can occur if "continue loop" is called, - * all threads must wait for the selection of a new frame - * until all threads have evaluated "isValid" - */ - __syncthreads(); - - ForEachIdx< - IdxConfig< - 1, - numWorker - > - > onlyMaster{ workerIdx }; - - /* The Master process (thread 0) in every thread block is in - * charge of loading a frame from - * the current super cell and evaluate the total number of - * particles in this frame. 
- */ - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) + /* go to next supercell + * + * if "isValid" is false then there is no frame + * inside the superCell (anymore) + */ + while(frame.isValid()) { - counter_s = 0; - } - ); + /* since a race condition can occur if "continue loop" is called, + * all threads must wait for the selection of a new frame + * until all threads have evaluated "isValid" + */ + cupla::__syncthreads(acc); - __syncthreads(); + ForEachIdx> onlyMaster{workerIdx}; - using ParticleDomCfg = IdxConfig< - frameSize, - numWorker - >; + /* The Master process (thread 0) in every thread block is in + * charge of loading a frame from + * the current super cell and evaluate the total number of + * particles in this frame. + */ + onlyMaster([&](uint32_t const, uint32_t const) { counter_s = 0; }); - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle{ workerIdx }; + cupla::__syncthreads(acc); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - // only threads with particles are running - if( linearIdx < particlesInFrame ) - { + using ParticleDomCfg = IdxConfig; - auto par = frame[ linearIdx ]; - // get old and new particle momenta - vector_X const particle_momentumNow = vector_X( par[momentum_] ); - vector_X const particle_momentumOld = vector_X( par[momentumPrev1_] ); - /* initializes "saveParticleAt" flag with -1 - * because "counter_s" will never be -1 - * therefore, if a particle is saved, a value of counter - * is stored in "saveParticleAt" != -1 - * THIS IS ACTUALLY ONLY NEEDED IF: the radiation flag was set - * LATER: can this be optimized? 
- */ - int saveParticleAt = -1; + // loop over all particles in the frame + ForEachIdx forEachParticle{workerIdx}; - /* if particle is not accelerated we skip all calculations - * - * this is a component-wise comparison - */ - if( particle_momentumNow != particle_momentumOld ) + forEachParticle([&](uint32_t const linearIdx, uint32_t const) { + // only threads with particles are running + if(linearIdx < particlesInFrame) { - if( getRadiationMask(par) ) - saveParticleAt = nvidia::atomicAllInc( - acc, - &counter_s, - ::alpaka::hierarchy::Threads{} - ); - - /* for information: - * atomicAdd returns an int with the previous - * value of "counter_s" != -1 - * therefore, if a particle is selected - * "saveParticleAs" != -1 + auto par = frame[linearIdx]; + // get old and new particle momenta + vector_X const particle_momentumNow = vector_X(par[momentum_]); + vector_X const particle_momentumOld = vector_X(par[momentumPrev1_]); + /* initializes "saveParticleAt" flag with -1 + * because "counter_s" will never be -1 + * therefore, if a particle is saved, a value of counter + * is stored in "saveParticleAt" != -1 + * THIS IS ACTUALLY ONLY NEEDED IF: the radiation flag was set + * LATER: can this be optimized? 
*/ + int saveParticleAt = -1; - // if a particle needs to be considered - if( saveParticleAt != -1 ) + /* if particle is not accelerated we skip all calculations + * + * this is a component-wise comparison + */ + if(particle_momentumNow != particle_momentumOld) { - - // calculate global position - lcellId_t const cellIdx = par[ localCellIdx_ ]; - - // position inside of the cell - floatD_X const pos = par[ position_ ]; - - // calculate global position of cell - DataSpace< simDim > const globalPos( - superCellOffset + - DataSpaceOperations< simDim >:: - template map< SuperCellSize >( cellIdx ) - ); - - // add global position of cell with local position of particle in cell - vector_X particle_locationNow; - // set z component to zero in case of simDim==DIM2 - particle_locationNow[ 2 ] = 0.0; - // run over all components and compute gobal position - for( int i = 0; i < simDim; ++i ) - particle_locationNow[ i ] = - ( float_X( globalPos[ i ] ) + pos[ i ] ) * - cellSize[ i ]; - - /* get macro-particle weighting - * - * Info: - * the weighting is the number of real particles described - * by a macro-particle + if(getRadiationMask(par)) + saveParticleAt = nvidia::atomicAllInc( + acc, + &counter_s, + ::alpaka::hierarchy::Threads{}); + + /* for information: + * atomicAdd returns an int with the previous + * value of "counter_s" != -1 + * therefore, if a particle is selected + * "saveParticleAs" != -1 */ - float_X const weighting = par[ weighting_ ]; - /* only of coherent and incoherent radiation of a single macro-particle is - * considered, the weighting of each macro-particle needs to be stored - * in order to be considered when the actual frequency calculation is done - */ - radWeighting_s[ saveParticleAt ] = weighting; + // if a particle needs to be considered + if(saveParticleAt != -1) + { + // calculate global position + lcellId_t const cellIdx = par[localCellIdx_]; + + // position inside of the cell + floatD_X const pos = par[position_]; + + // calculate global 
position of cell + DataSpace const globalPos( + superCellOffset + + DataSpaceOperations::template map(cellIdx)); + + // add global position of cell with local position of particle in cell + vector_X particle_locationNow; + // set z component to zero in case of simDim==DIM2 + particle_locationNow[2] = 0.0; + // run over all components and compute gobal position + for(int i = 0; i < simDim; ++i) + particle_locationNow[i] + = (float_X(globalPos[i]) + pos[i]) * cellSize[i]; + + /* get macro-particle weighting + * + * Info: + * the weighting is the number of real particles described + * by a macro-particle + */ + float_X const weighting = par[weighting_]; + + /* only of coherent and incoherent radiation of a single macro-particle is + * considered, the weighting of each macro-particle needs to be stored + * in order to be considered when the actual frequency calculation is done + */ + radWeighting_s[saveParticleAt] = weighting; + + // mass of macro-particle + float_X const particle_mass = attribute::getMass(weighting, par); + + /**************************************************** + **** Here happens the true physical calculation **** + ****************************************************/ + + // set up particle using the radiation's own particle class + /*!\todo please add a namespace for Particle class*/ + Particle const particle( + particle_locationNow, + particle_momentumOld, + particle_momentumNow, + particle_mass); + + // set up amplitude calculator + using Calc_Amplitude_n_sim_1 = Calc_Amplitude; + + // calculate amplitude + Calc_Amplitude_n_sim_1 const amplitude3(particle, DELTA_T, t); + + // get charge of single electron ! 
(weighting=1.0f) + float_X const particle_charge = frame::getCharge(); + + /* compute real amplitude of macro-particle with a charge of + * a single electron + */ + real_amplitude_s[saveParticleAt] = amplitude3.get_vector(look) + * particle_charge * picongpu::float_64(DELTA_T); + + // retarded time stored in shared memory + t_ret_s[saveParticleAt] = amplitude3.get_t_ret(look); + + lowpass_s[saveParticleAt] = NyquistLowPass(look, particle); + + /* the particle amplitude is used to include the weighting + * of the window function filter without needing more memory + */ + radWindowFunction::radWindowFunction const winFkt; + + /* start with a factor of one */ + float_X windowFactor = 1.0; + + for(uint32_t d = 0; d < simDim; ++d) + { + windowFactor + *= winFkt(particle_locationNow[d], simBoxSize[d] * cellSize[d]); + } + + /* apply window function factor to amplitude */ + real_amplitude_s[saveParticleAt] *= windowFactor; + + } // END: if a particle needs to be considered + } // END: check if particle is accelerated + } // END: only threads with particles are running + }); + + cupla::__syncthreads(acc); // wait till every thread has loaded its particle data + + + // run over all valid omegas for this thread + for(int o = workerIdx; o < radiation_frequencies::N_omega; o += T_numWorkers) + { + /* storage for amplitude (complex 3D vector) + * it is initialized with zeros ( 0 + i 0 ) + * Attention: This is an accumulator and should + * be in double precision to ameliorate roundoff + * errors! 
+ */ + Amplitude amplitude = Amplitude::zero(); - // mass of macro-particle - float_X const particle_mass = attribute::getMass( - weighting, - par - ); + // compute frequency "omega" using for-loop-index "o" + picongpu::float_64 const omega = freqFkt(o); + // create a form factor object + radFormFactor::radFormFactor const myRadFormFactor{}; + + /* Particle loop: thread runs through loaded particle data + * + * Summation of Jackson radiation formula integrand + * over all electrons for fixed, thread-specific + * frequency + */ + for(int j = 0; j < counter_s; ++j) + { + // check Nyquist-limit for each particle "j" and each frequency "omega" + if(lowpass_s[j].check(omega)) + { /**************************************************** **** Here happens the true physical calculation **** ****************************************************/ - // set up particle using the radiation's own particle class - /*!\todo please add a namespace for Particle class*/ - Particle const particle( - particle_locationNow, - particle_momentumOld, - particle_momentumNow, - particle_mass - ); - - // set up amplitude calculator - using Calc_Amplitude_n_sim_1 = Calc_Amplitude< - Retarded_time_1, - Old_DFT - >; - - // calculate amplitude - Calc_Amplitude_n_sim_1 const amplitude3( - particle, - DELTA_T, - t - ); - - // get charge of single electron ! 
(weighting=1.0f) - float_X const particle_charge = frame::getCharge(); - - /* compute real amplitude of macro-particle with a charge of - * a single electron - */ - real_amplitude_s[ saveParticleAt ] = - amplitude3.get_vector( look ) * - particle_charge * - picongpu::float_64( DELTA_T ); - - // retarded time stored in shared memory - t_ret_s[ saveParticleAt ] = amplitude3.get_t_ret( look ); + // calulate the form factor's' influences to the real amplitude + vector_64 const weighted_real_amp = real_amplitude_s[j] + * precisionCast(myRadFormFactor(radWeighting_s[j], omega, look)); - lowpass_s[ saveParticleAt ] = NyquistLowPass( - look, - particle - ); - - /* the particle amplitude is used to include the weighting - * of the window function filter without needing more memory + /* complex amplitude increment for j-th particle + * It is local to the loop and can be single precision */ - radWindowFunction::radWindowFunction const winFkt; - - /* start with a factor of one */ - float_X windowFactor = 1.0; - - for( uint32_t d = 0; d < simDim; ++d ) - { - windowFactor *= winFkt( - particle_locationNow[ d ], - simBoxSize[d] * cellSize[ d ] - ); - } + Amplitude amplitude_add(weighted_real_amp, t_ret_s[j] * omega); - /* apply window function factor to amplitude */ - real_amplitude_s[ saveParticleAt ] *= windowFactor; + // add this single amplitude those previously considered + amplitude += amplitude_add; - } // END: if a particle needs to be considered - } // END: check if particle is accelerated - } // END: only threads with particles are running - } - ); + } // END: check Nyquist-limit for each particle "j" and each frequency "omega" - __syncthreads(); // wait till every thread has loaded its particle data + } // END: Particle loop + /* the radiation contribution of the following is added to global memory: + * - valid particles of last super cell + * - from this (one) time step + * - omega_id = theta_idx * radiation_frequencies::N_omega + o + */ + 
radiation(DataSpace<2>(theta_idx * radiation_frequencies::N_omega + o, jobIdx)) + += amplitude; - - // run over all valid omegas for this thread - for( int o = workerIdx; o < radiation_frequencies::N_omega; o += T_numWorkers ) - { - - /* storage for amplitude (complex 3D vector) - * it is initialized with zeros ( 0 + i 0 ) - */ - Amplitude amplitude = Amplitude::zero(); - - // compute frequency "omega" using for-loop-index "o" - picongpu::float_64 const omega = freqFkt( o ); - - // create a form factor object - radFormFactor::radFormFactor const myRadFormFactor{ }; - - /* Particle loop: thread runs through loaded particle data - * - * Summation of Jackson radiation formula integrand - * over all electrons for fixed, thread-specific - * frequency - */ - for( int j = 0; j < counter_s; ++j ) - { - - // check Nyquist-limit for each particle "j" and each frequency "omega" - if( lowpass_s[ j ].check( omega ) ) - { - - /**************************************************** - **** Here happens the true physical calculation **** - ****************************************************/ - - // calulate the form factor's' influences to the real amplitude - vector_64 const weighted_real_amp = real_amplitude_s[ j ] * - precisionCast< float_64 >( - myRadFormFactor( - radWeighting_s[ j ], - omega, - look - ) - ); - - // complex amplitude for j-th particle - Amplitude amplitude_add( - weighted_real_amp, - t_ret_s[ j ] * omega - ); - - // add this single amplitude those previously considered - amplitude += amplitude_add; - - }// END: check Nyquist-limit for each particle "j" and each frequency "omega" - - }// END: Particle loop - - /* the radiation contribution of the following is added to global memory: - * - valid particles of last super cell - * - from this (one) time step - * - omega_id = theta_idx * radiation_frequencies::N_omega + o - */ - radiation[ theta_idx * radiation_frequencies::N_omega + o] += amplitude; - - } // end frequency loop + } // end frequency loop - // wait till 
all radiation contributions for this super cell are done - __syncthreads(); + // wait till all radiation contributions for this super cell are done + cupla::__syncthreads(acc); - /* First threads starts loading next frame of the super-cell: - * - * Info: - * The calculation starts with the last SuperCell (must not be full filled) - * all previous SuperCells are full with particles - */ - particlesInFrame = frameSize; - frame = pb.getPreviousFrame( frame ); + /* First threads starts loading next frame of the super-cell: + * + * Info: + * The calculation starts with the last SuperCell (must not be full filled) + * all previous SuperCells are full with particles + */ + particlesInFrame = frameSize; + frame = pb.getPreviousFrame(frame); - } // end while(frame.isValid()) + } // end while(frame.isValid()) - } // end loop over all super cells + } // end loop over all super cells - } // end radiation kernel - }; + } // end radiation kernel + }; -} // namespace radiation + } // namespace radiation -} // namespace plugins + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/VectorTypes.hpp b/include/picongpu/plugins/radiation/VectorTypes.hpp index 255dbf9aa4..70067be846 100644 --- a/include/picongpu/plugins/radiation/VectorTypes.hpp +++ b/include/picongpu/plugins/radiation/VectorTypes.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -26,13 +26,13 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - using vector_X = cuda_vec< picongpu::float3_X, picongpu::float_X >; - using vector_32 = /*__align__(16)*/ cuda_vec< picongpu::float3_32, picongpu::float_32 >; - using vector_64 = /*__align__(32)*/ cuda_vec< picongpu::float3_64, picongpu::float_64 >; -} // namespace radiation -} // namespace plugins + namespace plugins + { + namespace radiation + { + using vector_X = cuda_vec; + using vector_32 = /*__align__(16)*/ cuda_vec; + using vector_64 = /*__align__(32)*/ cuda_vec; + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/amplitude.hpp b/include/picongpu/plugins/radiation/amplitude.hpp index 4e143ba3cc..4a5170fa26 100644 --- a/include/picongpu/plugins/radiation/amplitude.hpp +++ b/include/picongpu/plugins/radiation/amplitude.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Alexander Debus +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Alexander Debus * * This file is part of PIConGPU. 
* @@ -19,144 +19,227 @@ #pragma once +#include "VectorTypes.hpp" + #include #include -#include "VectorTypes.hpp" #include - namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -/** class to store 3 complex numbers for the radiated amplitude - */ -class Amplitude -{ -public: - using complex_64 = pmacc::math::Complex< picongpu::float_64 >; - - /* number of scalar components in Amplitude = 3 (3D) * 2 (complex) = 6 */ - static constexpr uint32_t numComponents = uint32_t(3) * uint32_t(sizeof(complex_64) / sizeof(typename complex_64::type)); - - /** constructor - * - * Arguments: - * - vector_64: real 3D vector - * - float: complex phase */ - DINLINE Amplitude(vector_64 vec, picongpu::float_X phase) - { - picongpu::float_X cosValue; - picongpu::float_X sinValue; - picongpu::math::sincos(phase, sinValue, cosValue); - amp_x=picongpu::math::euler(vec.x(), picongpu::precisionCast(sinValue), picongpu::precisionCast(cosValue) ); - amp_y=picongpu::math::euler(vec.y(), picongpu::precisionCast(sinValue), picongpu::precisionCast(cosValue) ); - amp_z=picongpu::math::euler(vec.z(), picongpu::precisionCast(sinValue), picongpu::precisionCast(cosValue) ); - } - - - /** default constructor - * - * \warning does not initialize values! 
*/ - HDINLINE Amplitude(void) - { - - } - - - /** constructor - * - * Arguments: - * - 6x float: Re(x), Im(x), Re(y), Im(y), Re(z), Im(z) */ - HDINLINE Amplitude(const picongpu::float_64 x_re, const picongpu::float_64 x_im, - const picongpu::float_64 y_re, const picongpu::float_64 y_im, - const picongpu::float_64 z_re, const picongpu::float_64 z_im) - : amp_x(x_re, x_im), amp_y(y_re, y_im), amp_z(z_re, z_im) - { - - } - - - /** returns a zero amplitude vector - * - * used to initialize amplitudes to zero */ - HDINLINE static Amplitude zero(void) - { - Amplitude result; - result.amp_x = complex_64::zero(); - result.amp_y = complex_64::zero(); - result.amp_z = complex_64::zero(); - return result; - } - - /** assign addition */ - HDINLINE Amplitude& operator+=(const Amplitude& other) - { - amp_x += other.amp_x; - amp_y += other.amp_y; - amp_z += other.amp_z; - return *this; - } - - - /** assign difference */ - HDINLINE Amplitude& operator-=(const Amplitude& other) - { - amp_x -= other.amp_x; - amp_y -= other.amp_y; - amp_z -= other.amp_z; - return *this; - } - - - /** calculate radiation from *this amplitude - * - * Returns: \f$\frac{d^2 I}{d \Omega d \omega} = const*Amplitude^2\f$ */ - HDINLINE picongpu::float_64 calc_radiation(void) - { - // const SI factor radiation - const picongpu::float_64 factor = 1.0 / - (16. 
* util::cube(pmacc::algorithms::math::Pi< picongpu::float_64 >::value) * picongpu::EPS0 * picongpu::SPEED_OF_LIGHT); - - return factor * (picongpu::math::abs2(amp_x) + picongpu::math::abs2(amp_y) + picongpu::math::abs2(amp_z)); - } - - - /** debugging method - * - * Returns: real-x-value */ - HDINLINE picongpu::float_64 debug(void) - { - return amp_x.get_real(); - } - - -private: - complex_64 amp_x; // complex amplitude x-component - complex_64 amp_y; // complex amplitude y-component - complex_64 amp_z; // complex amplitude z-component - -}; -} // namespace radiation -} // namespace plugins + namespace plugins + { + namespace radiation + { + /** class to store 3 complex numbers for the radiated amplitude + */ + template + class Amplitude + { + public: + /* For the intermediate amplitude values we may use single precision, + * for the final accumulation we will have to use double precision. + */ + using complex_T = pmacc::math::Complex; + /* number of scalar components in Amplitude = 3 (3D) * 2 (complex) = 6 */ + static constexpr uint32_t numComponents + = uint32_t(3) * uint32_t(sizeof(complex_T) / sizeof(typename complex_T::type)); + + /** constructor + * + * Arguments: + * - vector_64: real 3D vector + * - float: complex phase */ + DINLINE Amplitude(vector_64 vec, picongpu::float_X phase) + { + picongpu::float_X cosValue; + picongpu::float_X sinValue; + pmacc::math::sincos(phase, sinValue, cosValue); + amp_x = pmacc::math::euler( + precisionCast(vec.x()), + precisionCast(sinValue), + precisionCast(cosValue)); + amp_y = pmacc::math::euler( + precisionCast(vec.y()), + precisionCast(sinValue), + precisionCast(cosValue)); + amp_z = pmacc::math::euler( + precisionCast(vec.z()), + precisionCast(sinValue), + precisionCast(cosValue)); + } + + /** default constructor + * + * \warning does not initialize values! 
*/ + HDINLINE Amplitude(void) + { + } + + + /** constructor + * + * Arguments: + * - 6x float: Re(x), Im(x), Re(y), Im(y), Re(z), Im(z) */ + HDINLINE Amplitude( + const picongpu::float_64 x_re, + const picongpu::float_64 x_im, + const picongpu::float_64 y_re, + const picongpu::float_64 y_im, + const picongpu::float_64 z_re, + const picongpu::float_64 z_im) + : amp_x(x_re, x_im) + , amp_y(y_re, y_im) + , amp_z(z_re, z_im) + { + } + + /** constructor with member initialization + * + * @param x pmacc::math::complex x component of the amplitude vector. + * @param y pmacc::math::complex y component of the amplitude vector. + * @param z pmacc::math::complex z component of the amplitude vector. + */ + HDINLINE Amplitude(const complex_T& x, const complex_T& y, const complex_T& z) + : amp_x(x) + , amp_y(y) + , amp_z(z) + { + } + + /** returns a zero amplitude vector + * + * used to initialize amplitudes to zero */ + HDINLINE static Amplitude zero(void) + { + Amplitude result; + result.amp_x = complex_T::zero(); + result.amp_y = complex_T::zero(); + result.amp_z = complex_T::zero(); + return result; + } + + /** assign addition */ + HDINLINE Amplitude& operator+=(const Amplitude& other) + { + amp_x += other.amp_x; + amp_y += other.amp_y; + amp_z += other.amp_z; + return *this; + } + + + /** assign difference */ + HDINLINE Amplitude& operator-=(const Amplitude& other) + { + amp_x -= other.amp_x; + amp_y -= other.amp_y; + amp_z -= other.amp_z; + return *this; + } + + + /** calculate radiation from *this amplitude + * + * Returns: \f$\frac{d^2 I}{d \Omega d \omega} = const*Amplitude^2\f$ */ + HDINLINE picongpu::float_64 calc_radiation(void) + { + // const SI factor radiation + const picongpu::float_64 factor = 1.0 + / (16. 
* util::cube(pmacc::math::Pi::value) * picongpu::EPS0 + * picongpu::SPEED_OF_LIGHT); + + return factor * (pmacc::math::abs2(amp_x) + pmacc::math::abs2(amp_y) + pmacc::math::abs2(amp_z)); + } + + + /** debugging method + * + * Returns: real-x-value */ + HDINLINE picongpu::float_64 debug(void) + { + return amp_x.get_real(); + } + + /** Getters for the components + */ + HDINLINE complex_T getXcomponent() const + { + return this->amp_x; + } + HDINLINE complex_T getYcomponent() const + { + return this->amp_y; + } + HDINLINE complex_T getZcomponent() const + { + return this->amp_z; + } + + private: + complex_T amp_x; // complex amplitude x-component + complex_T amp_y; // complex amplitude y-component + complex_T amp_z; // complex amplitude z-component + }; + } // namespace radiation + } // namespace plugins } // namespace picongpu namespace pmacc { -namespace mpi -{ + namespace mpi + { + /** implementation of MPI transaction on Amplitude class */ + template<> + HINLINE MPI_StructAsArray getMPI_StructAsArray>() + { + MPI_StructAsArray result + = getMPI_StructAsArray::complex_T::type>(); + result.sizeMultiplier *= picongpu::plugins::radiation::Amplitude<>::numComponents; + return result; + }; + + } // namespace mpi +} // namespace pmacc - /** implementation of MPI transaction on Amplitude class */ - template<> - HINLINE MPI_StructAsArray getMPI_StructAsArray< picongpu::plugins::radiation::Amplitude >() - { - MPI_StructAsArray result = getMPI_StructAsArray< picongpu::plugins::radiation::Amplitude::complex_64::type > (); - result.sizeMultiplier *= picongpu::plugins::radiation::Amplitude::numComponents; - return result; - }; -} // namespace mpi +namespace pmacc +{ + namespace algorithms + { + namespace precisionCast + { + /* We want to be able to cast a low + * precision amplitude to a high-precision one. + * The functors create temporary Amplitude objects and can + * be detrimental to performance. 
+ */ + template + struct TypeCast> + { + using result = const picongpu::plugins::radiation::Amplitude&; + + HDINLINE result operator()(result amplitude) const + { + return amplitude; + } + }; + + template + struct TypeCast> + { + using result = picongpu::plugins::radiation::Amplitude; + using ParamType = picongpu::plugins::radiation::Amplitude; + HDINLINE result operator()(const ParamType& amplitude) const + { + result Result( + precisionCast(amplitude.getXcomponent()), + precisionCast(amplitude.getYcomponent()), + precisionCast(amplitude.getZcomponent())); + return Result; + } + }; + + } // namespace precisionCast + } // namespace algorithms } // namespace pmacc diff --git a/include/picongpu/plugins/radiation/calc_amplitude.hpp b/include/picongpu/plugins/radiation/calc_amplitude.hpp index afc39203a5..cc9d43e1f6 100644 --- a/include/picongpu/plugins/radiation/calc_amplitude.hpp +++ b/include/picongpu/plugins/radiation/calc_amplitude.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -26,155 +26,158 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -//protected: -// error class for wrong time access - -class Error_Accessing_Time -{ -public: - - Error_Accessing_Time(void) - { - } -}; - - -struct One_minus_beta_times_n -{ - /// Class to calculate \f$1-\beta \times \vec n\f$ - /// using the best suiting method depending on energy - /// to achieve the best numerical results - /// it will be used as base class for amplitude calculations - - // Taylor just includes a method, When includes just enum - - HDINLINE picongpu::float_32 operator()(const vector_64& n, const Particle & particle) const + namespace plugins { - // 1/gamma^2: - - const picongpu::float_64 gamma_inv_square(particle.get_gamma_inv_square ()); - - //picongpu::float_64 value; // storage for 1-\beta \times \vec n - - // if energy is high enough to cause numerical errors ( equals if 1/gamma^2 is close enough to zero) - // chose a Taylor approximation to to better calculate 1-\beta \times \vec n (which is close to 1-1) - // is energy is low, then the approximation will cause a larger error, therefor calculate - // 1-\beta \times \vec n directly - // with 0.18 the relative error will be below 0.001% for a Taylor series of 1-sqrt(1-x) of 5th order - if (gamma_inv_square < picongpu::GAMMA_INV_SQUARE_RAD_THRESH) + namespace radiation { - const picongpu::float_64 cos_theta(particle.get_cos_theta (n)); // cosine between looking vector and momentum of particle - const picongpu::float_64 taylor_approx(cos_theta * Taylor()(gamma_inv_square) + (1.0 - cos_theta)); - return (taylor_approx); - } - else - { - const vector_64 beta(particle.get_beta ()); // calculate v/c=beta - return (1.0 - beta * n); - } - - } -}; - -struct Retarded_time_1 -{ - // interface for combined 'Amplitude_Calc' classes - // contains more parameters than needed to have the - // same interface as 'Retarded_time_2' - - HDINLINE picongpu::float_64 operator()(const picongpu::float_64 t, - const vector_64& 
n, const Particle & particle) const - { - const vector_64 r(particle.get_location ()); // location - return (picongpu::float_64) (t - (n * r) / (picongpu::SPEED_OF_LIGHT)); - } - -}; - -template // divisor to the power of 'Exponent' -struct Old_Method -{ - /// classical method to calculate the real vector part of the radiation's amplitude - /// this base class includes both possible interpretations: - /// with Exponent=Cube the integration over t_ret will be assumed (old FFT) - /// with Exponent=Square the integration over t_sim will be assumed (old DFT) - - HDINLINE vector_64 operator()(const vector_64& n, const Particle& particle, const picongpu::float_64 delta_t) const - { - const vector_64 beta(particle.get_beta ()); // beta = v/c - const vector_64 beta_dot((beta - particle.get_beta < When::now + 1 > ()) / delta_t); // numeric differentiation (backward difference) - const Exponent exponent; // instance of the Exponent class // ???is a static class and no instance possible??? - //const One_minus_beta_times_n one_minus_beta_times_n; - const picongpu::float_64 factor(exponent(1.0 / (One_minus_beta_times_n()(n, particle)))); - // factor=1/(1-beta*n)^g g=2 for DFT and g=3 for FFT - return (n % ((n - beta) % beta_dot)) * factor; - } -}; - -// typedef of all possible forms of Old_Method -//typedef Old_Method > Old_FFT; -typedef Old_Method > Old_DFT; - - - - -// ------- Calculate Amplitude class ------------- // - -template -class Calc_Amplitude -{ - /// final class for amplitude calculations - /// derived from a class to calculate the retarded time (TimeCalc; possibilities: - /// Retarded_Time_1 and Retarded_Time_2) and from a class to calculate - /// the real vector part of the amplitude (VecCalc; possibilities: - /// Old_FFT, Old_DFT, Partial_Integral_Method_1, Partial_Integral_Method_2) -public: - /// constructor - // takes a lot of parameters to have a general interface - // not all parameters are needed for all possible combinations - // of base classes - - 
HDINLINE Calc_Amplitude(const Particle& particle, - const picongpu::float_64 delta_t, - const picongpu::float_64 t_sim) - : m_particle(particle), m_delta_t(delta_t), m_t_sim(t_sim) - { - } - - // get real vector part of amplitude - - HDINLINE vector_64 get_vector(const vector_64& n) const - { - const vector_64 look_direction(n.unit_vec()); // make sure look_direction is a unit vector - VecCalc vecC; - return vecC(look_direction, m_particle, m_delta_t); - } - - // get retarded time - - HDINLINE picongpu::float_64 get_t_ret(const vector_64 look_direction) const - { - TimeCalc timeC; - return timeC(m_t_sim, look_direction, m_particle); - - // const vector_64 r = particle.get_location (); // location - // return (picongpu::float_64) (t - (n * r) / (picongpu::SPEED_OF_LIGHT)); - } - -private: - // data: - const Particle& m_particle; // one particle - const picongpu::float_64 m_delta_t; // length of one time step in simulation - const picongpu::float_64 m_t_sim; // simulation time (for methods not using index*delta_t ) - - -}; - -} // namespace radiation -} // namespace plugins + // protected: + // error class for wrong time access + + class Error_Accessing_Time + { + public: + Error_Accessing_Time(void) + { + } + }; + + + struct One_minus_beta_times_n + { + /// Class to calculate \f$1-\beta \times \vec n\f$ + /// using the best suiting method depending on energy + /// to achieve the best numerical results + /// it will be used as base class for amplitude calculations + + // Taylor just includes a method, When includes just enum + + HDINLINE picongpu::float_32 operator()(const vector_64& n, const Particle& particle) const + { + // 1/gamma^2: + + const picongpu::float_64 gamma_inv_square(particle.get_gamma_inv_square()); + + // picongpu::float_64 value; // storage for 1-\beta \times \vec n + + // if energy is high enough to cause numerical errors ( equals if 1/gamma^2 is close enough to + // zero) chose a Taylor approximation to to better calculate 1-\beta \times \vec n 
(which is close + // to 1-1) is energy is low, then the approximation will cause a larger error, therefor calculate + // 1-\beta \times \vec n directly + // with 0.18 the relative error will be below 0.001% for a Taylor series of 1-sqrt(1-x) of 5th + // order + if(gamma_inv_square < picongpu::GAMMA_INV_SQUARE_RAD_THRESH) + { + const picongpu::float_64 cos_theta(particle.get_cos_theta( + n)); // cosine between looking vector and momentum of particle + const picongpu::float_64 taylor_approx( + cos_theta * Taylor()(gamma_inv_square) + (1.0 - cos_theta)); + return (taylor_approx); + } + else + { + const vector_64 beta(particle.get_beta()); // calculate v/c=beta + return (1.0 - beta * n); + } + } + }; + + struct Retarded_time_1 + { + // interface for combined 'Amplitude_Calc' classes + // contains more parameters than needed to have the + // same interface as 'Retarded_time_2' + + HDINLINE picongpu::float_64 operator()( + const picongpu::float_64 t, + const vector_64& n, + const Particle& particle) const + { + const vector_64 r(particle.get_location()); // location + return (picongpu::float_64)(t - (n * r) / (picongpu::SPEED_OF_LIGHT)); + } + }; + + template // divisor to the power of 'Exponent' + struct Old_Method + { + /// classical method to calculate the real vector part of the radiation's amplitude + /// this base class includes both possible interpretations: + /// with Exponent=Cube the integration over t_ret will be assumed (old FFT) + /// with Exponent=Square the integration over t_sim will be assumed (old DFT) + + HDINLINE vector_64 + operator()(const vector_64& n, const Particle& particle, const picongpu::float_64 delta_t) const + { + const vector_64 beta(particle.get_beta()); // beta = v/c + const vector_64 beta_dot( + (beta - particle.get_beta()) + / delta_t); // numeric differentiation (backward difference) + const Exponent exponent; // instance of the Exponent class // ???is a static class and no instance + // possible??? 
const One_minus_beta_times_n one_minus_beta_times_n; + const picongpu::float_64 factor(exponent(1.0 / (One_minus_beta_times_n()(n, particle)))); + // factor=1/(1-beta*n)^g g=2 for DFT and g=3 for FFT + return (n % ((n - beta) % beta_dot)) * factor; + } + }; + + // typedef of all possible forms of Old_Method + // typedef Old_Method > Old_FFT; + typedef Old_Method> Old_DFT; + + + // ------- Calculate Amplitude class ------------- // + + template + class Calc_Amplitude + { + /// final class for amplitude calculations + /// derived from a class to calculate the retarded time (TimeCalc; possibilities: + /// Retarded_Time_1 and Retarded_Time_2) and from a class to calculate + /// the real vector part of the amplitude (VecCalc; possibilities: + /// Old_FFT, Old_DFT, Partial_Integral_Method_1, Partial_Integral_Method_2) + public: + /// constructor + // takes a lot of parameters to have a general interface + // not all parameters are needed for all possible combinations + // of base classes + + HDINLINE Calc_Amplitude( + const Particle& particle, + const picongpu::float_64 delta_t, + const picongpu::float_64 t_sim) + : m_particle(particle) + , m_delta_t(delta_t) + , m_t_sim(t_sim) + { + } + + // get real vector part of amplitude + + HDINLINE vector_64 get_vector(const vector_64& n) const + { + const vector_64 look_direction(n.unit_vec()); // make sure look_direction is a unit vector + VecCalc vecC; + return vecC(look_direction, m_particle, m_delta_t); + } + + // get retarded time + + HDINLINE picongpu::float_64 get_t_ret(const vector_64 look_direction) const + { + TimeCalc timeC; + return timeC(m_t_sim, look_direction, m_particle); + + // const vector_64 r = particle.get_location (); // location + // return (picongpu::float_64) (t - (n * r) / (picongpu::SPEED_OF_LIGHT)); + } + + private: + // data: + const Particle& m_particle; // one particle + const picongpu::float_64 m_delta_t; // length of one time step in simulation + const picongpu::float_64 m_t_sim; // simulation 
time (for methods not using index*delta_t ) + }; + + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/check_consistency.hpp b/include/picongpu/plugins/radiation/check_consistency.hpp index 635db595f1..d3cc7f03ae 100644 --- a/include/picongpu/plugins/radiation/check_consistency.hpp +++ b/include/picongpu/plugins/radiation/check_consistency.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -25,22 +25,21 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -HINLINE void check_consistency(void) -{ - using namespace parameters; - std::cout << " checking efficiency of radiation code: " ; - if(radiation_frequencies::N_omega%radiation_frequencies::blocksize_omega == 0) - std::cout << "OK" << std::endl; - else - std::cout << "better use power of two for N_omega" << std::endl; - // \@todo is there a way to do this with compile time asserts??? -} + namespace plugins + { + namespace radiation + { + HINLINE void check_consistency(void) + { + using namespace parameters; + std::cout << " checking efficiency of radiation code: "; + if(radiation_frequencies::N_omega % radiation_frequencies::blocksize_omega == 0) + std::cout << "OK" << std::endl; + else + std::cout << "better use power of two for N_omega" << std::endl; + // \@todo is there a way to do this with compile time asserts??? 
+ } -} // namespace radiation -} // namespace plugins + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/debug/PIConGPUVerboseLogRadiation.hpp b/include/picongpu/plugins/radiation/debug/PIConGPUVerboseLogRadiation.hpp index c34fb0440e..c10fc8ff99 100644 --- a/include/picongpu/plugins/radiation/debug/PIConGPUVerboseLogRadiation.hpp +++ b/include/picongpu/plugins/radiation/debug/PIConGPUVerboseLogRadiation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once #include @@ -26,26 +25,21 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -/*create verbose class*/ -DEFINE_VERBOSE_CLASS(PIConGPUVerboseRadiation) -( - /* define log levels for later use - * e.g. log("TEXT");*/ - DEFINE_LOGLVL(0,NOTHING); - DEFINE_LOGLVL(1,PHYSICS); - DEFINE_LOGLVL(2,SIMULATION_STATE); - DEFINE_LOGLVL(4,MEMORY); - DEFINE_LOGLVL(8,CRITICAL); -) -/*set default verbose levels (integer number)*/ -(NOTHING::lvl|PIC_VERBOSE_RADIATION); - -} // namespace radiation -} // namespace plugins + namespace plugins + { + namespace radiation + { + /*create verbose class*/ + DEFINE_VERBOSE_CLASS(PIConGPUVerboseRadiation) + ( + /* define log levels for later use + * e.g. 
log("TEXT");*/ + DEFINE_LOGLVL(0, NOTHING); DEFINE_LOGLVL(1, PHYSICS); DEFINE_LOGLVL(2, SIMULATION_STATE); + DEFINE_LOGLVL(4, MEMORY); + DEFINE_LOGLVL(8, CRITICAL);) + /*set default verbose levels (integer number)*/ + (NOTHING::lvl | PIC_VERBOSE_RADIATION); + + } // namespace radiation + } // namespace plugins } // namespace picongpu - diff --git a/include/picongpu/plugins/radiation/frequencies/radiation_lin_freq.hpp b/include/picongpu/plugins/radiation/frequencies/radiation_lin_freq.hpp index 3b540c8c08..cd30b3270d 100644 --- a/include/picongpu/plugins/radiation/frequencies/radiation_lin_freq.hpp +++ b/include/picongpu/plugins/radiation/frequencies/radiation_lin_freq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -24,50 +24,50 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -namespace linear_frequencies -{ - - - class FreqFunctor + namespace plugins { - public: - FreqFunctor(void) - { } + namespace radiation + { + namespace linear_frequencies + { + class FreqFunctor + { + public: + FreqFunctor(void) + { + } - HDINLINE float_X operator()(const int ID) - { - return omega_min + float_X(ID) * delta_omega; - } + HDINLINE float_X operator()(const int ID) + { + return omega_min + float_X(ID) * delta_omega; + } - HINLINE float_X get(const int ID) - { - return operator()(ID); - } - }; + HINLINE float_X get(const int ID) + { + return operator()(ID); + } + }; - class InitFreqFunctor - { - public: - InitFreqFunctor(void) - { } + class InitFreqFunctor + { + public: + InitFreqFunctor(void) + { + } - HINLINE void Init(const std::string path ) - { } + HINLINE void Init(const std::string path) + { + } - HINLINE FreqFunctor getFunctor(void) - { - return FreqFunctor(); - } - }; + HINLINE FreqFunctor getFunctor(void) + { + return FreqFunctor(); + } + }; -} // namespace linear_frequencies -} // namespace radiation -} // 
namespace plugins + } // namespace linear_frequencies + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/frequencies/radiation_list_freq.hpp b/include/picongpu/plugins/radiation/frequencies/radiation_list_freq.hpp index 65608a9aa9..aecf48003c 100644 --- a/include/picongpu/plugins/radiation/frequencies/radiation_list_freq.hpp +++ b/include/picongpu/plugins/radiation/frequencies/radiation_list_freq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl * * This file is part of PIConGPU. * @@ -27,108 +27,106 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace frequencies_from_list -{ - - - class FreqFunctor + namespace plugins { - public: - - typedef GridBuffer::DataBoxType DBoxType; - - FreqFunctor(void) - { } - - template< typename T > - FreqFunctor(T frequencies_handed) - { - this->frequencies_dev = frequencies_handed->getDeviceBuffer().getDataBox(); - this->frequencies_host = frequencies_handed->getHostBuffer().getDataBox(); - } - - DINLINE float_X operator()(const unsigned int ID) - { - return (ID < radiation_frequencies::N_omega) ? frequencies_dev[ID] : 0.0 ; - } - - HINLINE float_X get(const unsigned int ID) - { - return (ID < radiation_frequencies::N_omega) ? 
frequencies_host[ID] : 0.0 ; - } - - private: - DBoxType frequencies_dev; - DBoxType frequencies_host; - - }; - - - - class InitFreqFunctor - { - public: - InitFreqFunctor(void) - { } - - ~InitFreqFunctor(void) - { - __delete(frequencyBuffer); - } - - typedef GridBuffer::DataBoxType DBoxType; - - HINLINE void Init(const std::string path ) - { - - frequencyBuffer = new GridBuffer(DataSpace (N_omega)); - - - DBoxType frequencyDB = frequencyBuffer->getHostBuffer().getDataBox(); - - std::ifstream freqListFile(path.c_str()); - unsigned int i; - - printf("freq: %s\n", path.c_str()); - - if(!freqListFile) - { - throw std::runtime_error(std::string("The radiation-frequency-file ") + path + std::string(" could not be found.\n")); - } - - - for(i=0; i> frequencyDB[i]; - // verbose output of loaded frequencies if verbose level PHYSICS is set: - log("freq: %1% \t %2%") % i % frequencyDB[i]; - frequencyDB[i] *= UNIT_TIME; - } - - if(i != N_omega) - { - throw std::runtime_error(std::string("The number of frequencies in the list and the number of frequencies in the parameters differ.\n")); - } - - frequencyBuffer->hostToDevice(); - - } - - FreqFunctor getFunctor(void) - { - return FreqFunctor(frequencyBuffer); - } - - private: - GridBuffer* frequencyBuffer; - }; - - -} // namespace frequencies_from_list -} // namespace radiation -} // namespace plugins + namespace radiation + { + namespace frequencies_from_list + { + class FreqFunctor + { + public: + typedef GridBuffer::DataBoxType DBoxType; + + FreqFunctor(void) + { + } + + template + FreqFunctor(T frequencies_handed) + { + this->frequencies_dev = frequencies_handed->getDeviceBuffer().getDataBox(); + this->frequencies_host = frequencies_handed->getHostBuffer().getDataBox(); + } + + DINLINE float_X operator()(const unsigned int ID) + { + return (ID < radiation_frequencies::N_omega) ? frequencies_dev[ID] : 0.0; + } + + HINLINE float_X get(const unsigned int ID) + { + return (ID < radiation_frequencies::N_omega) ? 
frequencies_host[ID] : 0.0; + } + + private: + DBoxType frequencies_dev; + DBoxType frequencies_host; + }; + + + class InitFreqFunctor + { + public: + InitFreqFunctor(void) + { + } + + ~InitFreqFunctor(void) + { + __delete(frequencyBuffer); + } + + typedef GridBuffer::DataBoxType DBoxType; + + HINLINE void Init(const std::string path) + { + frequencyBuffer = new GridBuffer(DataSpace(N_omega)); + + + DBoxType frequencyDB = frequencyBuffer->getHostBuffer().getDataBox(); + + std::ifstream freqListFile(path.c_str()); + unsigned int i; + + printf("freq: %s\n", path.c_str()); + + if(!freqListFile) + { + throw std::runtime_error( + std::string("The radiation-frequency-file ") + path + + std::string(" could not be found.\n")); + } + + + for(i = 0; i < N_omega && !freqListFile.eof(); ++i) + { + freqListFile >> frequencyDB[i]; + // verbose output of loaded frequencies if verbose level PHYSICS is set: + log("freq: %1% \t %2%") % i % frequencyDB[i]; + frequencyDB[i] *= UNIT_TIME; + } + + if(i != N_omega) + { + throw std::runtime_error(std::string("The number of frequencies in the list and the " + "number of frequencies in the parameters differ.\n")); + } + + frequencyBuffer->hostToDevice(); + } + + FreqFunctor getFunctor(void) + { + return FreqFunctor(frequencyBuffer); + } + + private: + GridBuffer* frequencyBuffer; + }; + + + } // namespace frequencies_from_list + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/frequencies/radiation_log_freq.hpp b/include/picongpu/plugins/radiation/frequencies/radiation_log_freq.hpp index f3ddb5432b..fc9787f0bd 100644 --- a/include/picongpu/plugins/radiation/frequencies/radiation_log_freq.hpp +++ b/include/picongpu/plugins/radiation/frequencies/radiation_log_freq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -24,57 +24,57 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace log_frequencies -{ - - - class FreqFunctor - { - public: - FreqFunctor(void) - { - omega_log_min = math::log(omega_min); - delta_omega_log = (math::log(omega_max) - omega_log_min) / float_X(N_omega - 1); - } - - HDINLINE float_X operator()(const int ID) - { - return math::exp(omega_log_min + (float_X(ID)) * delta_omega_log) ; - } - - HINLINE float_X get(const int ID) - { - return operator()(ID); - } - - private: - float_X omega_log_min; - float_X delta_omega_log; - }; - - - class InitFreqFunctor + namespace plugins { - public: - InitFreqFunctor(void) - { } - - HINLINE void Init(const std::string path ) - { } - - - HINLINE FreqFunctor getFunctor(void) - { - return FreqFunctor(); - } - }; - - -} // namespace log_frequencies -} // namespace radiation -} // namespace plugins + namespace radiation + { + namespace log_frequencies + { + class FreqFunctor + { + public: + FreqFunctor(void) + { + omega_log_min = math::log(omega_min); + delta_omega_log = (math::log(omega_max) - omega_log_min) / float_X(N_omega - 1); + } + + HDINLINE float_X operator()(const int ID) + { + return math::exp(omega_log_min + (float_X(ID)) * delta_omega_log); + } + + HINLINE float_X get(const int ID) + { + return operator()(ID); + } + + private: + float_X omega_log_min; + float_X delta_omega_log; + }; + + + class InitFreqFunctor + { + public: + InitFreqFunctor(void) + { + } + + HINLINE void Init(const std::string path) + { + } + + + HINLINE FreqFunctor getFunctor(void) + { + return FreqFunctor(); + } + }; + + + } // namespace log_frequencies + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/nyquist_low_pass.hpp b/include/picongpu/plugins/radiation/nyquist_low_pass.hpp index 097e5c253c..2c69a9524d 100644 --- a/include/picongpu/plugins/radiation/nyquist_low_pass.hpp +++ b/include/picongpu/plugins/radiation/nyquist_low_pass.hpp @@ 
-1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -26,45 +26,43 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -class NyquistLowPass : public One_minus_beta_times_n -{ - -public: - /** - * calculates \f$omega_{Nyquist}\f$ for particle in a direction \f$n\f$ - * \f$omega_{Nyquist} = (\pi - \epsilon )/(\delta t * (1 - \vec(\beta) * \vec(n)))\f$ - * so that all Amplitudes for higher frequencies can be ignored - **/ - HDINLINE NyquistLowPass(const vector_64& n, const Particle& particle) - : omegaNyquist((PI - 0.01)/ - (DELTA_T * - One_minus_beta_times_n()(n, particle))) - { } + namespace plugins + { + namespace radiation + { + class NyquistLowPass : public One_minus_beta_times_n + { + public: + /** + * calculates \f$omega_{Nyquist}\f$ for particle in a direction \f$n\f$ + * \f$omega_{Nyquist} = (\pi - \epsilon )/(\delta t * (1 - \vec(\beta) * \vec(n)))\f$ + * so that all Amplitudes for higher frequencies can be ignored + **/ + HDINLINE NyquistLowPass(const vector_64& n, const Particle& particle) + : omegaNyquist((PI - 0.01) / (DELTA_T * One_minus_beta_times_n()(n, particle))) + { + } - /** - * default constructor - needed for allocating shared memory on GPU (Radiation.hpp kernel) - **/ - HDINLINE NyquistLowPass(void) - { } + /** + * default constructor - needed for allocating shared memory on GPU (Radiation.hpp kernel) + **/ + HDINLINE NyquistLowPass(void) + { + } - /** - * checks if frequency omega is below Nyquist frequency - **/ - HDINLINE bool check(const float_32 omega) - { - return omega < omegaNyquist * radiationNyquist::NyquistFactor; - } + /** + * checks if frequency omega is below Nyquist frequency + **/ + HDINLINE bool check(const float_32 omega) + { + return omega < omegaNyquist * radiationNyquist::NyquistFactor; + } -private: - float_32 omegaNyquist; // Nyquist frequency for a particle (at a certain 
time step) for one direction -}; + private: + float_32 omegaNyquist; // Nyquist frequency for a particle (at a certain time step) for one direction + }; -} // namespace radiation -} // namespace plugins + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/particle.hpp b/include/picongpu/plugins/radiation/particle.hpp index c7262f797d..5c623a87ee 100644 --- a/include/picongpu/plugins/radiation/particle.hpp +++ b/include/picongpu/plugins/radiation/particle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -28,151 +28,160 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -class When -{ - // a enum to describe all needed times -public: - - enum - { - first = 0u, now = 1u, old = 2u, older = 3u - }; -}; - -class Particle : protected Taylor // Taylor includes just some methodes (no real derived class) -{ -public: - ////////////////////////////////////////////////////////////////// - // data: - // the first time (in above order) to be stored - - enum - { - location_begin = When::now, momentum_begin = When::now, beta_begin = When::first - }; - const vector_X momentum_now; - const vector_X momentum_old; - const vector_X location_now; - const picongpu::float_X mass; - -public: - ////////////////////////////////////////////////////////////////// - // constructors: - - HDINLINE Particle(const vector_X& locationNow_set, const vector_X& momentumOld_set, const vector_X& momentumNow_set, const picongpu::float_X mass_set) - : location_now(locationNow_set), momentum_old(momentumOld_set), momentum_now(momentumNow_set), mass(mass_set) - { - - } - - - ////////////////////////////////////////////////////////////////// - // getters: - - template - HDINLINE vector_64 get_location(void) const; - // get location at time when - - template - HDINLINE vector_64 
get_momentum(void) const; - // get momentum at time when - - template - HDINLINE vector_64 get_beta(void) const - { - return calc_beta(get_momentum ()); - } // get beta at time when except: - // first --> is specialized below - - template - HDINLINE picongpu::float_64 get_gamma(void) const - { - return calc_gamma(get_momentum ()); - } // get gamma at time when - - template - HDINLINE picongpu::float_64 get_gamma_inv_square(void) const + namespace plugins { - return calc_gamma_inv_square(get_momentum ()); - } // get 1/gamma^2 - - template< unsigned int when> - HDINLINE picongpu::float_64 get_cos_theta(const vector_64& n) const - { - // get cos(theta) at time when - const vector_64 beta = get_beta (); - return calc_cos_theta(n, beta); - } - - -private: - ////////////////////////////////////////////////////////////////// - // private methods: - - HDINLINE vector_64 calc_beta(const vector_X& momentum) const - { - // returns beta=v/c - const picongpu::float_32 gamma1 = calc_gamma(momentum); - return momentum * (1.0 / (mass * picongpu::SPEED_OF_LIGHT * gamma1)); - } - - HDINLINE picongpu::float_64 calc_gamma(const vector_X& momentum) const - { - // return gamma = E/(mc^2) - const picongpu::float_32 x = util::square (momentum * (1.0 / (mass * picongpu::SPEED_OF_LIGHT))); - return picongpu::math::sqrt(1.0 + x); - - } - - HDINLINE picongpu::float_64 calc_gamma_inv_square(const vector_X& momentum) const - { - // returns 1/gamma^2 = m^2*c^2/(m^2*c^2 + p^2) - const picongpu::float_32 Emass = mass * picongpu::SPEED_OF_LIGHT; - return Emass / (Emass + (util::square (momentum)) / Emass); - } - - HDINLINE picongpu::float_64 calc_cos_theta(const vector_64& n, const vector_64& beta) const - { - // return cos of angle between looking and flight direction - return (n * beta) / (std::sqrt(beta * beta)); - } - - - // setters: - - HDINLINE picongpu::float_64 summand(void) const - { - // return \vec n independend summand (next value to add to \vec n independend sum) - const 
picongpu::float_64 x = get_gamma_inv_square (); - return Taylor()(x); - } - -}; // end of Particle definition - - -template<> -HDINLINE vector_64 Particle::get_location(void) const -{ - return location_now; -} // get location at time when - -template<> -HDINLINE vector_64 Particle::get_momentum(void) const -{ - return momentum_now; -} // get momentum at time when - -template<> -HDINLINE vector_64 Particle::get_momentum(void) const -{ - return momentum_old; -} // get momentum at time when - -} // namespace radiation -} // namespace plugins + namespace radiation + { + class When + { + // a enum to describe all needed times + public: + enum + { + first = 0u, + now = 1u, + old = 2u, + older = 3u + }; + }; + + class Particle : protected Taylor // Taylor includes just some methodes (no real derived class) + { + public: + ////////////////////////////////////////////////////////////////// + // data: + // the first time (in above order) to be stored + + enum + { + location_begin = When::now, + momentum_begin = When::now, + beta_begin = When::first + }; + const vector_X momentum_now; + const vector_X momentum_old; + const vector_X location_now; + const picongpu::float_X mass; + + public: + ////////////////////////////////////////////////////////////////// + // constructors: + + HDINLINE Particle( + const vector_X& locationNow_set, + const vector_X& momentumOld_set, + const vector_X& momentumNow_set, + const picongpu::float_X mass_set) + : location_now(locationNow_set) + , momentum_old(momentumOld_set) + , momentum_now(momentumNow_set) + , mass(mass_set) + { + } + + + ////////////////////////////////////////////////////////////////// + // getters: + + template + HDINLINE vector_64 get_location(void) const; + // get location at time when + + template + HDINLINE vector_64 get_momentum(void) const; + // get momentum at time when + + template + HDINLINE vector_64 get_beta(void) const + { + return calc_beta(get_momentum()); + } // get beta at time when except: + // first --> is 
specialized below + + template + HDINLINE picongpu::float_64 get_gamma(void) const + { + return calc_gamma(get_momentum()); + } // get gamma at time when + + template + HDINLINE picongpu::float_64 get_gamma_inv_square(void) const + { + return calc_gamma_inv_square(get_momentum()); + } // get 1/gamma^2 + + template + HDINLINE picongpu::float_64 get_cos_theta(const vector_64& n) const + { + // get cos(theta) at time when + const vector_64 beta = get_beta(); + return calc_cos_theta(n, beta); + } + + + private: + ////////////////////////////////////////////////////////////////// + // private methods: + + HDINLINE vector_64 calc_beta(const vector_X& momentum) const + { + // returns beta=v/c + const picongpu::float_32 gamma1 = calc_gamma(momentum); + return momentum * (1.0 / (mass * picongpu::SPEED_OF_LIGHT * gamma1)); + } + + HDINLINE picongpu::float_64 calc_gamma(const vector_X& momentum) const + { + // return gamma = E/(mc^2) + const picongpu::float_32 x = util::square( + momentum * (1.0 / (mass * picongpu::SPEED_OF_LIGHT))); + return picongpu::math::sqrt(1.0 + x); + } + + HDINLINE picongpu::float_64 calc_gamma_inv_square(const vector_X& momentum) const + { + // returns 1/gamma^2 = m^2*c^2/(m^2*c^2 + p^2) + const picongpu::float_32 Emass = mass * picongpu::SPEED_OF_LIGHT; + return Emass / (Emass + (util::square(momentum)) / Emass); + } + + HDINLINE picongpu::float_64 calc_cos_theta(const vector_64& n, const vector_64& beta) const + { + // return cos of angle between looking and flight direction + return (n * beta) / (std::sqrt(beta * beta)); + } + + + // setters: + + HDINLINE picongpu::float_64 summand(void) const + { + // return \vec n independend summand (next value to add to \vec n independend sum) + const picongpu::float_64 x = get_gamma_inv_square(); + return Taylor()(x); + } + + }; // end of Particle definition + + + template<> + HDINLINE vector_64 Particle::get_location(void) const + { + return location_now; + } // get location at time when + + template<> + 
HDINLINE vector_64 Particle::get_momentum(void) const + { + return momentum_now; + } // get momentum at time when + + template<> + HDINLINE vector_64 Particle::get_momentum(void) const + { + return momentum_old; + } // get momentum at time when + + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/radFormFactor.hpp b/include/picongpu/plugins/radiation/radFormFactor.hpp index 46c195276a..4a150f943c 100644 --- a/include/picongpu/plugins/radiation/radFormFactor.hpp +++ b/include/picongpu/plugins/radiation/radFormFactor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -25,193 +25,206 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -namespace radFormFactor_baseShape_3D -{ - /** general form factor class of discrete charge distribution of PIC particle shape of order T_shapeOrder - * - * @tparam T_shapeOrder order of charge distribution shape in PIC code used for radiation form factor - */ - - template< uint32_t T_shapeOrder > - struct radFormFactor - { - /** Form Factor for T_shapeOrder-order particle shape charge distribution of N discrete electrons: - * \f[ | \mathcal{F} |^2 = N + (N*N - N) * (sinc^2(n_x * L_x * \omega) * sinc^2(n_y * L_y * \omega) * sinc^2(n_z * L_z * \omega))^T_shapeOrder \f] - * - * with observation direction (unit vector) \f$ \vec{n} = (n_x, n_y, n_z) \f$ - * and with: - * @param N = weighting - * @param omega = frequency - * @param L_d = the size of the CIC-particle / cell in dimension d - * - * @param N = macro particle weighting - * @param omega = frequency at which to calculate the form factor - * @param observer_unit_vec = observation direction - * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ - */ - HDINLINE float_X operator()( const float_X N, const float_X omega, vector_X const & 
observer_unit_vec ) const - { - float_X sincValue = float_X( 1.0 ); - for( uint32_t d = 0; d < DIM3; ++d ) - sincValue *= math::sinc( observer_unit_vec[d] * cellSize[d] / ( SPEED_OF_LIGHT * float_X( 2.0 ) ) * omega ); - - // here we combine sinc^2(..) with (...)^T_shapeOrder to ...^(2 * T_shapeOrder) - return math::sqrt( N + ( N * N - N ) * util::pow( sincValue , 2 * T_shapeOrder ) ); - } - }; -} // namespace radFormFactor_baseShape_3D - - -namespace radFormFactor_CIC_3D -{ - struct radFormFactor : public radFormFactor_baseShape_3D::radFormFactor< 1 > - { }; -} // namespace radFormFactor_CIC_3D - -namespace radFormFactor_TSC_3D -{ - struct radFormFactor : public radFormFactor_baseShape_3D::radFormFactor< 2 > - { }; -} // namespace radFormFactor_TSC_3D - -namespace radFormFactor_PCS_3D -{ - struct radFormFactor : public radFormFactor_baseShape_3D::radFormFactor< 3 > - { }; -} // namespace radFormFactor_PCS_3D - - -namespace radFormFactor_CIC_1Dy -{ - struct radFormFactor - { - /** Form Factor for 1-d CIC charge distribution iy y of N discrete electrons: - * \f[ | \mathcal{F} |^2 = N + (N*N - N) * sinc^2(n_y * L_y * \omega) \f] - * - * with observation direction (unit vector) \f$ \vec{n} = (n_x, n_y, n_z) \f$ - * and with: - * @param N = weighting - * @param omega = frequency - * @param L_d = the size of the CIC-particle / cell in dimension d - * - * @param N = macro particle weighting - * @param omega = frequency at which to calculate the form factor - * @param observer_unit_vec = observation direction - * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ - */ - HDINLINE float_X operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const - { - return math::sqrt( - N + ( N * N - N ) * util::square( - math::sinc( CELL_HEIGHT / ( SPEED_OF_LIGHT * float_X( 2.0 ) ) * omega ) - ) - ); - } - }; -} // namespace radFormFactor_CIC_1Dy - - -namespace radFormFactor_Gauss_spherical -{ - struct radFormFactor - { - /** Form Factor for 
point-symmetric Gauss-shaped charge distribution of N discrete electrons: - * \f[ = N*q_e* 1/sqrt(2*pi*sigma^2) * exp(-0.5 * r^2/sigma^2) \f] - * with sigma = 0.5*c/delta_t (0.5 because sigma is defined around center) - * - * @param N = macro particle weighting - * @param omega = frequency at which to calculate the form factor - * @param observer_unit_vec = observation direction - * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ - */ - HDINLINE float_X operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const - { - /* currently a fixed sigma of DELTA_T * c is used to describe the distribution - might become a parameter */ - return math::sqrt( - N + ( N * N - N ) * util::square( - math::exp( float_X( -0.5 ) * util::square( omega * float_X( 0.5 ) * DELTA_T ) ) - ) - ); - } - }; -} // namespace radFormFactor_Gauss_spherical - - -namespace radFormFactor_Gauss_cell -{ - struct radFormFactor - { - /** Form Factor for per-dimension Gauss-shaped charge distribution of N discrete electrons: - * \f[ = N*q_e* product[d={x,y,z}](1/sqrt(2*pi*sigma_d^2) * exp(-0.5 * d^2/sigma_d^2)) \f] - * with sigma_d = 0.5*cell_width_d*n_d - * - * @param N = macro particle weighting - * @param omega = frequency at which to calculate the form factor - * @param observer_unit_vec = observation direction - * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ - */ - HDINLINE float_X operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const - { - return math::sqrt( - N + ( N * N - N ) * util::square( - math::exp( - float_X( -0.5 ) * ( - util::square( observer_unit_vec.x() * CELL_WIDTH / ( SPEED_OF_LIGHT * float_X(2.0) ) * omega ) + - util::square( observer_unit_vec.y() * CELL_HEIGHT / ( SPEED_OF_LIGHT * float_X(2.0) ) * omega ) + - util::square( observer_unit_vec.z() * CELL_DEPTH / ( SPEED_OF_LIGHT * float_X(2.0) ) * omega ) - ) - ) - ) - ); - } - }; -} // namespace radFormFactor_Gauss_cell - - - -namespace 
radFormFactor_incoherent -{ - struct radFormFactor - { - /** Form Factor for an incoherent charge distribution: - * - * @param N = macro particle weighting - * @param omega = frequency at which to calculate the form factor - * @param observer_unit_vec = observation direction - * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 == \sqrt(weighting) \f$ - */ - HDINLINE float_X operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const - { - return math::sqrt( N ); - - } - }; -} // namespace radFormFactor_incoherent - - -namespace radFormFactor_coherent -{ - struct radFormFactor + namespace plugins { - /** Form Factor for a coherent charge distribution: - * - * @param N = macro particle weighting - * @param omega = frequency at which to calculate the form factor - * @param observer_unit_vec = observation direction - * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 == \sqrt(weighting) \f$ - */ - HDINLINE float_X operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const - { - return N; - } - }; -} // namespace radFormFactor_coherent - -} // namespace radiation -} // namespace plugins + namespace radiation + { + namespace radFormFactor_baseShape_3D + { + /** general form factor class of discrete charge distribution of PIC particle shape of order + * T_shapeOrder + * + * @tparam T_shapeOrder order of charge distribution shape in PIC code used for radiation form factor + */ + + template + struct radFormFactor + { + /** Form Factor for T_shapeOrder-order particle shape charge distribution of N discrete electrons: + * \f[ | \mathcal{F} |^2 = N + (N*N - N) * (sinc^2(n_x * L_x * \omega) * sinc^2(n_y * L_y * \omega) + * * sinc^2(n_z * L_z * \omega))^T_shapeOrder \f] + * + * with observation direction (unit vector) \f$ \vec{n} = (n_x, n_y, n_z) \f$ + * and with: + * @param N = weighting + * @param omega = frequency + * @param L_d = the size of the CIC-particle / cell in dimension d + * + * @param N = macro 
particle weighting + * @param omega = frequency at which to calculate the form factor + * @param observer_unit_vec = observation direction + * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ + */ + HDINLINE float_X + operator()(const float_X N, const float_X omega, vector_X const& observer_unit_vec) const + { + float_X sincValue = float_X(1.0); + for(uint32_t d = 0; d < DIM3; ++d) + sincValue *= pmacc::math::sinc( + observer_unit_vec[d] * cellSize[d] / (SPEED_OF_LIGHT * float_X(2.0)) * omega); + + // here we combine sinc^2(..) with (...)^T_shapeOrder to ...^(2 * T_shapeOrder) + return math::sqrt(N + (N * N - N) * util::pow(sincValue, 2 * T_shapeOrder)); + } + }; + } // namespace radFormFactor_baseShape_3D + + + namespace radFormFactor_CIC_3D + { + struct radFormFactor : public radFormFactor_baseShape_3D::radFormFactor<1> + { + }; + } // namespace radFormFactor_CIC_3D + + namespace radFormFactor_TSC_3D + { + struct radFormFactor : public radFormFactor_baseShape_3D::radFormFactor<2> + { + }; + } // namespace radFormFactor_TSC_3D + + namespace radFormFactor_PCS_3D + { + struct radFormFactor : public radFormFactor_baseShape_3D::radFormFactor<3> + { + }; + } // namespace radFormFactor_PCS_3D + + + namespace radFormFactor_CIC_1Dy + { + struct radFormFactor + { + /** Form Factor for 1-d CIC charge distribution iy y of N discrete electrons: + * \f[ | \mathcal{F} |^2 = N + (N*N - N) * sinc^2(n_y * L_y * \omega) \f] + * + * with observation direction (unit vector) \f$ \vec{n} = (n_x, n_y, n_z) \f$ + * and with: + * @param N = weighting + * @param omega = frequency + * @param L_d = the size of the CIC-particle / cell in dimension d + * + * @param N = macro particle weighting + * @param omega = frequency at which to calculate the form factor + * @param observer_unit_vec = observation direction + * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ + */ + HDINLINE float_X + operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) 
const + { + return math::sqrt( + N + + (N * N - N) + * util::square( + pmacc::math::sinc(CELL_HEIGHT / (SPEED_OF_LIGHT * float_X(2.0)) * omega))); + } + }; + } // namespace radFormFactor_CIC_1Dy + + + namespace radFormFactor_Gauss_spherical + { + struct radFormFactor + { + /** Form Factor for point-symmetric Gauss-shaped charge distribution of N discrete electrons: + * \f[ = N*q_e* 1/sqrt(2*pi*sigma^2) * exp(-0.5 * r^2/sigma^2) \f] + * with sigma = 0.5*c/delta_t (0.5 because sigma is defined around center) + * + * @param N = macro particle weighting + * @param omega = frequency at which to calculate the form factor + * @param observer_unit_vec = observation direction + * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ + */ + HDINLINE float_X + operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const + { + /* currently a fixed sigma of DELTA_T * c is used to describe the distribution - might become a + * parameter */ + return math::sqrt( + N + + (N * N - N) + * util::square( + math::exp(float_X(-0.5) * util::square(omega * float_X(0.5) * DELTA_T)))); + } + }; + } // namespace radFormFactor_Gauss_spherical + + + namespace radFormFactor_Gauss_cell + { + struct radFormFactor + { + /** Form Factor for per-dimension Gauss-shaped charge distribution of N discrete electrons: + * \f[ = N*q_e* product[d={x,y,z}](1/sqrt(2*pi*sigma_d^2) * exp(-0.5 * d^2/sigma_d^2)) \f] + * with sigma_d = 0.5*cell_width_d*n_d + * + * @param N = macro particle weighting + * @param omega = frequency at which to calculate the form factor + * @param observer_unit_vec = observation direction + * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 ) \f$ + */ + HDINLINE float_X + operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const + { + return math::sqrt( + N + + (N * N - N) + * util::square(math::exp( + float_X(-0.5) + * (util::square( + observer_unit_vec.x() * CELL_WIDTH / (SPEED_OF_LIGHT * float_X(2.0)) + * omega) + 
+ util::square( + observer_unit_vec.y() * CELL_HEIGHT / (SPEED_OF_LIGHT * float_X(2.0)) + * omega) + + util::square( + observer_unit_vec.z() * CELL_DEPTH / (SPEED_OF_LIGHT * float_X(2.0)) + * omega))))); + } + }; + } // namespace radFormFactor_Gauss_cell + + + namespace radFormFactor_incoherent + { + struct radFormFactor + { + /** Form Factor for an incoherent charge distribution: + * + * @param N = macro particle weighting + * @param omega = frequency at which to calculate the form factor + * @param observer_unit_vec = observation direction + * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 == \sqrt(weighting) \f$ + */ + HDINLINE float_X + operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const + { + return math::sqrt(N); + } + }; + } // namespace radFormFactor_incoherent + + + namespace radFormFactor_coherent + { + struct radFormFactor + { + /** Form Factor for a coherent charge distribution: + * + * @param N = macro particle weighting + * @param omega = frequency at which to calculate the form factor + * @param observer_unit_vec = observation direction + * @return the Form Factor: \f$ \sqrt( | \mathcal{F} |^2 == \sqrt(weighting) \f$ + */ + HDINLINE float_X + operator()(const float_X N, const float_X omega, const vector_X observer_unit_vec) const + { + return N; + } + }; + } // namespace radFormFactor_coherent + + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/taylor.hpp b/include/picongpu/plugins/radiation/taylor.hpp index 39491b4195..e3217af05d 100644 --- a/include/picongpu/plugins/radiation/taylor.hpp +++ b/include/picongpu/plugins/radiation/taylor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -23,24 +23,23 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -struct Taylor -{ - // a Taylor development for 1-sqrt(1-x) - - HDINLINE picongpu::float_64 operator()(picongpu::float_64 x) const + namespace plugins { - // Taylor series of 1-sqrt(1-x) till 5th order - //same like 0.5*x + 0.125*x*x + 0.0625 * x*x*x + 0.0390625 * x*x*x*x + 0.02734375 *x*x*x*x*x; - const picongpu::float_64 x2 = (x * x); - return x * ((0.5 + 0.125 * x) + x2 * (0.0625 + (0.0390625 * x + 0.02734375 * x2))); - } + namespace radiation + { + struct Taylor + { + // a Taylor development for 1-sqrt(1-x) -}; + HDINLINE picongpu::float_64 operator()(picongpu::float_64 x) const + { + // Taylor series of 1-sqrt(1-x) till 5th order + // same like 0.5*x + 0.125*x*x + 0.0625 * x*x*x + 0.0390625 * x*x*x*x + 0.02734375 *x*x*x*x*x; + const picongpu::float_64 x2 = (x * x); + return x * ((0.5 + 0.125 * x) + x2 * (0.0625 + (0.0390625 * x + 0.02734375 * x2))); + } + }; -} // namespace radiation -} // namespace plugins + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/utilities.hpp b/include/picongpu/plugins/radiation/utilities.hpp index 434832d6e5..8a6bb708c3 100644 --- a/include/picongpu/plugins/radiation/utilities.hpp +++ b/include/picongpu/plugins/radiation/utilities.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -21,100 +21,94 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -namespace util -{ - - //goal: to increase readability of code - - template /// a generic square function - HDINLINE A square(A a) - { - return a*a; - } - - template /// a more generic square function - HDINLINE R square(A a) - { - return a*a; - } - - template /// a generic cube function - HDINLINE A cube(A a) - { - return a * a*a; - } - - template /// a more generic cube function - HDINLINE R cube(A a) - { - return a * a*a; - } - - template /// a more generic square struct - struct Cube + namespace plugins { - - HDINLINE R operator()(A a) + namespace radiation { - return a * a*a; - } - }; - - template /// a more generic square struct - struct Square - { - - HDINLINE R operator()(A a) const - { - return a*a; - } - }; - - - -namespace details -{ - /** power function - with extra const parameter for efficient code - * - * T_type requires cast from int and multiplication - * @tparam T_Type - base type - * @param x - base value - * @param exp - exponent - * @param results (=1) - do not change - workaround to produce efficient code - * @return std::pow(x, exp) - */ - template< typename T_Type > - HDINLINE constexpr T_Type pow( T_Type const x , uint32_t const exp, const T_Type result = T_Type( 1 ) ) - { - return exp == 0 ? result : ( - exp == 1 ? 
x * result : util::details::pow( x, exp - 1, result * x ) - ); - } -} // namespace details - - /** power function - * - * T_type requires cast from int and multiplication - * @tparam T_Type - base type - * @param x - base value - * @param exp - exponent - * @return std::pow(x, exp) - */ - template< typename T_Type > - HDINLINE constexpr T_Type pow( T_Type const x , uint32_t const exp ) - { - return util::details::pow( x, exp ); - } - -} // namespace util - -} // namespace radiation - -} // namespace plugins + namespace util + { + // goal: to increase readability of code + + template /// a generic square function + HDINLINE A square(A a) + { + return a * a; + } + + template /// a more generic square function + HDINLINE R square(A a) + { + return a * a; + } + + template /// a generic cube function + HDINLINE A cube(A a) + { + return a * a * a; + } + + template /// a more generic cube function + HDINLINE R cube(A a) + { + return a * a * a; + } + + template /// a more generic square struct + struct Cube + { + HDINLINE R operator()(A a) + { + return a * a * a; + } + }; + + template /// a more generic square struct + struct Square + { + HDINLINE R operator()(A a) const + { + return a * a; + } + }; + + + namespace details + { + /** power function - with extra const parameter for efficient code + * + * T_type requires cast from int and multiplication + * @tparam T_Type - base type + * @param x - base value + * @param exp - exponent + * @param results (=1) - do not change - workaround to produce efficient code + * @return std::pow(x, exp) + */ + template + HDINLINE constexpr T_Type pow(T_Type const x, uint32_t const exp, const T_Type result = T_Type(1)) + { + return exp == 0 ? result + : (exp == 1 ? 
x * result : util::details::pow(x, exp - 1, result * x)); + } + } // namespace details + + /** power function + * + * T_type requires cast from int and multiplication + * @tparam T_Type - base type + * @param x - base value + * @param exp - exponent + * @return std::pow(x, exp) + */ + template + HDINLINE constexpr T_Type pow(T_Type const x, uint32_t const exp) + { + return util::details::pow(x, exp); + } + + } // namespace util + + } // namespace radiation + + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/radiation/vector.hpp b/include/picongpu/plugins/radiation/vector.hpp index bdb46f9827..a0c533fb90 100644 --- a/include/picongpu/plugins/radiation/vector.hpp +++ b/include/picongpu/plugins/radiation/vector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -25,160 +25,158 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ - -template -struct cuda_vec : public V -{ - // constructor - - HDINLINE cuda_vec(T x, T y, T z) - { - this->x() = x; - this->y() = y; - this->z() = z; - } - - // default constructor - - HDINLINE cuda_vec() - { - - } - - // constructor - - HDINLINE cuda_vec(const V & other) - { - this->x() = other.x(); - this->y() = other.y(); - this->z() = other.z(); - } - - HDINLINE static cuda_vec zero() - { - return cuda_vec(0, 0, 0); - } - - - // conversion between two cuda vectors with different types - - template - HDINLINE cuda_vec(const cuda_vec& other) - { - this->x() = (T) other.x(); - this->y() = (T) other.y(); - this->z() = (T) other.z(); - } - - HDINLINE cuda_vec& operator=(const cuda_vec& other) - { - this->x() = other.x(); - this->y() = other.y(); - this->z() = other.z(); - return (*this); - } - - HDINLINE T &operator[](uint32_t dim) - { - return (&(this->x()))[dim]; - } - - HDINLINE const T &operator[](uint32_t dim) const - { - 
return (&(this->x()))[dim]; - } - - - // addition - - HDINLINE cuda_vec operator+(const cuda_vec& other) const - { - return cuda_vec (this->x() + other.x(), this->y() + other.y(), this->z() + other.z()); - } - - // difference - - HDINLINE cuda_vec operator-(const cuda_vec& other) const - { - return cuda_vec (this->x() - other.x(), this->y() - other.y(), this->z() - other.z()); - } - - // vector multiplication - - HDINLINE T operator*(const cuda_vec& other) const - { - return this->x() * other.x() + this->y() * other.y() + this->z() * other.z(); - } - - // scalar multiplication - - HDINLINE cuda_vec operator*(const T scalar) const - { - return cuda_vec(scalar * this->x(), scalar * this->y(), scalar * this->z()); - } - - // division (scalar) - - HDINLINE cuda_vec operator/(const T scalar) const - { - return cuda_vec(this->x() / scalar, this->y() / scalar, this->z() / scalar); - } - - // cross product (vector) - - HDINLINE cuda_vec operator%(const cuda_vec& other) const + namespace plugins { - return cuda_vec(this->y() * other.z() - this->z() * other.y(), this->z() * other.x() - this->x() * other.z(), this->x() * other.y() - this->y() * other.x()); - } + namespace radiation + { + template + struct cuda_vec : public V + { + // constructor + + HDINLINE cuda_vec(T x, T y, T z) + { + this->x() = x; + this->y() = y; + this->z() = z; + } + + // default constructor - // magnitude of vector (length of vector) + HDINLINE cuda_vec() + { + } - HDINLINE T magnitude(void) const - { + // constructor + + HDINLINE cuda_vec(const V& other) + { + this->x() = other.x(); + this->y() = other.y(); + this->z() = other.z(); + } - return picongpu::math::sqrt(this->x() * this->x() + this->y() * this->y() + this->z() * this->z()); + HDINLINE static cuda_vec zero() + { + return cuda_vec(0, 0, 0); + } - } - // unit vector in the direction of the vector + // conversion between two cuda vectors with different types - HDINLINE cuda_vec unit_vec(void) const - { - return *this / magnitude(); - } + 
template + HDINLINE cuda_vec(const cuda_vec& other) + { + this->x() = (T) other.x(); + this->y() = (T) other.y(); + this->z() = (T) other.z(); + } + + HDINLINE cuda_vec& operator=(const cuda_vec& other) + { + this->x() = other.x(); + this->y() = other.y(); + this->z() = other.z(); + return (*this); + } + + HDINLINE T& operator[](uint32_t dim) + { + return (&(this->x()))[dim]; + } + + HDINLINE const T& operator[](uint32_t dim) const + { + return (&(this->x()))[dim]; + } + + + // addition + + HDINLINE cuda_vec operator+(const cuda_vec& other) const + { + return cuda_vec(this->x() + other.x(), this->y() + other.y(), this->z() + other.z()); + } + + // difference + + HDINLINE cuda_vec operator-(const cuda_vec& other) const + { + return cuda_vec(this->x() - other.x(), this->y() - other.y(), this->z() - other.z()); + } + + // vector multiplication + + HDINLINE T operator*(const cuda_vec& other) const + { + return this->x() * other.x() + this->y() * other.y() + this->z() * other.z(); + } + + // scalar multiplication + + HDINLINE cuda_vec operator*(const T scalar) const + { + return cuda_vec(scalar * this->x(), scalar * this->y(), scalar * this->z()); + } + + // division (scalar) + + HDINLINE cuda_vec operator/(const T scalar) const + { + return cuda_vec(this->x() / scalar, this->y() / scalar, this->z() / scalar); + } + + // cross product (vector) + + HDINLINE cuda_vec operator%(const cuda_vec& other) const + { + return cuda_vec( + this->y() * other.z() - this->z() * other.y(), + this->z() * other.x() - this->x() * other.z(), + this->x() * other.y() - this->y() * other.x()); + } + + // magnitude of vector (length of vector) - // assign add + HDINLINE T magnitude(void) const + { + return picongpu::math::sqrt(this->x() * this->x() + this->y() * this->y() + this->z() * this->z()); + } - HDINLINE void operator+=(const cuda_vec& other) - { - this->x() += other.x(); - this->y() += other.y(); - this->z() += other.z(); - } + // unit vector in the direction of the vector - // assign 
multiply + HDINLINE cuda_vec unit_vec(void) const + { + return *this / magnitude(); + } + + // assign add + + HDINLINE void operator+=(const cuda_vec& other) + { + this->x() += other.x(); + this->y() += other.y(); + this->z() += other.z(); + } - HDINLINE void operator*=(const T scalar) - { - this->x() *= scalar; - this->y() *= scalar; - this->z() *= scalar; - } + // assign multiply -}; + HDINLINE void operator*=(const T scalar) + { + this->x() *= scalar; + this->y() *= scalar; + this->z() *= scalar; + } + }; -} // namespace radiation -} // namespace plugins + } // namespace radiation + } // namespace plugins } // namespace picongpu // print template -HINLINE std::ostream & operator <<(std::ostream & os, const picongpu::plugins::radiation::cuda_vec & v) +HINLINE std::ostream& operator<<(std::ostream& os, const picongpu::plugins::radiation::cuda_vec& v) { os << " ( " << v.x() << " , " << v.y() << " , " << v.z() << " ) "; return os; diff --git a/include/picongpu/plugins/radiation/windowFunctions.hpp b/include/picongpu/plugins/radiation/windowFunctions.hpp index 33de38bedf..3f87328132 100644 --- a/include/picongpu/plugins/radiation/windowFunctions.hpp +++ b/include/picongpu/plugins/radiation/windowFunctions.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Richard Pausch +/* Copyright 2014-2021 Richard Pausch * * This file is part of PIConGPU. 
* @@ -21,162 +21,157 @@ #include -#include +#include namespace picongpu { -namespace plugins -{ -namespace radiation -{ - - /* several window functions behind namespaces: */ - - -namespace radWindowFunctionTriangle -{ - struct radWindowFunction - { - /** 1D Window function according to the triangle window: - * - * x = position_x - L_x/2 - * f(x) = {1+2x/L_x : (-L_x/2 <= x <= 0 ) - * {1-2x/L_x : (0 <= x <= +L_x/2 ) - * {0.0 : in any other case - * - * @param position_x = 1D position - * @param L_x = length of the simulated area - * assuming that the simulation ranges - * from 0 to L_x in the chosen dimension - * @returns weighting factor to reduce ringing effects due to - * sharp spacial boundaries - **/ - HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const - { - float_X x = position_x - float_X(0.5)*L_x; - return float_X(math::abs(x) <= float_X(0.5)*L_x) - * (float_X(1.0) - float_X(2.0)/L_x * math::abs(x) ); - } - }; -} // namespace radWindowFunctionTriangle - - - -namespace radWindowFunctionHamming -{ - struct radWindowFunction + namespace plugins { - /** 1D Window function according to the Hamming window: - * - * x = position_x - L_x/2 - * a = parameter of the Hamming window (ideal: 0.08) - * f(x) = {a+(1-a)*cos^2(pi*x/L_x) : (-L_x/2 <= x <= +L_x/2 ) - * {0.0 : in any other case - * - * @param position_x = 1D position - * @param L_x = length of the simulated area - * assuming that the simulation ranges - * from 0 to L_x in the chosen dimension - * @returns weighting factor to reduce ringing effects due to - * sharp spacial boundaries - **/ - HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const - { - const float_X x = position_x - L_x*float_X(0.5); - const float_X a = 0.08; /* ideal parameter: -43dB reduction */ - const float_X cosinusValue = math::cos(pmacc::algorithms::math::Pi::value*x/L_x); - return float_X(math::abs(x) <= float_X(0.5)*L_x) - * (a + (float_X(1.0)-a)*cosinusValue*cosinusValue); - } - }; -} // 
namespace radWindowFunctionHamming - - - -namespace radWindowFunctionTriplett -{ - struct radWindowFunction - { - /** 1D Window function according to the Triplett window: - * - * x = position_x - L_x/2 - * lambda = decay parameter of the Triplett window - * f(x) = {exp(-lambda*|x|)*cos^2(pi*x/L_x) : (-L_x/2 <= x <= +L_x/2 ) - * {0.0 : in any other case - * - * @param position_x = 1D position - * @param L_x = length of the simulated area - * assuming that the simulation ranges - * from 0 to L_x in the chosen dimension - * @returns weighting factor to reduce ringing effects due to - * sharp spacial boundaries - **/ - HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const - { - const float_X x = position_x - L_x*float_X(0.5); - const float_X lambda = float_X(5.0)/L_x; /* larger is better, but too large means no data */ - const float_X cosinusValue = math::cos(pmacc::algorithms::math::Pi::value*x/L_x); - return float_X(math::abs(x) <= float_X(0.5)*L_x) - * (math::exp(float_X(-1.0)*lambda*math::abs(x))*cosinusValue*cosinusValue); - } - }; -} // namespace radWindowFunctionTriplett - - - -namespace radWindowFunctionGauss -{ - struct radWindowFunction - { - /** 1D Window function according to the Gauss window: - * - * x = position_x - L_x/2 - * sigma = standard deviation of the Gauss window - * f(x) = {exp(-0.5*x^2/sigma^2) : (-L_x/2 <= x <= +L_x/2 ) - * {0.0 : in any other case - * - * @param position_x = 1D position - * @param L_x = length of the simulated area - * assuming that the simulation ranges - * from 0 to L_x in the chosen dimension - * @returns weighting factor to reduce ringing effects due to - * sharp spacial boundaries - **/ - HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const - { - const float_X x = position_x - L_x*float_X(0.5); - const float_X sigma = float_X(0.4)*L_x; /* smaller is better, but too small means no data */ - const float_X relativePosition = x/sigma; /* optimization */ - return 
float_X(math::abs(x) <= float_X(0.5)*L_x) - * (math::exp(float_X(-0.5)*relativePosition*relativePosition)); - } - }; -} // namespace radWindowFunctionGauss - - -namespace radWindowFunctionNone -{ - struct radWindowFunction - { - /** 1D Window function according to the no window: - * - * f(position_x) = always 1.0 - * - * @param position_x = 1D position - * @param L_x = length of the simulated area - * assuming that the simulation ranges - * from 0 to L_x in the chosen dimension - * @returns 1.0 - **/ - HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const - { - return float_X(1.0); - } - }; -} // namespace radWindowFunctionNone - - -} // namespace radiation -} // namespace plugins + namespace radiation + { + /* several window functions behind namespaces: */ + + + namespace radWindowFunctionTriangle + { + struct radWindowFunction + { + /** 1D Window function according to the triangle window: + * + * x = position_x - L_x/2 + * f(x) = {1+2x/L_x : (-L_x/2 <= x <= 0 ) + * {1-2x/L_x : (0 <= x <= +L_x/2 ) + * {0.0 : in any other case + * + * @param position_x = 1D position + * @param L_x = length of the simulated area + * assuming that the simulation ranges + * from 0 to L_x in the chosen dimension + * @returns weighting factor to reduce ringing effects due to + * sharp spacial boundaries + **/ + HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const + { + float_X x = position_x - float_X(0.5) * L_x; + return float_X(math::abs(x) <= float_X(0.5) * L_x) + * (float_X(1.0) - float_X(2.0) / L_x * math::abs(x)); + } + }; + } // namespace radWindowFunctionTriangle + + + namespace radWindowFunctionHamming + { + struct radWindowFunction + { + /** 1D Window function according to the Hamming window: + * + * x = position_x - L_x/2 + * a = parameter of the Hamming window (ideal: 0.08) + * f(x) = {a+(1-a)*cos^2(pi*x/L_x) : (-L_x/2 <= x <= +L_x/2 ) + * {0.0 : in any other case + * + * @param position_x = 1D position + * @param L_x = 
length of the simulated area + * assuming that the simulation ranges + * from 0 to L_x in the chosen dimension + * @returns weighting factor to reduce ringing effects due to + * sharp spacial boundaries + **/ + HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const + { + const float_X x = position_x - L_x * float_X(0.5); + const float_X a = 0.08; /* ideal parameter: -43dB reduction */ + const float_X cosinusValue = math::cos(pmacc::math::Pi::value * x / L_x); + return float_X(math::abs(x) <= float_X(0.5) * L_x) + * (a + (float_X(1.0) - a) * cosinusValue * cosinusValue); + } + }; + } // namespace radWindowFunctionHamming + + + namespace radWindowFunctionTriplett + { + struct radWindowFunction + { + /** 1D Window function according to the Triplett window: + * + * x = position_x - L_x/2 + * lambda = decay parameter of the Triplett window + * f(x) = {exp(-lambda*|x|)*cos^2(pi*x/L_x) : (-L_x/2 <= x <= +L_x/2 ) + * {0.0 : in any other case + * + * @param position_x = 1D position + * @param L_x = length of the simulated area + * assuming that the simulation ranges + * from 0 to L_x in the chosen dimension + * @returns weighting factor to reduce ringing effects due to + * sharp spacial boundaries + **/ + HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const + { + const float_X x = position_x - L_x * float_X(0.5); + const float_X lambda = float_X(5.0) / L_x; /* larger is better, but too large means no data */ + const float_X cosinusValue = math::cos(pmacc::math::Pi::value * x / L_x); + return float_X(math::abs(x) <= float_X(0.5) * L_x) + * (math::exp(float_X(-1.0) * lambda * math::abs(x)) * cosinusValue * cosinusValue); + } + }; + } // namespace radWindowFunctionTriplett + + + namespace radWindowFunctionGauss + { + struct radWindowFunction + { + /** 1D Window function according to the Gauss window: + * + * x = position_x - L_x/2 + * sigma = standard deviation of the Gauss window + * f(x) = {exp(-0.5*x^2/sigma^2) : (-L_x/2 <= 
x <= +L_x/2 ) + * {0.0 : in any other case + * + * @param position_x = 1D position + * @param L_x = length of the simulated area + * assuming that the simulation ranges + * from 0 to L_x in the chosen dimension + * @returns weighting factor to reduce ringing effects due to + * sharp spacial boundaries + **/ + HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const + { + const float_X x = position_x - L_x * float_X(0.5); + const float_X sigma = float_X(0.4) * L_x; /* smaller is better, but too small means no data */ + const float_X relativePosition = x / sigma; /* optimization */ + return float_X(math::abs(x) <= float_X(0.5) * L_x) + * (math::exp(float_X(-0.5) * relativePosition * relativePosition)); + } + }; + } // namespace radWindowFunctionGauss + + + namespace radWindowFunctionNone + { + struct radWindowFunction + { + /** 1D Window function according to the no window: + * + * f(position_x) = always 1.0 + * + * @param position_x = 1D position + * @param L_x = length of the simulated area + * assuming that the simulation ranges + * from 0 to L_x in the chosen dimension + * @returns 1.0 + **/ + HDINLINE float_X operator()(const float_X position_x, const float_X L_x) const + { + return float_X(1.0); + } + }; + } // namespace radWindowFunctionNone + + + } // namespace radiation + } // namespace plugins } // namespace picongpu - diff --git a/include/picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.hpp b/include/picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.hpp new file mode 100644 index 0000000000..78eecf2320 --- /dev/null +++ b/include/picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.hpp @@ -0,0 +1,285 @@ +/* Copyright 2017-2021 Heiko Burau, Xeinia Bastrakova, Sergei Bastrakov + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/ISimulationPlugin.hpp" +#include "picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.kernel" +#include "picongpu/particles/functor/misc/Rng.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include + + +namespace picongpu +{ + namespace plugins + { + namespace randomizedParticleMerger + { + using namespace pmacc; + namespace bmpl = boost::mpl; + + /** Implements a randomized modification of the particle merging algorithm. + * + * The original particle merging algorithms is + * Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016). + * Voronoi particle merging algorithm for PIC codes. + * Computer Physics Communications, 202, 165-174. + * + * The randomized mofidication developed by S. Bastrakov and X. 
Bastrakova + * + * @tparam T_ParticlesType species type + * @tparam hasVoronoiCellId if the species type has the voronoiCellId attribute, + * the plugin will only be used for such types + */ + template< + class T_ParticlesType, + bool hasVoronoiCellId + = pmacc::traits::HasIdentifier::type::value> + struct RandomizedParticleMergerWrapped; + + template + struct RandomizedParticleMergerWrapped : ISimulationPlugin + { + private: + std::string name; + std::string prefix; + std::string notifyPeriod; + MappingDesc* cellDescription; + + uint32_t maxParticlesToMerge; + float_X ratioDeletedParticles; + float_X posSpreadThreshold; + float_X momSpreadThreshold; + + public: + using ParticlesType = T_ParticlesType; + + RandomizedParticleMergerWrapped() + : name("RandomizedParticleMerger: merges several macroparticles with" + " similar position and momentum into a single one") + , prefix(ParticlesType::FrameType::getName() + std::string("_randomizedMerger")) + , cellDescription(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } + + void notify(uint32_t currentStep) override + { + using SuperCellSize = MappingDesc::SuperCellSize; + + const pmacc::math::Int coreBorderGuardSuperCells + = this->cellDescription->getGridSuperCells(); + const pmacc::math::Int guardSuperCells = this->cellDescription->getGuardingSuperCells(); + const pmacc::math::Int coreBorderSuperCells + = coreBorderGuardSuperCells - 2 * guardSuperCells; + + // this zone represents the core+border area with guard offset in unit of cells + const zone::SphericZone zone( + static_cast>(coreBorderSuperCells * SuperCellSize::toRT()), + guardSuperCells * SuperCellSize::toRT()); + + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(ParticlesType::FrameType::getName(), true); + using Kernel = RandomizedParticleMergerKernel; + + using namespace pmacc::random::distributions; + using Distribution = Uniform; + using RngFactory = particles::functor::misc::Rng; + + 
RngFactory rngFactory(currentStep); + auto kernel = Kernel{ + particles->getDeviceParticlesBox(), + maxParticlesToMerge, + ratioDeletedParticles, + posSpreadThreshold, + momSpreadThreshold, + rngFactory, + guardSuperCells}; + + algorithm::kernel::Foreach foreach; + foreach(zone, cursor::make_MultiIndexCursor(), kernel) + ; + + // close all gaps caused by removal of particles + particles->fillAllGaps(); + } + + + void setMappingDescription(MappingDesc* cellDescription) override + { + this->cellDescription = cellDescription; + } + + + void pluginRegisterHelp(po::options_description& desc) override + { + desc.add_options()( + (prefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]")( + (prefix + ".maxParticlesToMerge").c_str(), + po::value(&maxParticlesToMerge)->default_value(8), + "minimum number of macroparticles at which we always divide the cell")( + (prefix + ".posSpreadThreshold").c_str(), + po::value(&posSpreadThreshold)->default_value(1e-5), + "Below this threshold of spread in position macroparticles" + " can be merged [unit: cell edge length]")( + (prefix + ".momSpreadThreshold").c_str(), + po::value(&momSpreadThreshold)->default_value(1e-5), + "Below this absolute threshold of spread in momentum" + " macroparticles can be merged [unit: m_el * c].")( + (prefix + ".ratioDeletedParticles").c_str(), + po::value(&ratioDeletedParticles)->default_value(0.1), + "Ratio of macroparticles to be deleted on average"); + } + + std::string pluginGetName() const override + { + return name; + } + + protected: + void pluginLoad() + { + if(notifyPeriod.empty()) + return; + + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + + PMACC_VERIFY_MSG( + maxParticlesToMerge > 1u, + std::string("[Plugin: ") + prefix + + "] maxParticlesToMerge" + " has to be greater than one."); + PMACC_VERIFY_MSG( + ratioDeletedParticles > 0.0_X, + std::string("[Plugin: ") + prefix + + "] ratioDeletedParticles" + " has to be > 0."); 
+ PMACC_VERIFY_MSG( + ratioDeletedParticles < 1.0_X, + std::string("[Plugin: ") + prefix + + "] ratioDeletedParticles" + " has to be < 1."); + PMACC_VERIFY_MSG( + posSpreadThreshold >= 0.0_X, + std::string("[Plugin: ") + prefix + + "] posSpreadThreshold" + " has to be non-negative."); + PMACC_VERIFY_MSG( + momSpreadThreshold >= 0.0_X, + std::string("[Plugin: ") + prefix + + "] momSpreadThreshold" + " has to be non-negative."); + } + + void pluginUnload() + { + } + + void restart(uint32_t, const std::string) + { + } + + void checkpoint(uint32_t, const std::string) + { + } + }; + + + /** Placeholder implementation for species without the required conditions + * + * @tparam T_ParticlesType species type + */ + template + struct RandomizedParticleMergerWrapped : ISimulationPlugin + { + private: + std::string name; + std::string prefix; + std::string notifyPeriod; + MappingDesc* cellDescription; + + public: + using ParticlesType = T_ParticlesType; + + RandomizedParticleMergerWrapped() + : name("RandomizedParticleMerger: merges several macroparticles with" + " similar position and momentum into a single one.\n" + "plugin disabled. 
Enable plugin by adding the `voronoiCellId`" + " attribute to the particle attribute list.") + , prefix(ParticlesType::FrameType::getName() + std::string("_randomizedMerger")) + , cellDescription(nullptr) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } + + std::string pluginGetName() const + { + return this->name; + } + + protected: + void setMappingDescription(MappingDesc*) + { + } + + void pluginRegisterHelp(po::options_description&) + { + } + + void pluginUnload() + { + } + + void restart(uint32_t, const std::string) + { + } + + void checkpoint(uint32_t, const std::string) + { + } + + void notify(uint32_t) + { + } + }; + + /** Randomized particle merger plugin + * + * @tparam T_ParticlesType species type + */ + template + struct RandomizedParticleMerger : RandomizedParticleMergerWrapped + { + }; + + } // namespace randomizedParticleMerger + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.kernel b/include/picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.kernel new file mode 100644 index 0000000000..467e242439 --- /dev/null +++ b/include/picongpu/plugins/randomizedParticleMerger/RandomizedParticleMerger.kernel @@ -0,0 +1,508 @@ +/* Copyright 2017-2021 Heiko Burau, Xeinia Bastrakova, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. 
+ * If not, see . + */ + +#pragma once + +#include "picongpu/plugins/randomizedParticleMerger/VoronoiCell.hpp" +#include "picongpu/particles/access/Cell2Particle.hpp" +#include "picongpu/particles/filter/filter.hpp" + +#include +#include + +namespace picongpu +{ + namespace plugins + { + namespace randomizedParticleMerger + { + /** Implements a randomized modification of the particle merging algorithm. + * + * The original particle merging algorithms is + * Luu, P. T., Tueckmantel, T., & Pukhov, A. (2016). + * Voronoi particle merging algorithm for PIC codes. + * Computer Physics Communications, 202, 165-174. + * + * The randomized mofidication developed by S. Bastrakov and X. Bastrakova + * + * @tparam T_ParticlesBox container of the particle species + */ + template + struct RandomizedParticleMergerKernel + { + using ParticlesBox = T_ParticlesBox; + + //! Random factory type + using RngFactory = particles::functor::misc::Rng>; + + private: + using FramePtr = typename ParticlesBox::FramePtr; + using FrameType = typename ParticlesBox::FrameType; + using ArrayVoronoiCells + = memory::Array; + using VoronoiIndexPool + = memory::IndexPool; + + ParticlesBox particlesBox; + /** minimal number of macroparticles needed to divide + the macroparticle collection */ + uint32_t maxParticlesToMerge; + + pmacc::math::Int guardSuperCells; + /** estimated fraction of macroparticles + remaining after the merging process */ + float_X ratioKeptParticles; + /** min position threshold for + macroparticles to be merged */ + float_X posSpreadThreshold; + /** min momentum threshold for + macroparticles to be merged */ + float_X momSpreadThreshold; + /** factory of gitting random value */ + RngFactory rngFactory; + using RandomGen = RngFactory::RandomGen; + + public: + RandomizedParticleMergerKernel( + ParticlesBox particlesBox, + uint32_t maxParticlesToMerge, + float_X ratioDeletedParticles, + float_X posSpreadThreshold, + float_X momSpreadThreshold, + RngFactory rngFactory, + const 
pmacc::math::Int guardSuperCells) + : particlesBox(particlesBox) + , maxParticlesToMerge(maxParticlesToMerge) + , ratioKeptParticles(1.0_X - ratioDeletedParticles) + , posSpreadThreshold(posSpreadThreshold) + , momSpreadThreshold(momSpreadThreshold) + , rngFactory(rngFactory) + , guardSuperCells(guardSuperCells) + { + } + + /** map cell index to the initial Voronoi cell by aggregating N^simDim 'normal' + * cells to a single Voronoi cell. + * + * @param cellIdx cell index + */ + DINLINE voronoiCellId::type mapCellIdxToInitialVoronoiCell(const uint32_t cellIdx) const + { + const DataSpace cellIdxDim + = DataSpaceOperations::template map(cellIdx); + + const DataSpace voronoiCellDim = cellIdxDim / 2; + + return static_cast(pmacc::math::linearize( + pmacc::math::CT::shrinkTo::type::toRT() / 2, + voronoiCellDim)); + } + + /** Init the Voronoi cell id attribute for each particle in the super cell. + * + * The initial Voronoi cell is chosen by aggregating N^simDim 'normal' cells + * to a single Voronoi cell. + * + * @param cellIdx cell index + */ + template + DINLINE void initVoronoiCellIdAttribute(T_Acc const& acc, const pmacc::math::Int& cellIdx) + { + //! \todo change this as soon as the kernel support lock step programming + constexpr uint32_t numWorkers = pmacc::math::CT::volume::type::value; + const uint32_t workerIdx + = DataSpaceOperations::template map(cellIdx % SuperCellSize::toRT()); + particleAccess::Cell2Particle forEachFrame; + forEachFrame( + acc, + particlesBox, + workerIdx, + cellIdx, + [this](const T_Acc& acc, FramePtr frame, const int linearThreadIdx) { + auto particle = frame[linearThreadIdx]; + const lcellId_t particleCellIdx = particle[localCellIdx_]; + particle[voronoiCellId_] = this->mapCellIdxToInitialVoronoiCell(particleCellIdx); + }, + particles::filter::All{}); + } + + /** Calculate position of particle within a super cell. 
+ * + * @param particleCellIdx local particle cell index + * @param positionWithinCell position within cell + * @return position of particle with respect to its super cell's origin + */ + DINLINE floatD_X + getParticlePosWithinSuperCell(const lcellId_t particleCellIdx, const floatD_X positionWithinCell) const + { + const DataSpace particleCellIdxDim + = DataSpaceOperations::template map(particleCellIdx); + + floatD_X result; + for(int i = 0; i < simDim; i++) + { + result[i] = static_cast(particleCellIdxDim[i]) + positionWithinCell[i]; + } + + return result; + } + + /** Calculate revative position of partilce in Supercell + * + * @param absoluteParticlePos absolute particle coordinates + * @return particleCellIdx particle's supercell + * @return positionWithinCell position particle inside supecell + */ + DINLINE void getSuperCellPos( + const floatD_X absoluteParticlePos, + ::pmacc::math::Vector& particleCellIdx, + floatD_X& positionWithinCell) + { + for(int i = 0; i < simDim; i++) + { + particleCellIdx[i] = static_cast(absoluteParticlePos[i]); + positionWithinCell[i] = absoluteParticlePos[i] - particleCellIdx[i]; + } + } + + /** Decide if subdivision should be done, based on + * information from parents voronoi cells + * + * @param randomGen ramdom generator functor + * @param voronoiCell voronoi cell + */ + DINLINE bool isNeededSubdivision(RandomGen& randomGen, VoronoiCell const& voronoiCell) const + { + // With large enough number of macroparticles we always subdivide + if(voronoiCell.numMacroParticles > maxParticlesToMerge) + return true; + + // Otherwise we compute subdivision probability based on the parameters + // and the number of macroparticles in the cell + float_X halfDivisionCoefficient + = (voronoiCell.expectedNumMacroParticles + voronoiCell.numMacroParticles) / 2.0_X; + + float_X subdivisionProbability + = (voronoiCell.expectedNumMacroParticles - 1.0_X) / (halfDivisionCoefficient - 1.0_X); + + // Spectial probability equations for small Voronoi cells + 
if(voronoiCell.numMacroParticles == 2) + subdivisionProbability = voronoiCell.expectedNumMacroParticles - 1.0_X; + if(voronoiCell.numMacroParticles == 3) + subdivisionProbability = (voronoiCell.expectedNumMacroParticles - 1.0_X) / 2.0_X; + + return randomGen() < subdivisionProbability; + } + + + DINLINE bool isSpreadEnoughForSubdivision(uint8_t& splittingComponent, VoronoiCell& voronoiCell) const + { + bool isSpreadEnoughForSubdivision = true; + float_X maxSpreadValue = voronoiCell.getMaxValueSpread2(splittingComponent, simDim); + + if(voronoiCell.splittingStage == VoronoiSplittingStage::position + && maxSpreadValue < posSpreadThreshold) + { + voronoiCell.invertSplittingStage(); + maxSpreadValue = voronoiCell.getMaxValueSpread2(splittingComponent, simDim); + if(maxSpreadValue < momSpreadThreshold) + { + voronoiCell.setToReadyForMerging(); + isSpreadEnoughForSubdivision = false; + } + } + if(voronoiCell.splittingStage == VoronoiSplittingStage::momentum + && maxSpreadValue < momSpreadThreshold) + { + voronoiCell.invertSplittingStage(); + maxSpreadValue = voronoiCell.getMaxValueSpread2(splittingComponent, simDim); + if(maxSpreadValue < posSpreadThreshold) + { + voronoiCell.setToReadyForMerging(); + isSpreadEnoughForSubdivision = false; + } + } + return isSpreadEnoughForSubdivision; + } + + /** Merge all particles in voronoi cell into one + * + * New momentum is weighted average of all particles momentums. + * New position is weighted average of all particles positions. 
+ * New weight is sum of all particles weights + * + * @tparam T_Particle particle type + * @trapam T_Acc accelerator type + * + * @param particle current particle + * @param voronoiCell current Voronoi cell + */ + template + DINLINE void mergeVoronoiCell(T_Acc const& acc, T_Particle& particle, VoronoiCell& voronoiCell) + { + if(voronoiCell.isFirstParticle(acc)) + { + /* I am the first particle in the Voronoi cell + * => get dressed with Voronoi cell's attributes + */ + + auto particleCellIdx = pmacc::DataSpace::create(0); + auto relativePosition = floatD_X::create(0.0_X); + getSuperCellPos(voronoiCell.meanPositionValue, particleCellIdx, relativePosition); + lcellId_t localCellIdx = pmacc::math::linearize( + pmacc::math::CT::shrinkTo::type::toRT(), + particleCellIdx); + + particle[localCellIdx_] = localCellIdx; + particle[position_] = relativePosition; + /* Here the voronoiCell.meanMomentumValue is for a single particle, + * multiply to make it for macroparticle + */ + particle[momentum_] = voronoiCell.meanMomentumValue * voronoiCell.numRealParticles; + particle[weighting_] = voronoiCell.numRealParticles; + } + else + { + // I am not the first particle in the Voronoi cell => remove me + particle[multiMask_] = 0; + } + } + + /** This method handles the merging process on the single-particle level. + * + * It is called in the main loop of the merging algorithm. + * Depending on the state of the Voronoi cell where the particle belongs + * to the execution is forked into distinct sub-processes. + * + * @tparam T_Acc accelerator type + * + * @param acc accelerator + * @param cellIdx n-dimensional cell index from the origin of the local domain + * @param listVoronoiCells fixed-sized array of Voronoi cells + */ + template + DINLINE void processParticles( + T_Acc const& acc, + const pmacc::math::Int& cellIdx, + ArrayVoronoiCells& listVoronoiCells) + { + //! 
\todo change this as soon as the kernel support lock step programming + constexpr uint32_t numWorkers = pmacc::math::CT::volume::type::value; + const uint32_t workerIdx + = DataSpaceOperations::template map(cellIdx % SuperCellSize::toRT()); + particleAccess::Cell2Particle forEachFrame; + forEachFrame( + acc, + this->particlesBox, + workerIdx, + cellIdx, + [&](const T_Acc& acc, FramePtr frame, const int linearThreadIdx) { + auto particle = frame[linearThreadIdx]; + const voronoiCellId::type voronoiCellId = particle[voronoiCellId_]; + + if(voronoiCellId == -1) + return; + + VoronoiCell& voronoiCell = listVoronoiCells[voronoiCellId]; + + const floatD_X position + = this->getParticlePosWithinSuperCell(particle[localCellIdx_], particle[position_]); + + const float_X weighting = particle[weighting_]; + /* Algorithm internally operates with momentums for single + * particles, not macroparticles, so convert + */ + const float3_X singleParticleMomentum = particle[momentum_] / weighting; + + switch(voronoiCell.status) + { + case VoronoiStatus::collecting: + voronoiCell.addParticle(acc, position, singleParticleMomentum, weighting); + break; + + case VoronoiStatus::splitting: + { + const voronoiCellId::type subVoronoiCellId + = voronoiCell.getSubVoronoiCell(position, singleParticleMomentum); + particle[voronoiCellId_] = subVoronoiCellId; + listVoronoiCells[subVoronoiCellId] + .addParticle(acc, position, singleParticleMomentum, weighting); + + break; + } + + case VoronoiStatus::abort: + particle[voronoiCellId_] = -1; + break; + + case VoronoiStatus::readyForMerging: + mergeVoronoiCell(acc, particle, voronoiCell); + particle[voronoiCellId_] = -1; + } + }, + particles::filter::All{}); + } + + /** This method handles the merging process on the Voronoi cell level. + * + * It is called in the main loop of the merging algorithm. + * It does the transition of the distinct states of each Voronoi cell. 
+ * + * @param listVoronoiCells fixed-sized array of Voronoi cells + * @param voronoiIndexPool holds indices of active Voronoi cells within `listVoronoiCells` + * @param randomGen random generator functor + */ + DINLINE void processVoronoiCells( + ArrayVoronoiCells& listVoronoiCells, + VoronoiIndexPool& voronoiIndexPool, + RandomGen& randomGen) const + { + for(voronoiCellId::type voronoiCellId : voronoiIndexPool) + { + VoronoiCell& voronoiCell = listVoronoiCells[voronoiCellId]; + switch(voronoiCell.status) + { + case VoronoiStatus::collecting: + if(voronoiCell.numMacroParticles < 2) + { + voronoiCell.setToAbort(); + break; + } + voronoiCell.finalizePrecalculationValues(maxParticlesToMerge, ratioKeptParticles); + + // Check if subdivision is needed probabilistically + if(isNeededSubdivision(randomGen, voronoiCell)) + { + uint8_t splittingComponent; + float_X maxSpreadValue = voronoiCell.getMaxValueSpread2(splittingComponent, simDim); + + // Continue only when the subdivision makes sense in terms of the spread + if(!isSpreadEnoughForSubdivision(splittingComponent, voronoiCell)) + break; + + voronoiCell.setToSplitting( + splittingComponent, + voronoiIndexPool.get(), + voronoiIndexPool.get()); + + // Abort when no memory for more Voronoi cells + if(voronoiCell.lowerCellId == -1 || voronoiCell.higherCellId == -1) + { + voronoiCell.setToAbort(); + break; + } + + // For better subdivision, change the splitting state each step + VoronoiSplittingStage currentVoronoiStage; + if(voronoiCell.splittingStage == VoronoiSplittingStage::position) + currentVoronoiStage = VoronoiSplittingStage::momentum; + else + currentVoronoiStage = VoronoiSplittingStage::position; + + /* initialize the two new sub Voronoi cells in `collecting` state */ + listVoronoiCells[voronoiCell.lowerCellId] = VoronoiCell( + currentVoronoiStage, + voronoiCell.numMacroParticles, + voronoiCell.expectedNumMacroParticles); + listVoronoiCells[voronoiCell.higherCellId] = VoronoiCell( + currentVoronoiStage, + 
voronoiCell.numMacroParticles, + voronoiCell.expectedNumMacroParticles); + + break; + } + else + { + voronoiCell.setToReadyForMerging(); + break; + } + + default: + voronoiIndexPool.release(voronoiCellId); + break; + } + } + } + + /** Entry point of the particle merging algorithm + * + * @tparam T_Acc accelerator type + * + * @param acc accelerator + * @param cellIndex n-dimensional cell index from the origin of the local domain + */ + template + DINLINE void operator()(T_Acc const& acc, const pmacc::math::Int& cellIndex) + { + // multi-dim vector from origin of the super cell to a cell in units of cells + const pmacc::math::Int threadIndex = cellIndex % SuperCellSize::toRT(); + const int linearThreadIdx = pmacc::math::linearize( + pmacc::math::CT::shrinkTo::type::toRT(), + threadIndex); + + // Storage for Voronoi cells in shared memory + PMACC_SMEM(acc, listVoronoiCells, ArrayVoronoiCells); + PMACC_SMEM(acc, voronoiIndexPool, VoronoiIndexPool); + + /* number of initial Voronoi cells + * `1u << simDim` is equivalent to `pow(2, simDim)` but can be + * calculated at compile-time to save a shared variable. 
+ */ + constexpr uint16_t numInitialVoronoiCells + = pmacc::math::CT::volume::type::value / (1u << simDim); + + + pmacc::math::Int localOffset = cellIndex / SuperCellSize::toRT() - guardSuperCells; + constexpr uint32_t numWorkers = pmacc::math::CT::volume::type::value; + pmacc::mappings::threads::WorkerCfg workerCfg(linearThreadIdx); + + // Thread 0 of each block creates Voronoi cells + if(linearThreadIdx == 0) + { + voronoiIndexPool = VoronoiIndexPool(numInitialVoronoiCells); + } + __syncthreads(); + + // Set initial Voronoi cells into `collecting` state + if(linearThreadIdx < numInitialVoronoiCells) + listVoronoiCells[linearThreadIdx] = VoronoiCell(); + __syncthreads(); + + // Distribute particle between original cells + initVoronoiCellIdAttribute(acc, cellIndex); + __syncthreads(); + + auto generator = rngFactory(acc, localOffset, workerCfg); + // Main loop of the algorithm: while there are active cells left + while(voronoiIndexPool.size() > 0) + { + processParticles(acc, cellIndex, listVoronoiCells); + __syncthreads(); + + // This part is not yet parallelized between blocks of a thread + if(linearThreadIdx == 0) + processVoronoiCells(listVoronoiCells, voronoiIndexPool, generator); + __syncthreads(); + } + } + }; + + } // namespace randomizedParticleMerger + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/randomizedParticleMerger/VoronoiCell.hpp b/include/picongpu/plugins/randomizedParticleMerger/VoronoiCell.hpp new file mode 100644 index 0000000000..6e12233f7a --- /dev/null +++ b/include/picongpu/plugins/randomizedParticleMerger/VoronoiCell.hpp @@ -0,0 +1,372 @@ +/* Copyright 2017-2021 Heiko Burau, Xeinia Bastrakova, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/algorithms/KinEnergy.hpp" + +#include + +#include + + +namespace picongpu +{ + namespace plugins + { + namespace randomizedParticleMerger + { + //! Status of a Voronoi cell + enum struct VoronoiStatus : uint8_t + { + /* !< a Voronoi cell is collecting particles (first state) */ + collecting, + /* !< the Voronoi cell is splitting thus all its particles have + * to move to one of two sub-Voronoi cells + */ + splitting, + /* !< the cell needs to be destroyed. Before this can happen + * all its particles need to clear their voronoiCellId attribute. + */ + abort, + /* !< the Voronoi cell is ready for merging. After merging it is destroyed. */ + readyForMerging, + }; + + + /** Stage of a Voronoi cell + * + * The spliiting process is two-fold: at first, the splitting is done regarding + * only the spread in position and then by looking at the spread of momentum. + */ + enum struct VoronoiSplittingStage : bool + { + /* !< the spatial distribution is splitted */ + position, + /* !< the momentum distribution is splitted */ + momentum + }; + + //! 
Voronoi cell representation + struct VoronoiCell + { + VoronoiStatus status; + VoronoiSplittingStage splittingStage; + /** number of macroparticles */ + uint32_t numMacroParticles; + /** number of physical particles */ + float_X numRealParticles; + + /** value of weighted mean momentum for + all physical particles in cell */ + float3_X meanMomentumValue; + /** value of weighted mean position for + all physical particles in cell */ + float3_X meanPositionValue; + /** value of weighted squared mean momentum for + all physical particles in cell */ + float3_X meanMomentumSquaredValue; + /** value of weighted squared mean position for + all physical particles in cell */ + float3_X meanPositionSquaredValue; + + /** axis on which the Voronoi cell is divided */ + uint8_t splittingComponent; + /** cell index of a child "lower" subcelld */ + int32_t lowerCellId; + /** cell index of a child "upper" subcelld */ + int32_t higherCellId; + /** is this particle first in voronoi cell */ + int firstParticleFlag; + /** necessary for probalic algorithm. expected number of particles, + which should be obtained after merging the particles in the current cell */ + float_X expectedNumMacroParticles; + /** number of macroparticles in parent cell */ + uint32_t parentNumMacroParticles; + /** necessary for probalic algorithm. 
expected number of particles, + which should be obtained after merging the particles in the parent cell */ + float_X parentExpectedNumMacroParticles; + + HDINLINE + VoronoiCell( + VoronoiSplittingStage splittingStage = VoronoiSplittingStage::position, + float_X parentNumMacroParticles = 0.0_X, + float_X parentExpectedNumMacroParticles = float_X(-1.0)) + : status(VoronoiStatus::collecting) + , splittingStage(splittingStage) + , numMacroParticles(0u) + , numRealParticles(float_X(0.0_X)) + , meanMomentumValue(float3_X::create(0.0_X)) + , meanPositionValue(float3_X::create(0.0_X)) + , meanMomentumSquaredValue(float3_X::create(0.0_X)) + , meanPositionSquaredValue(float3_X::create(0.0_X)) + , firstParticleFlag(0) + , expectedNumMacroParticles(0.0_X) + , parentNumMacroParticles(parentNumMacroParticles) + , parentExpectedNumMacroParticles(parentExpectedNumMacroParticles) + + { + } + + /** status setter */ + HDINLINE + void setToAbort() + { + status = VoronoiStatus::abort; + } + + + /** Mark the cell for splitting + * + * @param splittingComponent index of position or momentum component + * to use for splitting + * @param lowerCellId cell index of a new "lower" subcell + * @param higherCellId cell index of a new "upper" subcell + */ + HDINLINE + void setToSplitting( + const uint8_t splittingComponent, + const int32_t lowerCellId, + const int32_t higherCellId) + { + status = VoronoiStatus::splitting; + this->splittingComponent = splittingComponent; + this->lowerCellId = lowerCellId; + this->higherCellId = higherCellId; + } + + + /** status setter */ + HDINLINE + void setToReadyForMerging() + { + this->status = VoronoiStatus::readyForMerging; + } + + /** check if the current thread is associated to the first particle */ + template + DINLINE bool isFirstParticle(const T_Acc& acc) + { + return atomicExch(&this->firstParticleFlag, 1) == 0; + } + + + /** add a particle to this Voronoi cell */ + template + DINLINE void addParticle( + const T_Acc& acc, + const floatD_X position, + 
const float3_X momentum, + const float_X weighting) + { + cupla::atomicAdd( + acc, + &this->numMacroParticles, + static_cast(1), + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd(acc, &this->numRealParticles, weighting, ::alpaka::hierarchy::Threads{}); + + const floatD_X position2 = position * position; + + for(int i = 0; i < simDim; i++) + { + cupla::atomicAdd( + acc, + &this->meanPositionValue[i], + weighting * position[i], + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd( + acc, + &this->meanPositionSquaredValue[i], + weighting * position2[i], + ::alpaka::hierarchy::Threads{}); + } + + const float3_X momentum2 = momentum * momentum; + + for(int i = 0; i < DIM3; i++) + { + cupla::atomicAdd( + acc, + &this->meanMomentumValue[i], + weighting * momentum[i], + ::alpaka::hierarchy::Threads{}); + cupla::atomicAdd( + acc, + &this->meanMomentumSquaredValue[i], + weighting * momentum2[i], + ::alpaka::hierarchy::Threads{}); + } + } + + /** Counting parameters that are necessary before processing vornoi cell: + * mean values and expected number of macro particles + * + * @param minMacroParticlesToDivide min number of macroparticles in a cell + * such that the cell is always subdivided + * @param ratioKeptParticles ratio of particles that are kept on average + */ + HDINLINE + void finalizePrecalculationValues( + const uint32_t minMacroParticlesToDivide, + const float_X ratioKeptParticles) + { + finalizeMeanValues(); + finalizeExpectedNumberParticles(minMacroParticlesToDivide, ratioKeptParticles); + } + + //! 
Finalize calculation of mean values + HDINLINE + void finalizeMeanValues() + { + meanMomentumValue /= numRealParticles; + meanPositionValue /= numRealParticles; + meanMomentumSquaredValue /= numRealParticles; + meanPositionSquaredValue /= numRealParticles; + } + + /** Count expected number of particles in the cell + * + * @param minMacroParticlesToDivide min number of macroparticles in a cell + * such that the cell is always subdivided + * @param ratioKeptParticles ratio of particles that are kept on average + */ + HDINLINE + void finalizeExpectedNumberParticles( + const uint32_t minMacroParticlesToDivide, + const float_X ratioKeptParticles) + { + // Special case for the original voronoi cells + if(parentExpectedNumMacroParticles < 0) + { + expectedNumMacroParticles = numMacroParticles * ratioKeptParticles; + return; + } + + // Algorithm stop conditions for 1 and 2 macroparticles + if(numMacroParticles == 1u) + expectedNumMacroParticles = 1.0_X; + if(numMacroParticles == 2u && parentNumMacroParticles == 3u) + expectedNumMacroParticles = 2.0_X; + + // Normal subdivision step + if(parentNumMacroParticles > minMacroParticlesToDivide) + { + expectedNumMacroParticles = numMacroParticles * ratioKeptParticles; + } + else + { + float_X undividedCellCoeff + = (parentExpectedNumMacroParticles + parentNumMacroParticles) / 2.0_X; + float_X currentExpectedNumMacroParticles + = numMacroParticles * undividedCellCoeff / parentNumMacroParticles; + expectedNumMacroParticles = currentExpectedNumMacroParticles; + } + } + + /** determine in which of the two sub-Voronoi cells a particle falls */ + HDINLINE + int32_t getSubVoronoiCell(const floatD_X position, const float3_X momentum) const + { + const float_X valParticle = splittingStage == VoronoiSplittingStage::position + ? position[splittingComponent] + : momentum[splittingComponent]; + const float_X meanVoronoi = splittingStage == VoronoiSplittingStage::position + ? 
meanPositionValue[splittingComponent] + : meanMomentumValue[splittingComponent]; + return valParticle < meanVoronoi ? lowerCellId : higherCellId; + } + + /** Counting parameters that are necessary before processing vornoi cell: + * mean values and expected number of macro particles + * + * @param minMacroParticlesToDivide min number of macroparticles in a cell + * such that the cell is always subdivided + * @param ratioKeptParticles ratio of particles that are kept on average + * @return maximum spread value + * @return component of most spread in position (as function parameter) + */ + + /** auxillary function for getting the mean squared deviation in position or momentum */ + HDINLINE + float_X getMaxValueSpread2(uint8_t& component, const uint8_t dimension) const + { + const float3_X meanValue2 = splittingStage == VoronoiSplittingStage::position + ? meanPositionValue * meanPositionValue + : meanMomentumValue * meanMomentumValue; + + const float3_X valueSpread2 = splittingStage == VoronoiSplittingStage::position + ? 
meanPositionSquaredValue - meanValue2 + : meanMomentumSquaredValue - meanValue2; + + /* find component of most spread in position */ + component = 0; + float_X maxValueSpread2 = valueSpread2[0]; + for(uint8_t i = 1; i < dimension; i++) + { + if(valueSpread2[i] > maxValueSpread2) + { + maxValueSpread2 = valueSpread2[i]; + component = i; + } + } + + return maxValueSpread2; + } + + + /** calculate the maxmimum squared spread in position + * + * @param component index of position component of maxmimum spread + * @return maxmimum squared spread in position + * @return from uint8_t& component argument -- axis of maximum spread + */ + HDINLINE + float_X getMaxPositionSpread2(uint8_t& component) const + { + return getMaxValueSpread2(component, simDim); + } + + + /** calculate the maxmimum squared spread in momentum + * + * @param component index of momentum component of maxmimum spread + * @return maxmimum squared spread in momentum + * @return from uint8_t& component argument -- axis of maximum squared spread + */ + HDINLINE + float_X getMaxMomentumSpread2(uint8_t& component) const + { + return getMaxValueSpread2(component, DIM3); + } + + /** invesing splitting stage */ + HDINLINE + void invertSplittingStage() + { + if(splittingStage == VoronoiSplittingStage::position) + splittingStage = VoronoiSplittingStage::momentum; + else + splittingStage = VoronoiSplittingStage::position; + } + }; + + } // namespace randomizedParticleMerger + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/Calculator.hpp b/include/picongpu/plugins/transitionRadiation/Calculator.hpp index 9dd452592e..3ccf0a522e 100644 --- a/include/picongpu/plugins/transitionRadiation/Calculator.hpp +++ b/include/picongpu/plugins/transitionRadiation/Calculator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file 
is part of PIConGPU. * @@ -24,217 +24,184 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ - using complex_X = pmacc::math::Complex< float_X >; - using complex_64 = pmacc::math::Complex< float_64 >; - - /* Arbitrary margin which is necessary to prevent division by 0 error - * created by particles moving in the plane of the foil. - */ - float_X const DIV_BY_ZERO_MINIMUM = 1.e-7; - - /** Calculator class for calculation of transition radiation. - * - * @param particleSet transitionRadiation::Particle to compute transition radiation for - * @param lookDirection vector of observation direction - */ - class Calculator + namespace plugins { - - private: - transitionRadiation::Particle const & particle; - float3_X const & lookDirection; - - float_X parMomSinTheta; - float_X parMomCosTheta; - float_X const parMomPhi; - float_X parMomSinPhi; - float_X parMomCosPhi; - float_X detectorSinTheta; - float_X detectorCosTheta; - float_X const detectorPhi; - float_X const uSquared; - float_X const parSqrtOnePlusUSquared; - - public: - HDINLINE - Calculator( - transitionRadiation::Particle const & particleSet, - float3_X const & lookDirection - ) : - particle( particleSet ), - lookDirection( lookDirection ), - parMomPhi( particle.getMomPhi( ) ), - // one has to add pi to the polar angle, because phi is in the range of 0 to 2 \pi - detectorPhi( - picongpu::math::atan2( - lookDirection.z( ), - lookDirection.x( ) - ) + picongpu::PI - ), - uSquared( particle.getU( ) * particle.getU( ) ), - parSqrtOnePlusUSquared( - picongpu::math::sqrt( 1 + uSquared ) - ) - { - // frequent calculations - // momentum Space for Particle: - picongpu::math::sincos( - particle.getMomTheta( ), - parMomSinTheta, - parMomCosTheta - ); - picongpu::math::sincos( - parMomPhi - detectorPhi, - parMomSinPhi, - parMomCosPhi - ); - - // detector Position since lookDirection is normalized - float_X const detectorTheta = picongpu::math::acos( lookDirection.y( ) ); - - 
picongpu::math::sincos( - detectorTheta, - detectorSinTheta, - detectorCosTheta - ); - } - - /** Perpendicular part of normalized energy - * - * Calculates perpendicular part to movement direction of normalized energy - * determined by formula: - * @f[E_{perp} = (u^2 \cos{\psi} \sin{\psi} \sin{\phi} \cos{\theta}) / - * ((\sqrt{1 + u^2} - u \sin{\psi} \cos{\phi} \sin{\theta})^2 - u^2 \cos{\phi}^2 \cos{\theta}^2)@f] - * where \psi is the azimuth angle of the particle momentum and \theta is - * the azimuth angle of the detector position to the movement direction y - * - * @return perpendicular part of normalized energy - */ - HDINLINE - float_X - calcEnergyPerp( ) const + namespace transitionRadiation { - // a, x and y are temporary variables without an explicit physical meaning - float_X const a = uSquared * parMomCosTheta * parMomSinTheta * - parMomSinPhi * detectorCosTheta; - - // Denominator - float_X const x = parSqrtOnePlusUSquared - - particle.getU( ) * parMomSinTheta * parMomCosPhi * detectorSinTheta; - float_X const y = particle.getU( ) * parMomCosTheta * detectorCosTheta; - - float_X denominator = x * x - y * y; - - // Preventing division by 0 - if( math::abs( denominator ) < DIV_BY_ZERO_MINIMUM ) + using complex_X = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; + + /* Arbitrary margin which is necessary to prevent division by 0 error + * created by particles moving in the plane of the foil. + */ + float_X const DIV_BY_ZERO_MINIMUM = 1.e-7; + + /** Calculator class for calculation of transition radiation. 
+ * + * @param particleSet transitionRadiation::Particle to compute transition radiation for + * @param lookDirection vector of observation direction + */ + class Calculator { - if( denominator < 0.0 ) - denominator = -DIV_BY_ZERO_MINIMUM; - else - denominator = DIV_BY_ZERO_MINIMUM; - } - - return a / denominator; - } - - /** Parallel part of normalized energy - * - * Calculates parallel part to movement direction of normalized energy - * determined by formula: - * @f[E_{para} = (u \cos{\psi} (u \sin{\psi} \cos{\phi} - \sqrt{1 + u^2} \sin{\theta}) / - * ((\sqrt{1 + u^2} - u \sin{\psi} \cos{\phi} \sin{\theta})^2 - u^2 \cos{\phi}^2 \cos{\theta}^2)@f] - * where \psi is the azimuth angle of the particle momentum and \theta is - * the azimuth angle of the detector position to the movement direction y - * - * @return parallel part of normalized energy - */ - HDINLINE - float_X - calcEnergyPara( ) const - { - // a, b, c, x and y are just temporary variables without an explicit physical meaning - float_X const a = particle.getU( ) * parMomCosTheta; - float_X const b = particle.getU( ) * parMomSinTheta * parMomCosPhi; - float_X const c = parSqrtOnePlusUSquared * detectorSinTheta; - - // Denominator - float_X const x = parSqrtOnePlusUSquared - - particle.getU( ) * parMomSinTheta * parMomCosPhi * detectorSinTheta; - float_X const y = particle.getU( ) * parMomCosTheta * detectorCosTheta; - - float_X denominator = x * x - y * y; - - // Preventing division by 0 - if( math::abs( denominator ) < DIV_BY_ZERO_MINIMUM ) + private: + transitionRadiation::Particle const& particle; + float3_X const& lookDirection; + + float_X parMomSinTheta; + float_X parMomCosTheta; + float_X const parMomPhi; + float_X parMomSinPhi; + float_X parMomCosPhi; + float_X detectorSinTheta; + float_X detectorCosTheta; + float_X const detectorPhi; + float_X const uSquared; + float_X const parSqrtOnePlusUSquared; + + public: + HDINLINE + Calculator(transitionRadiation::Particle const& particleSet, float3_X 
const& lookDirection) + : particle(particleSet) + , lookDirection(lookDirection) + , parMomPhi(particle.getMomPhi()) + , + // one has to add pi to the polar angle, because phi is in the range of 0 to 2 \pi + detectorPhi(picongpu::math::atan2(lookDirection.z(), lookDirection.x()) + picongpu::PI) + , uSquared(particle.getU() * particle.getU()) + , parSqrtOnePlusUSquared(picongpu::math::sqrt(1 + uSquared)) + { + // frequent calculations + // momentum Space for Particle: + pmacc::math::sincos(particle.getMomTheta(), parMomSinTheta, parMomCosTheta); + pmacc::math::sincos(parMomPhi - detectorPhi, parMomSinPhi, parMomCosPhi); + + // detector Position since lookDirection is normalized + float_X const detectorTheta = picongpu::math::acos(lookDirection.y()); + + pmacc::math::sincos(detectorTheta, detectorSinTheta, detectorCosTheta); + } + + /** Perpendicular part of normalized energy + * + * Calculates perpendicular part to movement direction of normalized energy + * determined by formula: + * @f[E_{perp} = (u^2 \cos{\psi} \sin{\psi} \sin{\phi} \cos{\theta}) / + * ((\sqrt{1 + u^2} - u \sin{\psi} \cos{\phi} \sin{\theta})^2 - u^2 \cos{\phi}^2 + * \cos{\theta}^2)@f] where \psi is the azimuth angle of the particle momentum and \theta is the + * azimuth angle of the detector position to the movement direction y + * + * @return perpendicular part of normalized energy + */ + HDINLINE + float_X calcEnergyPerp() const + { + // a, x and y are temporary variables without an explicit physical meaning + float_X const a = uSquared * parMomCosTheta * parMomSinTheta * parMomSinPhi * detectorCosTheta; + + // Denominator + float_X const x + = parSqrtOnePlusUSquared - particle.getU() * parMomSinTheta * parMomCosPhi * detectorSinTheta; + float_X const y = particle.getU() * parMomCosTheta * detectorCosTheta; + + float_X denominator = x * x - y * y; + + // Preventing division by 0 + if(math::abs(denominator) < DIV_BY_ZERO_MINIMUM) + { + if(denominator < 0.0) + denominator = -DIV_BY_ZERO_MINIMUM; 
+ else + denominator = DIV_BY_ZERO_MINIMUM; + } + + return a / denominator; + } + + /** Parallel part of normalized energy + * + * Calculates parallel part to movement direction of normalized energy + * determined by formula: + * @f[E_{para} = (u \cos{\psi} (u \sin{\psi} \cos{\phi} - \sqrt{1 + u^2} \sin{\theta}) / + * ((\sqrt{1 + u^2} - u \sin{\psi} \cos{\phi} \sin{\theta})^2 - u^2 \cos{\phi}^2 + * \cos{\theta}^2)@f] where \psi is the azimuth angle of the particle momentum and \theta is the + * azimuth angle of the detector position to the movement direction y + * + * @return parallel part of normalized energy + */ + HDINLINE + float_X calcEnergyPara() const + { + // a, b, c, x and y are just temporary variables without an explicit physical meaning + float_X const a = particle.getU() * parMomCosTheta; + float_X const b = particle.getU() * parMomSinTheta * parMomCosPhi; + float_X const c = parSqrtOnePlusUSquared * detectorSinTheta; + + // Denominator + float_X const x + = parSqrtOnePlusUSquared - particle.getU() * parMomSinTheta * parMomCosPhi * detectorSinTheta; + float_X const y = particle.getU() * parMomCosTheta * detectorCosTheta; + + float_X denominator = x * x - y * y; + + // Preventing division by 0 + if(math::abs(denominator) < DIV_BY_ZERO_MINIMUM) + { + if(denominator < 0.0) + denominator = -DIV_BY_ZERO_MINIMUM; + else + denominator = DIV_BY_ZERO_MINIMUM; + } + + return a * (b - c) / denominator; + } + + /** Exponent of form factor + * + * Calculates the exponent of the formfactor divided by \omega + * It represents the phase of a single electron in the bunch, but it is mostly + * calculated for performance reasons. + * \f[ F_exp = - i z ( 1 / v - \sin{\theta} \sin{\psi} \cos{\phi_P - \phi_D} / c ) / \cos{\phi} + * - i \sin{\theta} \rho \cos{\phi_P - \phi_D} \f] + * + */ + HDINLINE + complex_X calcFormFactorExponent() const + { + // If case for longitudinal moving particles... 
leads to 0 later in the kernel + if(math::abs(parMomCosTheta) <= DIV_BY_ZERO_MINIMUM) + return complex_X(-1.0, 0.0); + + float_X const a = detectorSinTheta * parMomSinTheta * math::cos(parMomPhi - detectorPhi); + float_X const b + = -(particle.getPosPara()) * (1 / particle.getVel() - a / SPEED_OF_LIGHT) / (parMomCosTheta); + float_X const c + = -detectorSinTheta * particle.getPosPerp() * math::cos(particle.getPosPhi() - detectorPhi); + + complex_X const fpara = complex_X(0.0, b); + complex_X const fperp = complex_X(0.0, c); + return fpara + fperp; + } + }; // class Calculator + + /** Formfactor + * + * Calculates of the electron bunch with the exponent calculated by the + * Calculator class. + * + * @f[F = \exp{ F_{exp} * \omega }@f] + * + * @param omega observed frequency + * @param exponent exponent of exponential function + */ + HDINLINE + complex_X calcFormFactor(float_X const omega, complex_X const exponent) { - if( denominator < 0.0 ) - denominator = -DIV_BY_ZERO_MINIMUM; - else - denominator = DIV_BY_ZERO_MINIMUM; + // preventing division by 0 + const bool longMovingParticle = exponent.get_real() == -1.0; + return float_X(longMovingParticle) * complex_X(0.0, 0.0) + + float_X(!longMovingParticle) * complex_X(math::exp(exponent * omega)); } - return a * ( b - c ) / denominator; - } - - /** Exponent of form factor - * - * Calculates the exponent of the formfactor divided by \omega - * It represents the phase of a single electron in the bunch, but it is mostly - * calculated for performance reasons. - * \f[ F_exp = - i z ( 1 / v - \sin{\theta} \sin{\psi} \cos{\phi_P - \phi_D} / c ) / \cos{\phi} - * - i \sin{\theta} \rho \cos{\phi_P - \phi_D} \f] - * - */ - HDINLINE - complex_X - calcFormFactorExponent( ) const - { - // If case for longitudinal moving particles... 
leads to 0 later in the kernel - if ( math::abs( parMomCosTheta ) <= DIV_BY_ZERO_MINIMUM ) - return complex_X( -1.0, 0.0 ); - - float_X const a = detectorSinTheta * parMomSinTheta * math::cos( parMomPhi - detectorPhi ); - float_X const b = - ( particle.getPosPara( ) ) * ( 1 / particle.getVel( ) - a / SPEED_OF_LIGHT) / ( parMomCosTheta ); - float_X const c = - detectorSinTheta * particle.getPosPerp( ) * math::cos( particle.getPosPhi( ) - detectorPhi ); - - complex_X const fpara = complex_X( 0.0, b ); - complex_X const fperp = complex_X( 0.0, c ); - return fpara + fperp; - - } - }; // class Calculator - - /** Formfactor - * - * Calculates of the electron bunch with the exponent calculated by the - * Calculator class. - * - * @f[F = \exp{ F_{exp} * \omega }@f] - * - * @param omega observed frequency - * @param exponent exponent of exponential function - */ - HDINLINE - complex_X - calcFormFactor( - float_X const omega, - complex_X const exponent - ) - { - // preventing division by 0 - const bool longMovingParticle = exponent.get_real() == -1.0; - return float_X( longMovingParticle ) * complex_X( 0.0, 0.0 ) + - float_X( !longMovingParticle ) * complex_X( - math::exp( - exponent * omega - ) - ); - } - -} // namespace transitionRadiation -} // namespace plugins + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/ExecuteParticleFilter.hpp b/include/picongpu/plugins/transitionRadiation/ExecuteParticleFilter.hpp index 7b967af8ce..1f7df26cd7 100644 --- a/include/picongpu/plugins/transitionRadiation/ExecuteParticleFilter.hpp +++ b/include/picongpu/plugins/transitionRadiation/ExecuteParticleFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera, Finn-Ole Carstens +/* Copyright 2017-2021 Rene Widera, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -31,69 +31,65 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ - - /** read the `transitionRadiationMask` of a species */ - template< bool hasFilter > - struct ExecuteParticleFilter + namespace plugins { - /** get the attribute value of `transitionRadiationMask` - * - * @param species buffer - * @param currentStep current simulation time step - * @return value of the attribute `transitionRadiationMask` - */ - template< typename T_Species > - void operator()( std::shared_ptr const &, const uint32_t currentStep ) + namespace transitionRadiation { - particles::Manipulate< - picongpu::plugins::transitionRadiation::GammaFilter, - T_Species - >{ }( currentStep ); - } - }; + /** read the `transitionRadiationMask` of a species */ + template + struct ExecuteParticleFilter + { + /** get the attribute value of `transitionRadiationMask` + * + * @param species buffer + * @param currentStep current simulation time step + * @return value of the attribute `transitionRadiationMask` + */ + template + void operator()(std::shared_ptr const&, const uint32_t currentStep) + { + particles::Manipulate{}( + currentStep); + } + }; - /** specialization - * - * specialization for the case that the species does not have the attribute - * `transitionRadiationMask` - */ - template< > - struct ExecuteParticleFilter< false > - { - /** get the attribute value of `transitionRadiationMask` - * - * @param particle to be used - * @return always true - */ - template< typename T_Species > - void operator()( const std::shared_ptr, const uint32_t currentStep ) - { } - }; + /** specialization + * + * specialization for the case that the species does not have the attribute + * `transitionRadiationMask` + */ + template<> + struct ExecuteParticleFilter + { + /** get the attribute value of `transitionRadiationMask` + * + * @param particle to be used + * @return always true + */ + template + void operator()(const std::shared_ptr, const uint32_t currentStep) + { + } + }; - /** 
execute the particle filter on a species - * - * It is **allowed** to call this function even if the species does not contain - * the attribute `transitionRadiationMask`. - * The filter is **not** executed if the species does not contain the attribute `transitionRadiationMask`. - * - * @tparam T_Species species type - * @param species species to be filtered - */ - template< typename T_Species > - void executeParticleFilter( std::shared_ptr& species, const uint32_t currentStep ) - { - constexpr bool hasRadiationFilter = pmacc::traits::HasIdentifier< - typename T_Species::FrameType, - transitionRadiationMask - >::type::value; + /** execute the particle filter on a species + * + * It is **allowed** to call this function even if the species does not contain + * the attribute `transitionRadiationMask`. + * The filter is **not** executed if the species does not contain the attribute `transitionRadiationMask`. + * + * @tparam T_Species species type + * @param species species to be filtered + */ + template + void executeParticleFilter(std::shared_ptr& species, const uint32_t currentStep) + { + constexpr bool hasRadiationFilter = pmacc::traits:: + HasIdentifier::type::value; - return ExecuteParticleFilter< hasRadiationFilter >{ }( species, currentStep ); - } + return ExecuteParticleFilter{}(species, currentStep); + } -} // namespace transitionRadiation -} // namespace plugins + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/GammaMask.hpp b/include/picongpu/plugins/transitionRadiation/GammaMask.hpp index 06172d696c..c55b847fe8 100644 --- a/include/picongpu/plugins/transitionRadiation/GammaMask.hpp +++ b/include/picongpu/plugins/transitionRadiation/GammaMask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera, Finn-Ole Carstens +/* Copyright 2017-2021 Rene Widera, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -26,65 +26,64 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ - /** read the `transitionRadiationMask` of a species */ - template< bool hasTransitionRadiationMask > - struct GetTransitionRadiationMask + namespace plugins { - /** get the attribute value of `transitionRadiationMask` - * - * @param particle particle to be used - * @return value of the attribute `transitionRadiationMask` - */ - template< typename T_Particle > - HDINLINE bool operator()( const T_Particle& particle ) const + namespace transitionRadiation { - return particle[ transitionRadiationMask_ ]; - } - }; + /** read the `transitionRadiationMask` of a species */ + template + struct GetTransitionRadiationMask + { + /** get the attribute value of `transitionRadiationMask` + * + * @param particle particle to be used + * @return value of the attribute `transitionRadiationMask` + */ + template + HDINLINE bool operator()(const T_Particle& particle) const + { + return particle[transitionRadiationMask_]; + } + }; - /** specialization - * - * specialization for the case that the species not owns the attribute - * `transitionRadiationMask` - */ - template< > - struct GetTransitionRadiationMask< false > - { - /** get the attribute value of `transitionRadiationMask` - * - * @param particle to be used - * @return always true - */ - template< typename T_Particle > - HDINLINE bool operator()( const T_Particle& ) const - { - return true; - } - }; + /** specialization + * + * specialization for the case that the species not owns the attribute + * `transitionRadiationMask` + */ + template<> + struct GetTransitionRadiationMask + { + /** get the attribute value of `transitionRadiationMask` + * + * @param particle to be used + * @return always true + */ + template + HDINLINE bool operator()(const T_Particle&) const + { + return true; + } + }; - /** get the value of the particle attribute `transitionRadiationMask` - * - * Allow to read out the value of the attribute 
`transitionRadiationMask` also if - * it is not defined for the particle. - * - * @tparam T_Particle particle type - * @param particle valid particle - * @return particle attribute value `transitionRadiationMask`, always `true` if attribute `transitionRadiationMask` is not defined - */ - template< typename T_Particle > - HDINLINE bool getTransitionRadiationMask( const T_Particle& particle ) - { - constexpr bool hasTransitionRadiationMask = pmacc::traits::HasIdentifier< - typename T_Particle::FrameType, - transitionRadiationMask - >::type::value; - return GetTransitionRadiationMask< hasTransitionRadiationMask >{}( particle ); - } + /** get the value of the particle attribute `transitionRadiationMask` + * + * Allow to read out the value of the attribute `transitionRadiationMask` also if + * it is not defined for the particle. + * + * @tparam T_Particle particle type + * @param particle valid particle + * @return particle attribute value `transitionRadiationMask`, always `true` if attribute + * `transitionRadiationMask` is not defined + */ + template + HDINLINE bool getTransitionRadiationMask(const T_Particle& particle) + { + constexpr bool hasTransitionRadiationMask = pmacc::traits:: + HasIdentifier::type::value; + return GetTransitionRadiationMask{}(particle); + } -} // namespace transitionRadiation -} // namespace plugins + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/Particle.hpp b/include/picongpu/plugins/transitionRadiation/Particle.hpp index 14fad463dd..a26dd697a6 100644 --- a/include/picongpu/plugins/transitionRadiation/Particle.hpp +++ b/include/picongpu/plugins/transitionRadiation/Particle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -21,164 +21,135 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ - /** Particle class for transition radiation calculation. - * - * @param locationSet global position of the macro-particle - * @param momentumSet momentum of macro-particle - * @param charge - */ - class Particle + namespace plugins { - private: - float3_X const & momentum; - float_X const mass; - float3_X location; - float_X gamma; - float3_X beta; - float_X betaAbs; - - public: - HDINLINE - Particle( - float3_X const & locationSet, - float3_X const & momentumSet, - float_X const massSet - ) : - location( locationSet ), - momentum( momentumSet ), - mass( massSet ) - { - gamma = calcGamma( ); - beta = calcBeta( ); - betaAbs = math::sqrt( ( beta * beta ).sumOfComponents( ) ); - } - - //! @return momentum - HDINLINE - float3_X - getMomentum( ) const - { - return momentum; - } - - //! @return normalized momentum - HDINLINE - float_X - getU( ) const - { - return gamma * betaAbs; - } - - //! @return velocity v = beta * c - HDINLINE - float_X - getVel( ) const - { - return betaAbs * picongpu::SPEED_OF_LIGHT; - } - - //! propagates the current window to the foil position - HDINLINE - void - propagate( const float_X & propagationDistance ) - { - location += propagationDistance * beta; - } - - //! @return polar angle phi of momentum - HDINLINE - float_X - getMomPhi( ) const - { - // add pi to atan2 function, because phi is in range from 0 to 2 pi - return picongpu::math::atan2( - momentum.x( ), - momentum.z( ) - ) + picongpu::PI; - } - - //! @return azimuth angle psi of momentum - HDINLINE - float_X - getMomTheta( ) const - { - //because of floating point precision x^2+y^2+z^2; - - /** Implementation of transition radiation for in situ calculation in PIConGPU - * - * The transition radiation implemented in this plugin is based on - * C. B. Schroeder, E. Esarey, J. van Tilborg, and W. P. 
Leemans: - * Theory of coherent transition radiation generated at a plasma-vacuum interface - * (DOI:https://doi.org/10.1103/PhysRevE.69.016501) - * - * Transition radiation is created by charged particles moving through an - * interface where one medium has a different diffraction index as the other - * medium. Since it is mostly used to analyze electron bunches, this plugin - * assumes that the analyzed particles have the mass and charge of electrons. - * - * @tparam T_ParticlesType particle type to compute transition radiation from - */ - template< - typename T_ParticlesType - > - class TransitionRadiation : public ILightweightPlugin + namespace plugins { - private: - - using SuperCellSize = MappingDesc::SuperCellSize; - - using radLog = plugins::radiation::PIConGPUVerboseRadiation; - - GridBuffer< float_X, DIM1 > * incTransRad = nullptr; - GridBuffer< complex_X, DIM1 > * cohTransRadPara = nullptr; - GridBuffer< complex_X, DIM1 > * cohTransRadPerp = nullptr; - GridBuffer< float_X, DIM1 > * numParticles = nullptr; - - transitionRadiation::frequencies::InitFreqFunctor freqInit; - transitionRadiation::frequencies::FreqFunctor freqFkt; - - float_X * tmpITR = nullptr; - complex_X * tmpCTRpara = nullptr; - complex_X * tmpCTRperp = nullptr; - float_X * tmpNum = nullptr; - float_X * theTransRad = nullptr; - MappingDesc * cellDescription = nullptr; - std::string notifyPeriod; - uint32_t timeStep; - - std::string speciesName; - std::string pluginName; - std::string pluginPrefix; - std::string filenamePrefix; - std::string folderTransRad; - - float3_X * detectorPositions = nullptr; - float_X * detectorFrequencies = nullptr; - - bool isMaster = false; - uint32_t currentStep = 0; - - mpi::MPIReduce reduce; - - public: - //! 
Constructor - TransitionRadiation( ) : - pluginName( "TransitionRadiation: calculate transition radiation of species" ), - speciesName( T_ParticlesType::FrameType::getName( ) ), - pluginPrefix( speciesName + std::string( "_transRad" ) ), - folderTransRad( "transRad" ), - filenamePrefix( pluginPrefix ) - { - Environment< >::get( ).PluginConnector( ).registerPlugin( this ); - } - - virtual - ~TransitionRadiation( ) - { } - - /** Plugin management - * - * Implementation of base class function. Calculates the transition radiation - * by calling the according function of the kernel file, writes data to a - * file and resets the buffers if transition radiation is calculated for - * multiple timesteps. - * - * @param currentStep current step of simulation - */ - void - notify( - uint32_t currentStep - ) + namespace transitionRadiation { - log< radLog::SIMULATION_STATE >( "Transition Radition (%1%): calculate time step %2% " ) % speciesName % currentStep; + using namespace pmacc; + + namespace po = boost::program_options; + using complex_X = pmacc::math::Complex; + + /** Implementation of transition radiation for in situ calculation in PIConGPU + * + * The transition radiation implemented in this plugin is based on + * C. B. Schroeder, E. Esarey, J. van Tilborg, and W. P. Leemans: + * Theory of coherent transition radiation generated at a plasma-vacuum interface + * (DOI:https://doi.org/10.1103/PhysRevE.69.016501) + * + * Transition radiation is created by charged particles moving through an + * interface where one medium has a different diffraction index as the other + * medium. Since it is mostly used to analyze electron bunches, this plugin + * assumes that the analyzed particles have the mass and charge of electrons. 
+ * + * @tparam T_ParticlesType particle type to compute transition radiation from + */ + template + class TransitionRadiation : public ILightweightPlugin + { + private: + using SuperCellSize = MappingDesc::SuperCellSize; + + using radLog = plugins::radiation::PIConGPUVerboseRadiation; + + GridBuffer* incTransRad = nullptr; + GridBuffer* cohTransRadPara = nullptr; + GridBuffer* cohTransRadPerp = nullptr; + GridBuffer* numParticles = nullptr; + + transitionRadiation::frequencies::InitFreqFunctor freqInit; + transitionRadiation::frequencies::FreqFunctor freqFkt; + + float_X* tmpITR = nullptr; + complex_X* tmpCTRpara = nullptr; + complex_X* tmpCTRperp = nullptr; + float_X* tmpNum = nullptr; + float_X* theTransRad = nullptr; + MappingDesc* cellDescription = nullptr; + std::string notifyPeriod; + uint32_t timeStep; + + std::string speciesName; + std::string pluginName; + std::string pluginPrefix; + std::string filenamePrefix; + std::string folderTransRad; + + float3_X* detectorPositions = nullptr; + float_X* detectorFrequencies = nullptr; + + bool isMaster = false; + uint32_t currentStep = 0; + + mpi::MPIReduce reduce; + + public: + //! Constructor + TransitionRadiation() + : pluginName("TransitionRadiation: calculate transition radiation of species") + , speciesName(T_ParticlesType::FrameType::getName()) + , pluginPrefix(speciesName + std::string("_transRad")) + , folderTransRad("transRad") + , filenamePrefix(pluginPrefix) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } - resetBuffers( ); - this->currentStep = currentStep; + virtual ~TransitionRadiation() + { + } - calculateTransitionRadiation( currentStep ); + /** Plugin management + * + * Implementation of base class function. Calculates the transition radiation + * by calling the according function of the kernel file, writes data to a + * file and resets the buffers if transition radiation is calculated for + * multiple timesteps. 
+ * + * @param currentStep current step of simulation + */ + void notify(uint32_t currentStep) + { + log("Transition Radition (%1%): calculate time step %2% ") % speciesName + % currentStep; - log< radLog::SIMULATION_STATE >( "Transition Radition (%1%): finished time step %2% " ) % speciesName % currentStep; + resetBuffers(); + this->currentStep = currentStep; - collectDataGPUToMaster( ); - writeTransRadToText( ); + calculateTransitionRadiation(currentStep); - log< radLog::SIMULATION_STATE >( "Transition Radition (%1%): printed to table %2% " ) % speciesName % currentStep; - } + log("Transition Radition (%1%): finished time step %2% ") % speciesName + % currentStep; - /** Implementation of base class function. Registers plugin options. - * - * @param desc boost::program_options description - */ - void - pluginRegisterHelp( - po::options_description& desc - ) - { - desc.add_options( )( - ( pluginPrefix + ".period" ).c_str( ), - po::value< std::string >( ¬ifyPeriod ), - "enable plugin [for each n-th step]" - ); - } - - /** Implementation of base class function. - * - * @return name of plugin - */ - std::string - pluginGetName( ) const - { - return pluginName; - } - - /** Implementation of base class function. Sets mapping description. - * - * @param cellDescription - */ - void - setMappingDescription( - MappingDesc *cellDescription - ) - { - this->cellDescription = cellDescription; - } + collectDataGPUToMaster(); + writeTransRadToText(); - private: - //! Resets buffers for multiple transition radiation calculation per simulation. 
- void - resetBuffers ( ) - { - /* Resets all Databuffers and arrays for repeated calculation of the - * transition radiation - */ - incTransRad->getDeviceBuffer( ).reset( false ); - cohTransRadPara->getDeviceBuffer( ).reset( false ); - cohTransRadPerp->getDeviceBuffer( ).reset( false ); - numParticles->getDeviceBuffer( ).reset( false ); - - for( unsigned int i=0; i < elementsTransitionRadiation( ); ++i ) - { - tmpITR[ i ] = 0; - tmpCTRpara[ i ] = 0; - tmpCTRperp[ i ] = 0; - tmpNum[ i ] = 0; - if( isMaster ) + log("Transition Radition (%1%): printed to table %2% ") % speciesName + % currentStep; + } + + /** Implementation of base class function. Registers plugin options. + * + * @param desc boost::program_options description + */ + void pluginRegisterHelp(po::options_description& desc) { - theTransRad[ i ] = 0; + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]"); } - } - } - - /** Create buffers and arrays - * - * Implementation of base class function. Create buffers and arrays for - * transition radiation calculation and create a folder for transition - * radiation storage. 
- */ - void - pluginLoad( ) - { - if( !notifyPeriod.empty( ) ) - { - tmpITR = new float_X[ elementsTransitionRadiation( ) ]; - tmpCTRpara = new complex_X[ elementsTransitionRadiation( ) ]; - tmpCTRperp = new complex_X[ elementsTransitionRadiation( ) ]; - tmpNum = new float_X[ elementsTransitionRadiation( ) ]; - - /*only rank 0 create a file*/ - isMaster = reduce.hasResult( mpi::reduceMethods::Reduce( ) ); - pmacc::Filesystem& fs = Environment::get( ).Filesystem( ); - - Environment<>::get( ).PluginConnector( ).setNotificationPeriod( this, notifyPeriod ); - - incTransRad = new GridBuffer< float_X, DIM1 >( - DataSpace< DIM1 > ( elementsTransitionRadiation( ) ) ); - cohTransRadPara = new GridBuffer< complex_X, DIM1 >( - DataSpace< DIM1 > ( elementsTransitionRadiation( ) ) ); - cohTransRadPerp = new GridBuffer< complex_X, DIM1 >( - DataSpace< DIM1 > ( elementsTransitionRadiation( ) ) ); - numParticles = new GridBuffer< float_X, DIM1 >( - DataSpace< DIM1 > ( elementsTransitionRadiation( ) ) ); - - freqInit.Init( listFrequencies::listLocation ); - freqFkt = freqInit.getFunctor( ); - - if ( isMaster ) + + /** Implementation of base class function. + * + * @return name of plugin + */ + std::string pluginGetName() const { - theTransRad = new float_X[ elementsTransitionRadiation( ) ]; - /* save detector position / observation direction */ - detectorPositions = new float3_X[ transitionRadiation::parameters::nObserver ]; - for( - uint32_t detectorIndex=0; - detectorIndex < transitionRadiation::parameters::nObserver; - ++detectorIndex - ) + return pluginName; + } + + /** Implementation of base class function. Sets mapping description. + * + * @param cellDescription + */ + void setMappingDescription(MappingDesc* cellDescription) + { + this->cellDescription = cellDescription; + } + + private: + //! Resets buffers for multiple transition radiation calculation per simulation. 
+ void resetBuffers() + { + /* Resets all Databuffers and arrays for repeated calculation of the + * transition radiation + */ + incTransRad->getDeviceBuffer().reset(false); + cohTransRadPara->getDeviceBuffer().reset(false); + cohTransRadPerp->getDeviceBuffer().reset(false); + numParticles->getDeviceBuffer().reset(false); + + for(unsigned int i = 0; i < elementsTransitionRadiation(); ++i) { - detectorPositions[ detectorIndex ] = transitionRadiation::observationDirection( detectorIndex ); + tmpITR[i] = 0; + tmpCTRpara[i] = 0; + tmpCTRperp[i] = 0; + tmpNum[i] = 0; + if(isMaster) + { + theTransRad[i] = 0; + } } + } - /* save detector frequencies */ - detectorFrequencies = new float_X[ transitionRadiation::frequencies::nOmega ]; - for( - uint32_t detectorIndex=0; - detectorIndex < transitionRadiation::frequencies::nOmega; - ++detectorIndex - ) + /** Create buffers and arrays + * + * Implementation of base class function. Create buffers and arrays for + * transition radiation calculation and create a folder for transition + * radiation storage. 
+ */ + void pluginLoad() + { + if(!notifyPeriod.empty()) { - detectorFrequencies[ detectorIndex ] = freqFkt.get( detectorIndex ); + tmpITR = new float_X[elementsTransitionRadiation()]; + tmpCTRpara = new complex_X[elementsTransitionRadiation()]; + tmpCTRperp = new complex_X[elementsTransitionRadiation()]; + tmpNum = new float_X[elementsTransitionRadiation()]; + + /*only rank 0 create a file*/ + isMaster = reduce.hasResult(mpi::reduceMethods::Reduce()); + pmacc::Filesystem& fs = Environment::get().Filesystem(); + + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + + incTransRad = new GridBuffer(DataSpace(elementsTransitionRadiation())); + cohTransRadPara + = new GridBuffer(DataSpace(elementsTransitionRadiation())); + cohTransRadPerp + = new GridBuffer(DataSpace(elementsTransitionRadiation())); + numParticles = new GridBuffer(DataSpace(elementsTransitionRadiation())); + + freqInit.Init(listFrequencies::listLocation); + freqFkt = freqInit.getFunctor(); + + if(isMaster) + { + theTransRad = new float_X[elementsTransitionRadiation()]; + /* save detector position / observation direction */ + detectorPositions = new float3_X[transitionRadiation::parameters::nObserver]; + for(uint32_t detectorIndex = 0; detectorIndex < transitionRadiation::parameters::nObserver; + ++detectorIndex) + { + detectorPositions[detectorIndex] + = transitionRadiation::observationDirection(detectorIndex); + } + + /* save detector frequencies */ + detectorFrequencies = new float_X[transitionRadiation::frequencies::nOmega]; + for(uint32_t detectorIndex = 0; detectorIndex < transitionRadiation::frequencies::nOmega; + ++detectorIndex) + { + detectorFrequencies[detectorIndex] = freqFkt.get(detectorIndex); + } + + for(unsigned int i = 0; i < elementsTransitionRadiation(); ++i) + { + theTransRad[i] = 0; + } + + fs.createDirectory(folderTransRad); + fs.setDirectoryPermissions(folderTransRad); + } } + } - for ( unsigned int i=0; i< elementsTransitionRadiation( ); ++i ) + //! 
Implementation of base class function. Deletes buffers andf arrays. + void pluginUnload() + { + if(!notifyPeriod.empty()) { - theTransRad[ i ] = 0; + if(isMaster) + { + __deleteArray(theTransRad); + } + CUDA_CHECK(cuplaGetLastError()); + __delete(incTransRad); + __delete(cohTransRadPara); + __delete(cohTransRadPerp); + __delete(numParticles); + __deleteArray(tmpITR); + __deleteArray(tmpCTRpara); + __deleteArray(tmpCTRperp); + __deleteArray(tmpNum); } + } - fs.createDirectory( folderTransRad ); - fs.setDirectoryPermissions( folderTransRad ); + //! Moves transition radiation data from GPUs to CPUs. + void copyRadiationDeviceToHost() + { + incTransRad->deviceToHost(); + __getTransactionEvent().waitForFinished(); + cohTransRadPara->deviceToHost(); + __getTransactionEvent().waitForFinished(); + cohTransRadPerp->deviceToHost(); + __getTransactionEvent().waitForFinished(); + numParticles->deviceToHost(); + __getTransactionEvent().waitForFinished(); } - } - } - //! Implementation of base class function. Deletes buffers andf arrays. - void - pluginUnload( ) - { - if( !notifyPeriod.empty( ) ) - { - if( isMaster ) + /** Amount of transition radiation values + * + * Calculates amount of different transition radiation values, which + * have to be computed. + * + * @return amount of transition radiation values to be calculated + */ + static unsigned int elementsTransitionRadiation() { - __deleteArray( theTransRad ); + return transitionRadiation::frequencies::nOmega + * transitionRadiation::parameters::nObserver; // storage for amplitude results on GPU } - CUDA_CHECK( cudaGetLastError( ) ); - __delete( incTransRad ); - __delete( cohTransRadPara ); - __delete( cohTransRadPerp ); - __delete( numParticles ); - __deleteArray( tmpITR ); - __deleteArray( tmpCTRpara ); - __deleteArray( tmpCTRperp ); - __deleteArray( tmpNum ); - } - } - - //! Moves transition radiation data from GPUs to CPUs. 
- void - copyRadiationDeviceToHost( ) - { - incTransRad->deviceToHost( ); - __getTransactionEvent( ).waitForFinished( ); - cohTransRadPara->deviceToHost( ); - __getTransactionEvent( ).waitForFinished( ); - cohTransRadPerp->deviceToHost( ); - __getTransactionEvent( ).waitForFinished( ); - numParticles->deviceToHost( ); - __getTransactionEvent( ).waitForFinished( ); - } - - /** Amount of transition radiation values - * - * Calculates amount of different transition radiation values, which - * have to be computed. - * - * @return amount of transition radiation values to be calculated - */ - static - unsigned int - elementsTransitionRadiation( ) - { - return transitionRadiation::frequencies::nOmega * transitionRadiation::parameters::nObserver; // storage for amplitude results on GPU - } - - /** Combine transition radiation data from each CPU and store result on master. - * - * @remark copyRadiationDeviceToHost( ) should be called before. - */ - void - collectRadiationOnMaster( ) - { - reduce( - nvidia::functors::Add( ), - tmpITR, - incTransRad->getHostBuffer( ).getBasePointer( ), - elementsTransitionRadiation( ), - mpi::reduceMethods::Reduce( ) - ); - reduce( - nvidia::functors::Add( ), - tmpCTRpara, - cohTransRadPara->getHostBuffer( ).getBasePointer( ), - elementsTransitionRadiation( ), - mpi::reduceMethods::Reduce( ) - ); - reduce( - nvidia::functors::Add( ), - tmpCTRperp, - cohTransRadPerp->getHostBuffer( ).getBasePointer( ), - elementsTransitionRadiation( ), - mpi::reduceMethods::Reduce( ) - ); - reduce( - nvidia::functors::Add( ), - tmpNum, - numParticles->getHostBuffer( ).getBasePointer( ), - elementsTransitionRadiation( ), - mpi::reduceMethods::Reduce( ) - ); - } - - //! Write transition radiation data to file. 
- void - writeTransRadToText( ) - { - // only the master rank writes data - if (isMaster) - { - // get time step as string - std::stringstream o_step; - o_step << currentStep; - // write totalRad data to txt - writeFile(theTransRad, folderTransRad + "/" + filenamePrefix + "_" + o_step.str( ) + ".dat"); - } - } + /** Combine transition radiation data from each CPU and store result on master. + * + * @remark copyRadiationDeviceToHost( ) should be called before. + */ + void collectRadiationOnMaster() + { + reduce( + nvidia::functors::Add(), + tmpITR, + incTransRad->getHostBuffer().getBasePointer(), + elementsTransitionRadiation(), + mpi::reduceMethods::Reduce()); + reduce( + nvidia::functors::Add(), + tmpCTRpara, + cohTransRadPara->getHostBuffer().getBasePointer(), + elementsTransitionRadiation(), + mpi::reduceMethods::Reduce()); + reduce( + nvidia::functors::Add(), + tmpCTRperp, + cohTransRadPerp->getHostBuffer().getBasePointer(), + elementsTransitionRadiation(), + mpi::reduceMethods::Reduce()); + reduce( + nvidia::functors::Add(), + tmpNum, + numParticles->getHostBuffer().getBasePointer(), + elementsTransitionRadiation(), + mpi::reduceMethods::Reduce()); + } + + //! Write transition radiation data to file. + void writeTransRadToText() + { + // only the master rank writes data + if(isMaster) + { + // get time step as string + std::stringstream o_step; + o_step << currentStep; + + // write totalRad data to txt + writeFile(theTransRad, folderTransRad + "/" + filenamePrefix + "_" + o_step.str() + ".dat"); + } + } - //! perform all operations to get data from GPU to master - void - collectDataGPUToMaster( ) - { - // collect data GPU -> CPU -> Master - copyRadiationDeviceToHost( ); - collectRadiationOnMaster( ); - sumTransitionRadiation( theTransRad, tmpITR, tmpCTRpara, tmpCTRperp, tmpNum ); - } - - /** Final transition radiation calculation on CPU side - * - * Calculate transition radiation integrals. 
This can't happen on the GPU - * since the absolute square of a sum can't be moved within a sum. - * - * @param targetArray array to store transition radiation in - * @param itrArray array of calculated incoherent transition radiation - * @param ctrParaArray array of complex values of the parallel part of the coherent transition radiation - * @param ctrPerpArray array of complex values of the perpendicular part of coherent transition radiation - * @param numArray array of amount of particles - */ - void - sumTransitionRadiation( - float_X * targetArray, - float_X * itrArray, - complex_X * ctrParaArray, - complex_X * ctrPerpArray, - float_X * numArray - ) - { - if (isMaster) - { - /************************************************************ - ******** Here happens the true physical calculation ******** - ************************************************************/ - for( unsigned int i = 0; i < elementsTransitionRadiation( ); ++i ) + //! perform all operations to get data from GPU to master + void collectDataGPUToMaster() { - const float_X ctrPara = math::abs2( ctrParaArray[ i ] ); - const float_X ctrPerp = math::abs2( ctrPerpArray[ i ] ); - if (numArray[i] != 0.0) + // collect data GPU -> CPU -> Master + copyRadiationDeviceToHost(); + collectRadiationOnMaster(); + sumTransitionRadiation(theTransRad, tmpITR, tmpCTRpara, tmpCTRperp, tmpNum); + } + + /** Final transition radiation calculation on CPU side + * + * Calculate transition radiation integrals. This can't happen on the GPU + * since the absolute square of a sum can't be moved within a sum. 
+ * + * @param targetArray array to store transition radiation in + * @param itrArray array of calculated incoherent transition radiation + * @param ctrParaArray array of complex values of the parallel part of the coherent transition + * radiation + * @param ctrPerpArray array of complex values of the perpendicular part of coherent transition + * radiation + * @param numArray array of amount of particles + */ + void sumTransitionRadiation( + float_X* targetArray, + float_X* itrArray, + complex_X* ctrParaArray, + complex_X* ctrPerpArray, + float_X* numArray) + { + if(isMaster) { - targetArray[ i ] = ( - itrArray[ i ] + ( numArray[ i ] - 1.0 ) * ( ctrPara + ctrPerp ) / numArray[i] - ); + /************************************************************ + ******** Here happens the true physical calculation ******** + ************************************************************/ + for(unsigned int i = 0; i < elementsTransitionRadiation(); ++i) + { + const float_X ctrPara = pmacc::math::abs2(ctrParaArray[i]); + const float_X ctrPerp = pmacc::math::abs2(ctrPerpArray[i]); + if(numArray[i] != 0.0) + { + targetArray[i] + = (itrArray[i] + (numArray[i] - 1.0) * (ctrPara + ctrPerp) / numArray[i]); + } + else + targetArray[i] = 0.0; + } } - else - targetArray[ i ] = 0.0; } - } - } - - /** Writes file with transition radiation data with the right units. - * - * @param values transition radiation values - * @param name name of file - */ - void - writeFile( - float_X * values, - std::string name - ) - { - std::ofstream outFile; - outFile.open( - name.c_str( ), - std::ofstream::out | std::ostream::trunc - ); - if ( !outFile ) - { - std::cerr << "Can't open file [" << name << "] for output, disable plugin output. 
" << std::endl; - isMaster = false; // no Master anymore -> no process is able to write - } - else - { - outFile << "# \t"; - outFile << transitionRadiation::frequencies::getParameters( ); - outFile << transitionRadiation::parameters::nPhi << "\t"; - outFile << transitionRadiation::parameters::phiMin << "\t"; - outFile << transitionRadiation::parameters::phiMax << "\t"; - outFile << transitionRadiation::parameters::nTheta << "\t"; - outFile << transitionRadiation::parameters::thetaMin << "\t"; - outFile << transitionRadiation::parameters::thetaMax << "\t"; - outFile << std::endl; - - for ( - unsigned int index_direction = 0; - index_direction < transitionRadiation::parameters::nObserver; - ++index_direction - ) // over all directions + + /** Writes file with transition radiation data with the right units. + * + * @param values transition radiation values + * @param name name of file + */ + void writeFile(float_X* values, std::string name) { - for ( - unsigned index_omega = 0; - index_omega < transitionRadiation::frequencies::nOmega; - ++index_omega - ) // over all frequencies + std::ofstream outFile; + outFile.open(name.c_str(), std::ofstream::out | std::ostream::trunc); + if(!outFile) { - // Take Amplitude for one direction and frequency, - // calculate the square of the absolute value - // and write to file. - constexpr float_X transRadUnit = - SI::ELECTRON_CHARGE_SI * SI::ELECTRON_CHARGE_SI * - ( 1.0 / ( 4 * PI * SI::EPS0_SI * PI * PI * SI::SPEED_OF_LIGHT_SI ) ); - outFile << - values[ - index_direction * transitionRadiation::frequencies::nOmega + index_omega - ] * transRadUnit << "\t"; - - } // for loop over all frequencies - - outFile << std::endl; - } // for loop over all frequencies - - outFile.flush( ); - outFile << std::endl; //now all data are written to file - - if ( outFile.fail( ) ) - std::cerr << "Error on flushing file [" << name << "]. 
" << std::endl; - - outFile.close( ); - } - } - - /** Kernel call - * - * Executes the particle filter and calls the transition radiation kernel - * of the kernel file. - * - * @param currentStep current simulation iteration step - */ - void - calculateTransitionRadiation( - uint32_t currentStep - ) - { - DataConnector &dc = Environment< >::get( ).DataConnector( ); - auto particles = dc.get< T_ParticlesType >( - T_ParticlesType::FrameType::getName( ), - true - ); - - /* execute the particle filter */ - transitionRadiation::executeParticleFilter( particles, currentStep ); - - const auto gridDim_rad = transitionRadiation::parameters::nObserver; - - /* number of threads per block = number of cells in a super cell - * = number of particles in a Frame - * (THIS IS PIConGPU SPECIFIC) - * A Frame is the entity that stores particles. - * A super cell can have many Frames. - * Particles in a Frame can be accessed in parallel. - */ - - // Some funny things that make it possible for the kernel to calculate - // the absolute position of the particles - DataSpace< simDim > localSize( cellDescription->getGridLayout( ).getDataSpaceWithoutGuarding( ) ); - const uint32_t numSlides = MovingWindow::getInstance( ).getSlideCounter( currentStep ); - const SubGrid< simDim >& subGrid = Environment< simDim >::get( ).SubGrid( ); - DataSpace< simDim > globalOffset( subGrid.getLocalDomain( ).offset ); - globalOffset.y( ) += ( localSize.y( ) * numSlides ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - // PIC-like kernel call of the radiation kernel - PMACC_KERNEL( KernelTransRadParticles< - numWorkers - >{ } )( - gridDim_rad, - numWorkers - )( - /*Pointer to particles memory on the device*/ - particles->getDeviceParticlesBox( ), - - /*Pointer to memory of radiated amplitude on the device*/ - incTransRad->getDeviceBuffer( ).getDataBox( ), - cohTransRadPara->getDeviceBuffer( ).getDataBox( ), - 
cohTransRadPerp->getDeviceBuffer( ).getDataBox( ), - numParticles->getDeviceBuffer( ).getDataBox( ), - globalOffset, - *cellDescription, - freqFkt, - subGrid.getGlobalDomain( ).size - ); - - dc.releaseData( T_ParticlesType::FrameType::getName( ) ); - } - }; - -} // namespace transitionRadiation -} // namespace plugins - -namespace particles -{ -namespace traits -{ - template< - typename T_Species, - typename T_UnspecifiedSpecies - > - struct SpeciesEligibleForSolver< - T_Species, - plugins::transitionRadiation::TransitionRadiation< T_UnspecifiedSpecies > - > + std::cerr << "Can't open file [" << name << "] for output, disable plugin output. " + << std::endl; + isMaster = false; // no Master anymore -> no process is able to write + } + else + { + outFile << "# \t"; + outFile << transitionRadiation::frequencies::getParameters(); + outFile << transitionRadiation::parameters::nPhi << "\t"; + outFile << transitionRadiation::parameters::phiMin << "\t"; + outFile << transitionRadiation::parameters::phiMax << "\t"; + outFile << transitionRadiation::parameters::nTheta << "\t"; + outFile << transitionRadiation::parameters::thetaMin << "\t"; + outFile << transitionRadiation::parameters::thetaMax << "\t"; + outFile << std::endl; + + for(unsigned int index_direction = 0; + index_direction < transitionRadiation::parameters::nObserver; + ++index_direction) // over all directions + { + for(unsigned index_omega = 0; index_omega < transitionRadiation::frequencies::nOmega; + ++index_omega) // over all frequencies + { + // Take Amplitude for one direction and frequency, + // calculate the square of the absolute value + // and write to file. 
+ constexpr float_X transRadUnit = SI::ELECTRON_CHARGE_SI * SI::ELECTRON_CHARGE_SI + * (1.0 / (4 * PI * SI::EPS0_SI * PI * PI * SI::SPEED_OF_LIGHT_SI)); + outFile + << values[index_direction * transitionRadiation::frequencies::nOmega + index_omega] + * transRadUnit + << "\t"; + + } // for loop over all frequencies + + outFile << std::endl; + } // for loop over all frequencies + + outFile.flush(); + outFile << std::endl; // now all data are written to file + + if(outFile.fail()) + std::cerr << "Error on flushing file [" << name << "]. " << std::endl; + + outFile.close(); + } + } + + /** Kernel call + * + * Executes the particle filter and calls the transition radiation kernel + * of the kernel file. + * + * @param currentStep current simulation iteration step + */ + void calculateTransitionRadiation(uint32_t currentStep) + { + DataConnector& dc = Environment<>::get().DataConnector(); + auto particles = dc.get(T_ParticlesType::FrameType::getName(), true); + + /* execute the particle filter */ + transitionRadiation::executeParticleFilter(particles, currentStep); + + const auto gridDim_rad = transitionRadiation::parameters::nObserver; + + /* number of threads per block = number of cells in a super cell + * = number of particles in a Frame + * (THIS IS PIConGPU SPECIFIC) + * A Frame is the entity that stores particles. + * A super cell can have many Frames. + * Particles in a Frame can be accessed in parallel. 
+ */ + + // Some funny things that make it possible for the kernel to calculate + // the absolute position of the particles + DataSpace localSize(cellDescription->getGridLayout().getDataSpaceWithoutGuarding()); + const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(currentStep); + const SubGrid& subGrid = Environment::get().SubGrid(); + DataSpace globalOffset(subGrid.getLocalDomain().offset); + globalOffset.y() += (localSize.y() * numSlides); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + // PIC-like kernel call of the radiation kernel + PMACC_KERNEL(KernelTransRadParticles{}) + (gridDim_rad, numWorkers)( + /*Pointer to particles memory on the device*/ + particles->getDeviceParticlesBox(), + + /*Pointer to memory of radiated amplitude on the device*/ + incTransRad->getDeviceBuffer().getDataBox(), + cohTransRadPara->getDeviceBuffer().getDataBox(), + cohTransRadPerp->getDeviceBuffer().getDataBox(), + numParticles->getDeviceBuffer().getDataBox(), + globalOffset, + *cellDescription, + freqFkt, + subGrid.getGlobalDomain().size); + + dc.releaseData(T_ParticlesType::FrameType::getName()); + } + }; + + } // namespace transitionRadiation + } // namespace plugins + + namespace particles { - using FrameType = typename T_Species::FrameType; - - // this plugin needs at least the weighting and momentum attributes - using RequiredIdentifiers = MakeSeq_t< - weighting, - momentum, - position< > - >; - - using SpeciesHasIdentifiers = typename pmacc::traits::HasIdentifiers< - FrameType, - RequiredIdentifiers - >::type; - - // this plugin needs a mass ratio for energy calculation from momentum - using SpeciesHasMass = typename pmacc::traits::HasFlag< - FrameType, - massRatio<> - >::type; - - // transition radiation requires charged particles - using SpeciesHasCharge = typename pmacc::traits::HasFlag< - FrameType, - chargeRatio<> - >::type; - - // this plugin needs the transitionRadiationMask flag - using SpeciesHasMask = 
typename pmacc::traits::HasIdentifier< - FrameType, - transitionRadiationMask - >::type; - - using type = typename bmpl::and_< - SpeciesHasIdentifiers, - SpeciesHasMass, - SpeciesHasCharge, - SpeciesHasMask - >; - }; -} // namespace traits -} // namespace particles + namespace traits + { + template + struct SpeciesEligibleForSolver< + T_Species, + plugins::transitionRadiation::TransitionRadiation> + { + using FrameType = typename T_Species::FrameType; + + // this plugin needs at least the weighting and momentum attributes + using RequiredIdentifiers = MakeSeq_t>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + // this plugin needs a mass ratio for energy calculation from momentum + using SpeciesHasMass = typename pmacc::traits::HasFlag>::type; + + // transition radiation requires charged particles + using SpeciesHasCharge = typename pmacc::traits::HasFlag>::type; + + // this plugin needs the transitionRadiationMask flag + using SpeciesHasMask = typename pmacc::traits::HasIdentifier::type; + + using type = + typename bmpl::and_; + }; + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/TransitionRadiation.kernel b/include/picongpu/plugins/transitionRadiation/TransitionRadiation.kernel index edb8706c0c..158a7952cb 100644 --- a/include/picongpu/plugins/transitionRadiation/TransitionRadiation.kernel +++ b/include/picongpu/plugins/transitionRadiation/TransitionRadiation.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, * Klaus Steiniger, Felix Schmitt, Benjamin Worpitz, * Finn-Ole Carstens * @@ -36,353 +36,302 @@ #include - namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ -/** Kernel for computation of transition radiation on GPUs. 
- * - * @tparam T_numWorkers maximal CUDA threads - * @tparam T_ParBox box with particles - * @tparam T_DBox box with float data - * @tparam T_DBoxComplex box with complex data - * @tparam T_Mapping MappingDescription object - * @tparam T_Acc alpaka accelerator - * @param acc alpaka accelerator - * @param incTransRad output array for storage incoherent transition radiation - * @param cohTransRadPara output array for storage of parallel parts of coherent transition radiation - * @param cohTransRadPerp output array for storage of perpendicular parts of coherent transition radiation - * @param numParticles output array for amount of particles - * @param globalOffset offset of simulation - * @param mapper MappingDesction object - * @param freqFkt frequency functor - * @param simBoxSize size of simulation box - */ - template< - uint32_t T_numWorkers - > - struct KernelTransRadParticles + namespace plugins { - template< - typename T_ParBox, - typename T_DBox, - typename T_DBoxComplex, // Formfactor returns complex values - typename T_Mapping, - typename T_Acc - > - DINLINE - void operator( )( - T_Acc const & acc, - T_ParBox pb, - T_DBox incTransRad, - T_DBoxComplex cohTransRadPara, - T_DBoxComplex cohTransRadPerp, - T_DBox numParticles, - DataSpace< simDim > globalOffset, - T_Mapping mapper, - transitionRadiation::frequencies::FreqFunctor freqFkt, - DataSpace< simDim > simBoxSize - ) const + namespace transitionRadiation { - using namespace mappings::threads; - using complex_X = pmacc::math::Complex< float_X >; - using complex_64 = pmacc::math::Complex< float_64 >; + /** Kernel for computation of transition radiation on GPUs. 
+ * + * @tparam T_numWorkers maximal CUDA threads + * @tparam T_ParBox box with particles + * @tparam T_DBox box with float data + * @tparam T_DBoxComplex box with complex data + * @tparam T_Mapping MappingDescription object + * @tparam T_Acc alpaka accelerator + * @param acc alpaka accelerator + * @param incTransRad output array for storage incoherent transition radiation + * @param cohTransRadPara output array for storage of parallel parts of coherent transition radiation + * @param cohTransRadPerp output array for storage of perpendicular parts of coherent transition radiation + * @param numParticles output array for amount of particles + * @param globalOffset offset of simulation + * @param mapper MappingDesction object + * @param freqFkt frequency functor + * @param simBoxSize size of simulation box + */ + template + struct KernelTransRadParticles + { + template< + typename T_ParBox, + typename T_DBox, + typename T_DBoxComplex, // Formfactor returns complex values + typename T_Mapping, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_ParBox pb, + T_DBox incTransRad, + T_DBoxComplex cohTransRadPara, + T_DBoxComplex cohTransRadPerp, + T_DBox numParticles, + DataSpace globalOffset, + T_Mapping mapper, + transitionRadiation::frequencies::FreqFunctor freqFkt, + DataSpace simBoxSize) const + { + using namespace mappings::threads; + using complex_X = pmacc::math::Complex; + using complex_64 = pmacc::math::Complex; - constexpr uint32_t frameSize = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorker = T_numWorkers; + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorker = T_numWorkers; - using FrameType = typename T_ParBox::FrameType; - using FramePtr = typename T_ParBox::FramePtr; + using FrameType = typename T_ParBox::FrameType; + using FramePtr = typename T_ParBox::FramePtr; - uint32_t const workerIdx = threadIdx.x; + uint32_t const workerIdx = 
cupla::threadIdx(acc).x; - /* parallelized in 2 dimensions: - * looking direction (theta, phi) - * (not anymore data handling) - * create shared memory for particle data to reduce global memory calls - * every thread in a block loads one particle and every thread runs - * through all particles and calculates the radiation for one direction - * for all frequencies - */ - constexpr int blockSize = pmacc::math::CT::volume< SuperCellSize >::type::value; + /* parallelized in 2 dimensions: + * looking direction (theta, phi) + * (not anymore data handling) + * create shared memory for particle data to reduce global memory calls + * every thread in a block loads one particle and every thread runs + * through all particles and calculates the radiation for one direction + * for all frequencies + */ + constexpr int blockSize = pmacc::math::CT::volume::type::value; - // perpendicular part of normalized energy - PMACC_SMEM( acc, energyPerp_s, memory::Array< float_X, blockSize > ); + // perpendicular part of normalized energy + PMACC_SMEM(acc, energyPerp_s, memory::Array); - // parallel part of normalized energy - PMACC_SMEM( acc, energyPara_s, memory::Array< float_X, blockSize > ); + // parallel part of normalized energy + PMACC_SMEM(acc, energyPara_s, memory::Array); - // exponent of the form factor - PMACC_SMEM( acc, formfactorExponent_s, memory::Array< complex_X, blockSize > ); + // exponent of the form factor + PMACC_SMEM(acc, formfactorExponent_s, memory::Array); - // storage for macro particle weighting needed if - // the coherent and incoherent radiation of a single - // macro-particle needs to be considered - PMACC_SMEM( acc, radWeighting_s, memory::Array< float_X, blockSize > ); + // storage for macro particle weighting needed if + // the coherent and incoherent radiation of a single + // macro-particle needs to be considered + PMACC_SMEM(acc, radWeighting_s, memory::Array); - // particle counter used if not all particles are considered for - // radiation calculation - 
PMACC_SMEM( acc, counter_s, int ); + // particle counter used if not all particles are considered for + // radiation calculation + PMACC_SMEM(acc, counter_s, int); - int const theta_idx = blockIdx.x; //blockIdx.x is used to determine theta + int const theta_idx = cupla::blockIdx(acc).x; // cupla::blockIdx(acc).x is used to determine theta - // looking direction (needed for observer) used in the thread - float3_X const look = transitionRadiation::observationDirection( theta_idx ); + // looking direction (needed for observer) used in the thread + float3_X const look = transitionRadiation::observationDirection(theta_idx); - // get extent of guarding super cells (needed to ignore them) - DataSpace< simDim > const guardingSuperCells = mapper.getGuardingSuperCells( ); + // get extent of guarding super cells (needed to ignore them) + DataSpace const guardingSuperCells = mapper.getGuardingSuperCells(); - /* number of super cells on GPU per dimension (still including guard cells) - * remove both guards from count [later one sided guard needs to be added again] - */ - DataSpace< simDim > const superCellsCount( mapper.getGridSuperCells( ) - 2 * guardingSuperCells ); + /* number of super cells on GPU per dimension (still including guard cells) + * remove both guards from count [later one sided guard needs to be added again] + */ + DataSpace const superCellsCount(mapper.getGridSuperCells() - 2 * guardingSuperCells); - // get absolute number of relevant super cells - int const numSuperCells = superCellsCount.productOfComponents( ); + // get absolute number of relevant super cells + int const numSuperCells = superCellsCount.productOfComponents(); - // propagation distance for the particle bunch - float_X const propagationDistance = parameters::foilPosition - globalOffset[ 1 ]; + // propagation distance for the particle bunch + float_X const propagationDistance = parameters::foilPosition - globalOffset[1]; - /* go over all super cells on GPU - * but ignore all guarding supercells 
- */ - for( int superCellIndex = 0; superCellIndex <= numSuperCells; ++superCellIndex ) - { - // select SuperCell and add one sided guard again - DataSpace< simDim > const superCell = DataSpaceOperations< simDim >::map( - superCellsCount, - superCellIndex - ) + guardingSuperCells; - - - // -guardingSuperCells remove guarding block - DataSpace< simDim > const superCellOffset( - globalOffset + ( - ( superCell - guardingSuperCells ) * - SuperCellSize::toRT( ) - ) - ); - - // pointer to frame storing particles - FramePtr frame = pb.getLastFrame( superCell ); - - // number of particles in current frame - lcellId_t particlesInFrame = pb.getSuperCell( superCell ).getSizeLastFrame( ); - - /* go to next supercell - * - * if "isValid" is false then there is no frame - * inside the superCell (anymore) - */ - while( frame.isValid( ) ) - { - /* since a race condition can occur if "continue loop" is called, - * all threads must wait for the selection of a new frame - * until all threads have evaluated "isValid" - */ - __syncthreads( ); - - ForEachIdx< - IdxConfig< - 1, - numWorker - > - > onlyMaster{ workerIdx }; - - /* The Master process (thread 0) in every thread block is in - * charge of loading a frame from - * the current super cell and evaluate the total number of - * particles in this frame. 
- */ - onlyMaster( - [ & ]( - uint32_t const, - uint32_t const - ) - { - counter_s = 0; - } - ); + /* go over all super cells on GPU + * but ignore all guarding supercells + */ + for(int superCellIndex = 0; superCellIndex <= numSuperCells; ++superCellIndex) + { + // select SuperCell and add one sided guard again + DataSpace const superCell + = DataSpaceOperations::map(superCellsCount, superCellIndex) + guardingSuperCells; + - __syncthreads( ); + // -guardingSuperCells remove guarding block + DataSpace const superCellOffset( + globalOffset + ((superCell - guardingSuperCells) * SuperCellSize::toRT())); - using ParticleDomCfg = IdxConfig< - frameSize, - numWorker - >; + // pointer to frame storing particles + FramePtr frame = pb.getLastFrame(superCell); - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle{ workerIdx }; + // number of particles in current frame + lcellId_t particlesInFrame = pb.getSuperCell(superCell).getSizeLastFrame(); - forEachParticle( - [ & ]( - uint32_t const linearIdx, - uint32_t const - ) + /* go to next supercell + * + * if "isValid" is false then there is no frame + * inside the superCell (anymore) + */ + while(frame.isValid()) { - // only threads with particles are running - if( linearIdx < particlesInFrame ) - { - auto par = frame[ linearIdx ]; - // get particle momenta - float3_X const particleMomentum = par[ momentum_ ]; - /* initializes "saveParticleAt" flag with -1 - * because "counter_s" will never be -1 - * therefore, if a particle is saved, a value of counter - * is stored in "saveParticleAt" != -1 - * THIS IS ACTUALLY ONLY NEEDED IF: the radiation flag was set - * LATER: can this be optimized? 
- */ - - int saveParticleAt = -1; - - // only moving particles create transition radiation - if( ( particleMomentum * particleMomentum ).sumOfComponents( ) > 0.0) + /* since a race condition can occur if "continue loop" is called, + * all threads must wait for the selection of a new frame + * until all threads have evaluated "isValid" + */ + cupla::__syncthreads(acc); + + ForEachIdx> onlyMaster{workerIdx}; + + /* The Master process (thread 0) in every thread block is in + * charge of loading a frame from + * the current super cell and evaluate the total number of + * particles in this frame. + */ + onlyMaster([&](uint32_t const, uint32_t const) { counter_s = 0; }); + + cupla::__syncthreads(acc); + + using ParticleDomCfg = IdxConfig; + + // loop over all particles in the frame + ForEachIdx forEachParticle{workerIdx}; + + forEachParticle([&](uint32_t const linearIdx, uint32_t const) { + // only threads with particles are running + if(linearIdx < particlesInFrame) { - if( transitionRadiation::getTransitionRadiationMask( par ) ) - saveParticleAt = nvidia::atomicAllInc( - acc, - &counter_s, - ::alpaka::hierarchy::Threads{ } - ); - - /* for information: - * atomicAdd returns an int with the previous - * value of "counter_s" != -1 - * therefore, if a particle is selected - * "saveParticleAt" != -1 - */ - // if a particle needs to be considered - if( saveParticleAt != -1 ) + auto par = frame[linearIdx]; + // get particle momenta + float3_X const particleMomentum = par[momentum_]; + /* initializes "saveParticleAt" flag with -1 + * because "counter_s" will never be -1 + * therefore, if a particle is saved, a value of counter + * is stored in "saveParticleAt" != -1 + * THIS IS ACTUALLY ONLY NEEDED IF: the radiation flag was set + * LATER: can this be optimized? 
+ */ + + int saveParticleAt = -1; + + // only moving particles create transition radiation + if((particleMomentum * particleMomentum).sumOfComponents() > 0.0) { - // calculate global position - lcellId_t const cellIdx = par[ localCellIdx_ ]; - - // position inside of the cell - floatD_X const pos = par[ position_ ]; - - // calculate global position of cell - DataSpace< simDim > const globalPos( - superCellOffset + - DataSpaceOperations< simDim >:: - template map< SuperCellSize >( cellIdx ) - ); - - // add global position of cell with local position of particle in cell - float3_X particleLocation; - // set z component to zero in case of simDim==DIM2 - particleLocation[ 2 ] = 0.0; - // run over all components and compute gobal position - for( int i = 0; i < simDim; ++i ) - particleLocation[ i ] = ( - float_X( globalPos[ i ] ) + pos[ i ] - ) * cellSize[ i ]; - - /* get macro-particle weighting - * - * Info: - * the weighting is the number of real particles described - * by a macro-particle - */ - float_X const weighting = par[ weighting_ ]; - radWeighting_s[ saveParticleAt ] = weighting; - - // mass of macro-particle - float_X const particleMass = attribute::getMass( - weighting, - par - ); - - // using transition radiation particle class - transitionRadiation::Particle particle( - particleLocation, - particleMomentum, - particleMass - ); - - // only propagate particles if it is set up in transitionRadiation.param - if( parameters::foilPosition != 0.0 ) - particle.propagate( propagationDistance ); - - // create calculator for TR calculations - transitionRadiation::Calculator const calculator = transitionRadiation::Calculator( - particle, - look - ); - - // calculate values for transition radiation - energyPara_s[ saveParticleAt ] = calculator.calcEnergyPara( ); - - energyPerp_s[ saveParticleAt ] = calculator.calcEnergyPerp( ); - - formfactorExponent_s[ saveParticleAt ] = calculator.calcFormFactorExponent( ); - } - } // only moving particles - } // only threads with 
particle - } - ); // for each particle - __syncthreads( ); - - // run over all valid omegas for this thread - for( int o = workerIdx; o < transitionRadiation::frequencies::nOmega; o += T_numWorkers ) - { - float_X itrSum = 0.0; - float_X totalParticles = 0.0; - complex_X ctrSumPara = complex_X( 0.0, 0.0 ); - complex_X ctrSumPerp = complex_X( 0.0, 0.0 ); + if(transitionRadiation::getTransitionRadiationMask(par)) + saveParticleAt = nvidia::atomicAllInc( + acc, + &counter_s, + ::alpaka::hierarchy::Threads{}); + + /* for information: + * atomicAdd returns an int with the previous + * value of "counter_s" != -1 + * therefore, if a particle is selected + * "saveParticleAt" != -1 + */ + // if a particle needs to be considered + if(saveParticleAt != -1) + { + // calculate global position + lcellId_t const cellIdx = par[localCellIdx_]; + + // position inside of the cell + floatD_X const pos = par[position_]; + + // calculate global position of cell + DataSpace const globalPos( + superCellOffset + + DataSpaceOperations::template map(cellIdx)); + + // add global position of cell with local position of particle in cell + float3_X particleLocation; + // set z component to zero in case of simDim==DIM2 + particleLocation[2] = 0.0; + // run over all components and compute gobal position + for(int i = 0; i < simDim; ++i) + particleLocation[i] = (float_X(globalPos[i]) + pos[i]) * cellSize[i]; + + /* get macro-particle weighting + * + * Info: + * the weighting is the number of real particles described + * by a macro-particle + */ + float_X const weighting = par[weighting_]; + radWeighting_s[saveParticleAt] = weighting; + + // mass of macro-particle + float_X const particleMass = attribute::getMass(weighting, par); + + // using transition radiation particle class + transitionRadiation::Particle particle( + particleLocation, + particleMomentum, + particleMass); + + // only propagate particles if it is set up in transitionRadiation.param + if(parameters::foilPosition != 0.0) + 
particle.propagate(propagationDistance); + + // create calculator for TR calculations + transitionRadiation::Calculator const calculator + = transitionRadiation::Calculator(particle, look); + + // calculate values for transition radiation + energyPara_s[saveParticleAt] = calculator.calcEnergyPara(); + + energyPerp_s[saveParticleAt] = calculator.calcEnergyPerp(); + + formfactorExponent_s[saveParticleAt] = calculator.calcFormFactorExponent(); + } + } // only moving particles + } // only threads with particle + }); // for each particle + cupla::__syncthreads(acc); + + // run over all valid omegas for this thread + for(int o = workerIdx; o < transitionRadiation::frequencies::nOmega; o += T_numWorkers) + { + float_X itrSum = 0.0; + float_X totalParticles = 0.0; + complex_X ctrSumPara = complex_X(0.0, 0.0); + complex_X ctrSumPerp = complex_X(0.0, 0.0); - // create a form factor object for physical correct coherence effects within macro-particles - macroParticleFormFactor::radFormFactor const macroParticleFormFactor{ }; + // create a form factor object for physical correct coherence effects within + // macro-particles + macroParticleFormFactor::radFormFactor const macroParticleFormFactor{}; - for( int j = 0; j < counter_s; ++j ) - { - float_X const omega = freqFkt( o ); - complex_X const formfactor = transitionRadiation::calcFormFactor( - omega, - formfactorExponent_s[ j ] - ) * macroParticleFormFactor( - radWeighting_s[ j ], - omega, - look - ); - - itrSum += radWeighting_s[ j ] * ( - energyPerp_s[ j ] * energyPerp_s[ j ] + - energyPara_s[ j ] * energyPara_s[ j ] - ); - totalParticles += radWeighting_s[ j ]; - - ctrSumPara += energyPara_s[ j ] * formfactor; - ctrSumPerp += energyPerp_s[ j ] * formfactor; - } - - int const index = theta_idx * transitionRadiation::frequencies::nOmega + o; - incTransRad[ index ] += itrSum; - numParticles[ index ] += totalParticles; - cohTransRadPara[ index ] += ctrSumPara; - cohTransRadPerp[ index ] += ctrSumPerp; - } - - __syncthreads( ); 
- - /* First threads starts loading next frame of the super-cell: - * - * Info: - * The calculation starts with the last SuperCell (must not be full filled) - * all previous SuperCells are full with particles - */ - particlesInFrame = frameSize; - frame = pb.getPreviousFrame( frame ); - } // while frame is valid - } // for all supercells - } - }; // struct KernelTransRad - -} // namespace transitionRadiation -} // namespace plugins + for(int j = 0; j < counter_s; ++j) + { + float_X const omega = freqFkt(o); + complex_X const formfactor + = transitionRadiation::calcFormFactor(omega, formfactorExponent_s[j]) + * macroParticleFormFactor(radWeighting_s[j], omega, look); + + itrSum += radWeighting_s[j] + * (energyPerp_s[j] * energyPerp_s[j] + energyPara_s[j] * energyPara_s[j]); + totalParticles += radWeighting_s[j]; + + ctrSumPara += energyPara_s[j] * formfactor; + ctrSumPerp += energyPerp_s[j] * formfactor; + } + + int const index = theta_idx * transitionRadiation::frequencies::nOmega + o; + incTransRad[index] += itrSum; + numParticles[index] += totalParticles; + cohTransRadPara[index] += ctrSumPara; + cohTransRadPerp[index] += ctrSumPerp; + } + + cupla::__syncthreads(acc); + + /* First threads starts loading next frame of the super-cell: + * + * Info: + * The calculation starts with the last SuperCell (must not be full filled) + * all previous SuperCells are full with particles + */ + particlesInFrame = frameSize; + frame = pb.getPreviousFrame(frame); + } // while frame is valid + } // for all supercells + } + }; // struct KernelTransRad + + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/frequencies/LinearFrequencies.hpp b/include/picongpu/plugins/transitionRadiation/frequencies/LinearFrequencies.hpp index 022a0cb0d2..d598bb5f4c 100644 --- a/include/picongpu/plugins/transitionRadiation/frequencies/LinearFrequencies.hpp +++ 
b/include/picongpu/plugins/transitionRadiation/frequencies/LinearFrequencies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. * @@ -24,58 +24,60 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ -namespace linearFrequencies -{ - class FreqFunctor + namespace plugins { - public: - FreqFunctor( void ) - { } - - HDINLINE float_X operator( )( const int ID ) + namespace transitionRadiation { - return omegaMin + float_X( ID ) * deltaOmega; - } + namespace linearFrequencies + { + class FreqFunctor + { + public: + FreqFunctor(void) + { + } - HINLINE float_X get( const int ID ) - { - return operator( )( ID ); - } - }; // FreqFunctor + HDINLINE float_X operator()(const int ID) + { + return omegaMin + float_X(ID) * deltaOmega; + } - class InitFreqFunctor - { - public: - InitFreqFunctor( void ) - { } + HINLINE float_X get(const int ID) + { + return operator()(ID); + } + }; // FreqFunctor - HINLINE void Init( const std::string path ) - { } + class InitFreqFunctor + { + public: + InitFreqFunctor(void) + { + } + HINLINE void Init(const std::string path) + { + } - HINLINE FreqFunctor getFunctor( void ) - { - return FreqFunctor( ); - } - }; // InitFreqFunctor - //! @return frequency params as string - HINLINE - std::string - getParameters( void ) - { - std::string params = std::string( "lin\t" ); - params += std::to_string( nOmega ) + "\t"; - params += std::to_string( SI::omegaMin ) + "\t"; - params += std::to_string( SI::omegaMax ) + "\t"; - return params; - } + HINLINE FreqFunctor getFunctor(void) + { + return FreqFunctor(); + } + }; // InitFreqFunctor + + //! 
@return frequency params as string + HINLINE + std::string getParameters(void) + { + std::string params = std::string("lin\t"); + params += std::to_string(nOmega) + "\t"; + params += std::to_string(SI::omegaMin) + "\t"; + params += std::to_string(SI::omegaMax) + "\t"; + return params; + } -} // namespace linearFrequencies -} // namespace transitionRadiation -} // namespace plugins + } // namespace linearFrequencies + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/frequencies/ListFrequencies.hpp b/include/picongpu/plugins/transitionRadiation/frequencies/ListFrequencies.hpp index 2a18c5617d..e98be46953 100644 --- a/include/picongpu/plugins/transitionRadiation/frequencies/ListFrequencies.hpp +++ b/include/picongpu/plugins/transitionRadiation/frequencies/ListFrequencies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl, Finn-Ole Carstens +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl, Finn-Ole Carstens * * This file is part of PIConGPU. * @@ -27,115 +27,115 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ -namespace listFrequencies -{ - class FreqFunctor + namespace plugins { - public: - - typedef GridBuffer< float_X, DIM1 >::DataBoxType DBoxType; - - FreqFunctor( void ) - { } - - template< typename T > - FreqFunctor( T frequencies_handed ) - { - this->frequencies_dev = frequencies_handed->getDeviceBuffer( ).getDataBox( ); - this->frequencies_host = frequencies_handed->getHostBuffer( ).getDataBox( ); - } - - HDINLINE float_X operator( )( const unsigned int ID ) - { - return ( ID < frequencies::nOmega ) ? frequencies_dev[ ID ] : 0.0; - } - - HINLINE float_X get( const unsigned int ID ) - { - return ( ID < frequencies::nOmega ) ? 
frequencies_host[ ID ] : 0.0; - } - - private: - DBoxType frequencies_dev; - DBoxType frequencies_host; - }; // FreqFunctor - - - - class InitFreqFunctor - { - public: - InitFreqFunctor( void ) - { } - - ~InitFreqFunctor( void ) - { - __delete( frequencyBuffer ); - } - - typedef GridBuffer< picongpu::float_X, DIM1 >::DataBoxType DBoxType; - - HINLINE void Init( const std::string path ) + namespace transitionRadiation { - - frequencyBuffer = new GridBuffer< float_X, DIM1 >( DataSpace< DIM1 >( nOmega ) ); - - - DBoxType frequencyDB = frequencyBuffer->getHostBuffer( ).getDataBox( ); - - std::ifstream freqListFile( path.c_str( ) ); - unsigned int i; - - printf( "freq: %s\n", path.c_str( ) ); - - if( !freqListFile ) - { - throw std::runtime_error( std::string( "The radiation-frequency-file " ) + - path + std::string( " could not be found.\n" ) ); - } - - - for( i = 0; i < nOmega && !freqListFile.eof( ); ++i ) + namespace listFrequencies { - freqListFile >> frequencyDB[ i ]; - // verbose output of loaded frequencies if verbose level PHYSICS is set: - log< plugins::radiation::PIConGPUVerboseRadiation::PHYSICS >("freq: %1% \t %2%") % i % frequencyDB[ i ]; - frequencyDB[ i ] *= UNIT_TIME; - } - - if( i != nOmega ) - { - throw std::runtime_error( std::string( "The number of frequencies in the list and the number of frequencies in the parameters differ.\n" ) ); - } - - frequencyBuffer->hostToDevice( ); - - } - - FreqFunctor getFunctor( void ) - { - return FreqFunctor( frequencyBuffer ); - } - - private: - GridBuffer< float_X, DIM1 > * frequencyBuffer = nullptr; - }; // InitFreqFunctor - - //! 
@return frequency params as string - HINLINE - std::string - getParameters( ) - { - std::string params = std::string( "list\t" ); - params += std::string( listLocation ) + std::string( "\t" ); - return params; - } - -} // namespace listFrequencies -} // namespace transitionRadiation -} // namespace plugins + class FreqFunctor + { + public: + typedef GridBuffer::DataBoxType DBoxType; + + FreqFunctor(void) + { + } + + template + FreqFunctor(T frequencies_handed) + { + this->frequencies_dev = frequencies_handed->getDeviceBuffer().getDataBox(); + this->frequencies_host = frequencies_handed->getHostBuffer().getDataBox(); + } + + HDINLINE float_X operator()(const unsigned int ID) + { + return (ID < frequencies::nOmega) ? frequencies_dev[ID] : 0.0; + } + + HINLINE float_X get(const unsigned int ID) + { + return (ID < frequencies::nOmega) ? frequencies_host[ID] : 0.0; + } + + private: + DBoxType frequencies_dev; + DBoxType frequencies_host; + }; // FreqFunctor + + + class InitFreqFunctor + { + public: + InitFreqFunctor(void) + { + } + + ~InitFreqFunctor(void) + { + __delete(frequencyBuffer); + } + + typedef GridBuffer::DataBoxType DBoxType; + + HINLINE void Init(const std::string path) + { + frequencyBuffer = new GridBuffer(DataSpace(nOmega)); + + + DBoxType frequencyDB = frequencyBuffer->getHostBuffer().getDataBox(); + + std::ifstream freqListFile(path.c_str()); + unsigned int i; + + printf("freq: %s\n", path.c_str()); + + if(!freqListFile) + { + throw std::runtime_error( + std::string("The radiation-frequency-file ") + path + + std::string(" could not be found.\n")); + } + + + for(i = 0; i < nOmega && !freqListFile.eof(); ++i) + { + freqListFile >> frequencyDB[i]; + // verbose output of loaded frequencies if verbose level PHYSICS is set: + log("freq: %1% \t %2%") % i + % frequencyDB[i]; + frequencyDB[i] *= UNIT_TIME; + } + + if(i != nOmega) + { + throw std::runtime_error(std::string("The number of frequencies in the list and the " + "number of frequencies in the 
parameters differ.\n")); + } + + frequencyBuffer->hostToDevice(); + } + + FreqFunctor getFunctor(void) + { + return FreqFunctor(frequencyBuffer); + } + + private: + GridBuffer* frequencyBuffer = nullptr; + }; // InitFreqFunctor + + //! @return frequency params as string + HINLINE + std::string getParameters() + { + std::string params = std::string("list\t"); + params += std::string(listLocation) + std::string("\t"); + return params; + } + + } // namespace listFrequencies + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/transitionRadiation/frequencies/LogFrequencies.hpp b/include/picongpu/plugins/transitionRadiation/frequencies/LogFrequencies.hpp index c669aba896..2f6ac9d5c9 100644 --- a/include/picongpu/plugins/transitionRadiation/frequencies/LogFrequencies.hpp +++ b/include/picongpu/plugins/transitionRadiation/frequencies/LogFrequencies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -24,67 +24,68 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ -namespace logFrequencies -{ - class FreqFunctor + namespace plugins { - public: - FreqFunctor( void ) + namespace transitionRadiation { - omega_log_min = math::log( omegaMin ); - delta_omega_log = ( math::log( omegaMax ) - omega_log_min ) / float_X( nOmega - 1 ); - } + namespace logFrequencies + { + class FreqFunctor + { + public: + FreqFunctor(void) + { + omega_log_min = math::log(omegaMin); + delta_omega_log = (math::log(omegaMax) - omega_log_min) / float_X(nOmega - 1); + } - HDINLINE float_X operator( )( const int ID ) - { - return math::exp( omega_log_min + ( float_X( ID ) ) * delta_omega_log ); - } + HDINLINE float_X operator()(const int ID) + { + return math::exp(omega_log_min + (float_X(ID)) * delta_omega_log); + } - HINLINE float_X get( const int ID ) - { - return operator( )( ID ); - } + HINLINE float_X get(const int ID) + { + return operator()(ID); + } - private: - float_X omega_log_min; - float_X delta_omega_log; - }; // FreqFunctor + private: + float_X omega_log_min; + float_X delta_omega_log; + }; // FreqFunctor - class InitFreqFunctor - { - public: - InitFreqFunctor( void ) - { } + class InitFreqFunctor + { + public: + InitFreqFunctor(void) + { + } - HINLINE void Init( const std::string path ) - { } + HINLINE void Init(const std::string path) + { + } - HINLINE FreqFunctor getFunctor( void ) - { - return FreqFunctor( ); - } - }; // InitFreqFunctor + HINLINE FreqFunctor getFunctor(void) + { + return FreqFunctor(); + } + }; // InitFreqFunctor - //! @return frequency params as string - HINLINE - std::string - getParameters( void ) - { - std::string params = std::string( "log\t" ); - params += std::to_string( nOmega ) + "\t"; - params += std::to_string( SI::omegaMin ) + "\t"; - params += std::to_string( SI::omegaMax ) + "\t"; - return params; - } + //! 
@return frequency params as string + HINLINE + std::string getParameters(void) + { + std::string params = std::string("log\t"); + params += std::to_string(nOmega) + "\t"; + params += std::to_string(SI::omegaMin) + "\t"; + params += std::to_string(SI::omegaMax) + "\t"; + return params; + } -} // namespace logFrequencies -} // namespace transitionRadiation -} // namespace plugins + } // namespace logFrequencies + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/DetermineElectronDensitySolver.hpp b/include/picongpu/plugins/xrayScattering/DetermineElectronDensitySolver.hpp new file mode 100644 index 0000000000..626962b6c1 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/DetermineElectronDensitySolver.hpp @@ -0,0 +1,65 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include +#include "picongpu/particles/particleToGrid/derivedAttributes/DerivedAttributes.def" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + using namespace particles::particleToGrid; + + template + struct IsIon + { + using FrameType = typename T_ParticleType::FrameType; + using type = typename pmacc::traits::HasFlag::type; + }; + + + /** Chose an electron density solver for a given particle type. + * + * Switches between a bound electron number density solver for particles + * with the boundElectrons attribute (ions) and a particle number density + * solver for other particle types (electrons). + * + * @tparam T_ParticleType Scattering particles + * @return ::type TmpField solver to be used + */ + template + struct DetermineElectronDensitySolver + { + using IonSolver = + typename CreateFieldTmpOperation_t:: + Solver; + + using ElectronSolver = + typename CreateFieldTmpOperation_t::Solver; + + using type = + typename boost::mpl::if_::type, IonSolver, ElectronSolver>::type; + }; + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/GetScatteringVector.hpp b/include/picongpu/plugins/xrayScattering/GetScatteringVector.hpp new file mode 100644 index 0000000000..d9fcc8bc2e --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/GetScatteringVector.hpp @@ -0,0 +1,74 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + //! Maps a 1D output array index to the corresponding point in the q-space. + struct GetScatteringVector + { + /** + * @param q_min Begin of the output range for all axis. + * @param q_max End of the output range for all axis. + * @param q_step Output array grid spacing. + * @param numVectors The output array size. + * @param iterOffset Offset for an index shift. + */ + HDINLINE GetScatteringVector( + float2_X const q_min, + float2_X const q_max, + float2_X const q_step, + DataSpace const numVectors, + uint32_t const iterOffset) + : m_q_min(q_min) + , m_q_max(q_max) + , m_q_step(q_step) + , m_numVectors(numVectors) + , m_iterOffset(iterOffset) + { + } + + HDINLINE float2_X operator[](const uint32_t& idx) + { + const uint32_t totalIdx = idx + m_iterOffset; + uint32_t i_y(totalIdx % m_numVectors.y()); + uint32_t i_x(totalIdx / m_numVectors.y()); + + return m_q_min + m_q_step * float2_X(i_x, i_y); + } + + private: + // Pmacc struct members memory alignment for objects stored on devices. 
+ PMACC_ALIGN(m_q_min, const float2_X); + PMACC_ALIGN(m_q_max, const float2_X); + PMACC_ALIGN(m_q_step, const float2_X); + PMACC_ALIGN(m_numVectors, const DataSpace); + PMACC_ALIGN(m_iterOffset, const uint32_t); + }; + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/XrayScattering.hpp b/include/picongpu/plugins/xrayScattering/XrayScattering.hpp new file mode 100644 index 0000000000..9bbcf08892 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/XrayScattering.hpp @@ -0,0 +1,691 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch, + * Klaus Steiniger, Felix Schmitt, Benjamin Worpitz, + * Juncheng E, Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/traits/SpeciesEligibleForSolver.hpp" +#include "picongpu/plugins/ISimulationPlugin.hpp" +#include "picongpu/plugins/common/stringHelpers.hpp" + +#include "picongpu/fields/FieldTmp.hpp" +#include "picongpu/param/xrayScattering.param" +#include "picongpu/plugins/xrayScattering/beam/XrayScatteringBeam.hpp" +#include "picongpu/plugins/xrayScattering/XrayScattering.kernel" +#include "picongpu/plugins/xrayScattering/XrayScatteringWriter.hpp" +#include "picongpu/plugins/xrayScattering/xrayScatteringUtilities.hpp" +#include "picongpu/plugins/xrayScattering/GetScatteringVector.hpp" +#include "picongpu/plugins/xrayScattering/DetermineElectronDensitySolver.hpp" +#include "picongpu/particles/particleToGrid/derivedAttributes/Density.def" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + using namespace pmacc; + using namespace picongpu::SI; + namespace po = boost::program_options; + using complex_X = pmacc::math::Complex; + + + /** xrayScattering plugin + * This plugin simulates the SAXS scattering amplitude + * from the particles number density. + * + * @tparam T_ParticlesType Scatterers + **/ + template + class XrayScattering : public ISimulationPlugin + { + private: + using SuperCellSize = MappingDesc::SuperCellSize; + + MappingDesc cellDescription; + uint32_t currentStep; + + //! Probing beam characterization + std::unique_ptr probingBeam; + + // memory: + using ComplexBuffer = GridBuffer; + std::unique_ptr amplitude; + // Needed as long as opePMD-api doesn't support complex values: + //! Storage for amplitude real part used when dumping data + std::vector realPart; + //! 
Storage for amplitude imaginary part used when dumping data + std::vector imgPart; + // Used only in the distributed mode: + //! Storage for receiving amplitude data from another node + std::vector amplitudeReceive; + //! Number of scattering vectors on initialy last rank + uint64_t resOfVectors; + // Used only in the mirrored mode: + std::vector amplitudeMaster; + + // Variables for plugin options: + std::string notifyPeriod; + std::string speciesName; + std::string pluginName; + std::string pluginPrefix; + std::string fileName; + std::string fileExtension; + std::string compressionMethod; + std::string outputPeriod_s; + std::string memoryLayout; + //! Plugin functioning mode + OutputMemoryLayout outputLayout; + //! Time steps at which the output is dumped + using SeqOfTimeSlices = std::vector; + SeqOfTimeSlices outputPeriod; + + /** Range of scattering vector + * The scattering vector here is defined as + * 4*pi*sin(theta)/lambda, where 2 * theta is the angle between the + * incoming k-vector and the scattered one. + * See the definition in this paper https://doi.org/10.1063/1.5008289. + **/ + float2_X q_min, q_max, q_step; + //! Number of scattering vectors + DataSpace numVectors; + + uint32_t totalSimulationCells; + + // Needed to handle the parallelization over multiple hosts. + bool isMaster; + uint32_t mpiRank; + //! Total number of nodes + uint32_t countRanks; + //! Number of Times the distributed output was passed along + uint32_t accumulatedRotations; + mpi::MPIReduce reduce; + + //! Output writer + std::unique_ptr> dataWriter; + + + public: + //! XrayScattering object initializer. 
+ XrayScattering() + : pluginName("xrayScattering: Calculate the SAXS scattering intensity of a " + "species.") + , speciesName(T_ParticlesType::FrameType::getName()) + , pluginPrefix(speciesName + std::string("_xrayScattering")) + , + // this is bodged so it passes the verification at + // MappingDescription.hpp:79 + cellDescription(DataSpace(SuperCellSize::toRT())) + , isMaster(false) + , currentStep(0) + , accumulatedRotations(0) + { + Environment<>::get().PluginConnector().registerPlugin(this); + } + + //! XrayScattering object destructor. + ~XrayScattering() override + { + } + + + //! Adds command line options and their descriptions. + void pluginRegisterHelp(po::options_description& desc) override + { + desc.add_options()( + (pluginPrefix + ".period").c_str(), + po::value(¬ifyPeriod), + "enable plugin [for each n-th step]")( + (pluginPrefix + ".outputPeriod").c_str(), + po::value(&outputPeriod_s)->default_value("1"), + "dump amplitude [for each n-th step]")( + (pluginPrefix + ".qx_max").c_str(), + po::value(&q_max[0])->default_value(5), + "reciprocal space range qx_max (A^-1)")( + (pluginPrefix + ".qy_max").c_str(), + po::value(&q_max[1])->default_value(5), + "reciprocal space range qy_max (A^-1)")( + (pluginPrefix + ".qx_min").c_str(), + po::value(&q_min[0])->default_value(-5), + "reciprocal space range qx_min (A^-1)")( + (pluginPrefix + ".qy_min").c_str(), + po::value(&q_min[1])->default_value(-5), + "reciprocal space range qy_min (A^-1)")( + (pluginPrefix + ".n_qx").c_str(), + po::value(&numVectors[0])->default_value(100), + "number of qx")( + (pluginPrefix + ".n_qy").c_str(), + po::value(&numVectors[1])->default_value(100), + "number of qy")( + (pluginPrefix + ".file").c_str(), + po::value(&fileName)->default_value(pluginName + "Output"), + "output file name")( + (pluginPrefix + ".ext").c_str(), + po::value(&fileExtension)->default_value("bp"), + "openPMD filename extension (this controls the backend " + "picked by the openPMD API)")( + (pluginPrefix + 
".compression").c_str(), + po::value(&compressionMethod)->default_value(""), + "Backend-specific openPMD compression method, e.g., zlib " + "(see `adios_config -m` for help)")( + (pluginPrefix + ".memoryLayout").c_str(), + po::value(&memoryLayout)->default_value("mirror"), + "Possible values: 'mirror' and 'distribute'" + "Output can be mirrored on all Host+Device pairs or" + " uniformly distributed over all nodes. Distribute can be used " + "when the output array is to big to store the complete " + "computed q-space on one device."); + } + + + //! Get plugin name. + std::string pluginGetName() const override + { + return pluginName; + } + + + //! Sets Mapping description for the xrayScattering plugin. + void setMappingDescription(MappingDesc* cellDescriptionLoc) override + { + cellDescription = *cellDescriptionLoc; + } + + + void restart(uint32_t timeStep, const std::string restartDirectory) override + { + log("XrayScattering : restart not" + "yet implemented - start with zero values"); + // TODO: Support for restarting. + } + + + void checkpoint(uint32_t timeStep, const std::string restartDirectory) override + { + log("XrayScattering : checkpoint not" + "yet implemented - nothing was saved"); + + // TODO: Support for restarting. + } + + + private: + //! Prepare the plugin in the simulation initialization phase. + void pluginLoad() override + { + if(!notifyPeriod.empty()) + { + /* Beam has to be initialized later as the domain sizes. + * The value retrieved by getDomainSize in + * CoordinateTransform.hpp is still set to (0,0,0) when the + * XrayScattering object is initialized. + */ + probingBeam = std::make_unique(); + // Set the steps at which the xrayScattering amplitude is + // calculated. + Environment<>::get().PluginConnector().setNotificationPeriod(this, notifyPeriod); + // Set the memory layout in use. 
+ std::map layoutMap; + layoutMap["mirror"] = OutputMemoryLayout::Mirror; + layoutMap["distribute"] = OutputMemoryLayout::Distribute; + outputLayout = layoutMap.at(memoryLayout); + + GridController& gc = Environment::get().GridController(); + mpiRank = gc.getGlobalRank(); + isMaster = (mpiRank == 0); + + // Prepare amplitude buffer: + uint32_t bufferSize; + auto totalNumVectors = numVectors.productOfComponents(); + if(outputLayout == OutputMemoryLayout::Mirror) + { + // All vectors are stored on every node. + bufferSize = totalNumVectors; + // Initiate the additional amplitude storage for the reduce + // operation and initiate it with zeros. + amplitudeMaster.assign(totalNumVectors, complex_X(0.0)); + } + else + { + countRanks = gc.getGpuNodes().productOfComponents(); + // Number of scattering vectors in all but last chunk. + // (ceil integer division) + bufferSize = totalNumVectors / countRanks + ((totalNumVectors % countRanks) != 0); + // Number of scattering vectors on the last chunk. + resOfVectors = bufferSize - (bufferSize * countRanks - totalNumVectors); + // Initiate the additional amplitude storage for receiving + // data and initiate it with zeros. + amplitudeReceive.assign(bufferSize, complex_X(0.0)); + } + // Allocate amplitude buffer. + amplitude = std::make_unique(DataSpace(bufferSize)); + // Initialize, on device, its fields with zero. + amplitude->getDeviceBuffer().setValue(0.0); + + // Go to PIC unit system. + constexpr float_X invMeterToInvAngstrom = 1.0e10; + q_min = q_min * invMeterToInvAngstrom * UNIT_LENGTH; + q_max = q_max * invMeterToInvAngstrom * UNIT_LENGTH; + // Set the q-space grid spacing. + q_step = (q_max - q_min) / precisionCast(numVectors); + + // Rank 0 creates the output directory. 
+ pmacc::Filesystem& fs = Environment::get().Filesystem(); + if(isMaster) + { + fs.createDirectory("xrayScatteringOutput"); + fs.setDirectoryPermissions("xrayScatteringOutput"); + } + + // Chose the solver for populating a TmpField with the electron + // density (either the species density or the bound electron + // density). + using ElectronDensitySolver = typename DetermineElectronDensitySolver::type; + // Output unit: + const float_64 amplitudeUnit + = static_cast(FieldTmp::getUnit()[0]) * CELL_WIDTH_SI + * CELL_HEIGHT_SI * CELL_DEPTH_SI * ELECTRON_RADIUS_SI; + + // Set the total number of cells in the simulation. + totalSimulationCells + = Environment::get().SubGrid().getGlobalDomain().size.productOfComponents(); + + // Initialize an object responsible for output writing. + dataWriter = std::make_unique>( + pluginPrefix + "Output", + fileExtension, + "xrayScatteringOutput", + outputLayout, + compressionMethod, + precisionCast(numVectors), + q_step, + amplitudeUnit, + totalSimulationCells); + // Set the output period. + outputPeriod = pluginSystem::toTimeSlice(outputPeriod_s); + } + } + + + void pluginUnload() override + { + } + + + //! Collect amplitude data from each CPU on the master node. + void collectIntensityOnMaster() + { + amplitude->deviceToHost(); + __getTransactionEvent().waitForFinished(); + + reduce( + nvidia::functors::Add(), + amplitudeMaster.data(), + amplitude->getHostBuffer().getBasePointer(), + amplitude->getHostBuffer().getCurrentSize(), + mpi::reduceMethods::Reduce()); + } + + + //! Calculates the offset to the the currently processed output chunk. + HINLINE uint32_t calcOffset(uint32_t const& step) const + { + /* Chunks move with every "rotation" from left to the right (from + * smaller to a higher rank). So after one rotation the rank n has + * the n-1 chunk( counted from 0). 
+ * so: chunk = (rank - rotations) % countRanks + * to avoid a negative number in the modulo operation countRanks + * is added in the beginning and only totalRotations % countRanks + * is subtracted. + */ + uint32_t totalRotations = accumulatedRotations + step; + uint32_t chunk = mpiRank + countRanks; + chunk = ((chunk - (totalRotations % countRanks)) % countRanks); + return chunk * amplitude->getHostBuffer().getCurrentSize(); + } + + + //! Checks if this node hast the last output part. + HINLINE bool hasLastChunk(uint32_t const& step) const + { + uint32_t totalRotations = accumulatedRotations + step; + return mpiRank == (countRanks - 1 + totalRotations) % countRanks; + } + + + //! Writes amplitude data to disk. + HINLINE void writeOutput() + { + if(outputLayout == OutputMemoryLayout::Distribute) + { + amplitude->deviceToHost(); + __getTransactionEvent().waitForFinished(); + realPart = extractReal(amplitude->getHostBuffer()); + imgPart = extractImag(amplitude->getHostBuffer()); + + uint64_t offset = precisionCast(calcOffset(countRanks - 1)); + uint64_t extent; + if(hasLastChunk(countRanks - 1)) + extent = resOfVectors; + else + extent = amplitude->getHostBuffer().getCurrentSize(); + (*dataWriter)(currentStep, extent, offset, realPart, imgPart); + } + else + { + collectIntensityOnMaster(); + if(isMaster) + { + realPart = extractReal(amplitudeMaster); + imgPart = extractImag(amplitudeMaster); + (*dataWriter)(currentStep, realPart, imgPart); + } + // reset amplitudes back to zero + amplitudeMaster.assign(amplitudeMaster.size(), complex_X(0.0)); + } + } + + + /** Passes output chunks from one device to another. + * + * @param step Current step in the Loop over kernel runs, in the current + * simulation step. + */ + HINLINE void communicationOnStep(uint32_t const& step) + { + using namespace mpi; + // No action is necessary on the first step. + if(step == 0u) + return; + // Copy data calculated on GPU , on last step, to CPU memory. 
+ amplitude->deviceToHost(); + // Avoid deadlock between not finished pmacc tasks and mpi blocking + // collectives. + __getTransactionEvent().waitForFinished(); + // MPI asynchronous send & receive: + int bytesToSend = sizeof(complex_X) / sizeof(char); + bytesToSend *= amplitude->getHostBuffer().getCurrentSize(); + + // An mpi request to monitor a non blocking send transaction. + GridController& gc = Environment::get().GridController(); + MPI_Request transactionRequest; + // Pass data to the next node. + MPI_CHECK(MPI_Isend( + amplitude->getHostBuffer().getBasePointer(), + bytesToSend, + MPI_BYTE, + (mpiRank + 1) % countRanks, + 0, + gc.getCommunicator().getMPIComm(), + &transactionRequest)); + // Receive from the proceeding node (blocking transaction). + int receiveFrom = (mpiRank == 0u) ? countRanks - 1 : mpiRank - 1; + MPI_CHECK(MPI_Recv( + amplitudeReceive.data(), + bytesToSend, + MPI_BYTE, + std::move(receiveFrom), + 0, + gc.getCommunicator().getMPIComm(), + MPI_STATUS_IGNORE)); + + // Wait for the send transaction to end. + MPI_Wait(&transactionRequest, MPI_STATUS_IGNORE); + // Copy the received data to the host buffer. + copyVectorToBuffer(amplitudeReceive, amplitude->getHostBuffer()); + // Copy the received data to the device so it can be used as + // output in this step. + amplitude->hostToDevice(); + } + + + /** Calculates a form factor number density of the species. + * + * @param dc data connector + * @param globalOffset offset from the global to the local domain + * @return data box containing the calculated data. + */ + HINLINE FieldTmp::DataBoxType calculateDensity(DataConnector& dc, DataSpace& globalOffset) + { + // Check if there is at least one unused field available. + PMACC_CASSERT_MSG(_please_allocate_at_least_one_FieldTmp_in_memory_param, fieldTmpNumSlots > 0); + // Get a field for density storage. + auto tmpField = dc.get(FieldTmp::getUniqueId(0), true); + // Initiate with zeros. 
+ tmpField->getGridBuffer().getDeviceBuffer().setValue(FieldTmp::ValueType::create(0.0)); + // Chose species. + auto species = dc.get(T_ParticlesType::FrameType::getName(), true); + + // Chose the solver for populating a TmpField with the form factor + // density of the particles. + using ElectronDensitySolver = typename DetermineElectronDensitySolver::type; + // Calculate density. + tmpField->template computeValue(*species, currentStep); + // Release particle data. + dc.releaseData(T_ParticlesType::FrameType::getName()); + // Get the field data box. + FieldTmp::DataBoxType tmpFieldBox = tmpField->getGridBuffer().getDeviceBuffer().getDataBox(); + return tmpFieldBox; + } + + + /** Runs kernel when the output is distributed over nodes. + * + * A single kernel run adds result only to that output part which + * currently resides on the node. The output parts are passed along to + * the neighbouring node, in a circle. This repeats until every node has + * computed all scattering vectors. + * + * @param cellsGrid field grid, without GUARD, on one device + * @param fieldTmpNoGuard field data + * @param globalOffset offset from the global to the local domain + * @param numBlocks number of virtual blocks used in a kernel run + * @param fieldPos TmpField in cell position + */ + template + HINLINE void runKernelInDistributeMode( + DataSpace& cellsGrid, + FieldTmp::DataBoxType const& fieldTmpNoGuard, + DataSpace& globalOffset, + uint32_t const& numBlocks, + T_FieldPos const& fieldPos) + { + // The available number of virtual workers. + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + // Loop over kernel runs. + for(uint32_t step = 0; step < countRanks; step++) + { + uint32_t countVectors, iterOffset; + // Pass along the data. + communicationOnStep(step); + // 1D offset to the begin of the currently processed output + // part. + iterOffset = calcOffset(step); + // Define scattering vectors for the output part. 
+ GetScatteringVector scatteringVectors{q_min, q_max, q_step, numVectors, iterOffset}; + // Handle possibly smaller amount of vectors to be processed + // in the last output part. + if(hasLastChunk(step)) + { + countVectors = resOfVectors; + } + else + countVectors = amplitude->getHostBuffer().getCurrentSize(); + // Start the kernel. + PMACC_KERNEL(KernelXrayScattering{}) + (numBlocks, numWorkers)( + cellsGrid, + fieldTmpNoGuard, + globalOffset, + fieldPos, + amplitude->getDeviceBuffer().getDataBox(), + countVectors, + scatteringVectors, + *probingBeam, + currentStep, + totalSimulationCells); + } + } + + /** Runs xrayScattering kernel when the output is mirrored over nodes. + * + * Kernel runs only once in a simulation time step and computes + * the complete output at once. + * + * @param cellsGrid field grid, without GUARD, on one device + * @param fieldTmpNoGuard field data + * @param globalOffset offset from the global to the local domain + * @param numBlocks number of virtual blocks used in a kernel run + * @param fieldPos TmpField in cell position + */ + template + HINLINE void runKernelInMirrorMode( + DataSpace& cellsGrid, + FieldTmp::DataBoxType const& fieldTmpNoGuard, + DataSpace& globalOffset, + uint32_t const& numBlocks, + T_FieldPos const& fieldPos) + { + // Get the available number of virtual workers. + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + // Define scattering vectors for the output part. + GetScatteringVector scatteringVectors{q_min, q_max, q_step, numVectors, 0}; + // Run the kernel. + PMACC_KERNEL(KernelXrayScattering{}) + (numBlocks, numWorkers)( + cellsGrid, + fieldTmpNoGuard, + globalOffset, + fieldPos, + amplitude->getDeviceBuffer().getDataBox(), + amplitude->getHostBuffer().getCurrentSize(), + scatteringVectors, + *probingBeam, + currentStep, + totalSimulationCells); + } + + + /** Actions performed on every step included in the notify period. 
+ * + * First the form factor density is calculated then the Kernel is + * started. For steps in the output period, amplitude is written + * to disk. + * + * @param currentStep + **/ + HINLINE void notify(uint32_t currentStep) override + { + this->currentStep = currentStep; + + // Get the available number of virtual workers per block. + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + // Form factor density: + // Get the offset to the local domain (this HOST + DEVICE pair). + const SubGrid& subGrid = Environment::get().SubGrid(); + DataSpace globalOffset(subGrid.getLocalDomain().offset); + // Calculate the density and get a data box to access this TmpField. + DataConnector& dc = Environment<>::get().DataConnector(); + FieldTmp::DataBoxType tmpFieldBox = calculateDensity(dc, globalOffset); + // Get the in cell position of a TmpField. + // Could probably remove it as it is the cell origin in all cell + // types. + const picongpu::traits::FieldPosition fieldPos; + // Shift the density box to exclude the GUARD. + DataSpace guardingSC = cellDescription.getGuardingSuperCells(); + auto const fieldTmpNoGuard = tmpFieldBox.shift(guardingSC * SuperCellSize::toRT()); + // Get the field size on this rank (no GUARD). + DataSpace cellsGrid + = (cellDescription.getGridSuperCells() - 2 * guardingSC) * SuperCellSize::toRT(); + uint32_t const totalNumCells = cellsGrid.productOfComponents(); + // Get the number of, virtual, blocks. + PMACC_ASSERT(totalNumCells % numWorkers == 0); + uint32_t const numBlocks = totalNumCells / numWorkers; + + + // Run Kernel. + if(outputLayout == OutputMemoryLayout::Distribute) + { + runKernelInDistributeMode(cellsGrid, fieldTmpNoGuard, globalOffset, numBlocks, fieldPos); + } + else + { + runKernelInMirrorMode(cellsGrid, fieldTmpNoGuard, globalOffset, numBlocks, fieldPos); + } + // Release density data. + dc.releaseData(FieldTmp::getUniqueId(0)); + // Write to disk. 
+ if(pluginSystem::containsStep(outputPeriod, currentStep)) + writeOutput(); + // Update the total number of rotations ( data passes ). + if(outputLayout == OutputMemoryLayout::Distribute) + accumulatedRotations += countRanks - 1; + } + }; + } // namespace xrayScattering + } // namespace plugins + namespace particles + { + namespace traits + { + template + struct SpeciesEligibleForSolver> + { + using FrameType = typename T_Species::FrameType; + + // This plugin needs at least the position and weighting. + using RequiredIdentifiers = MakeSeq_t, weighting>; + + using SpeciesHasIdentifiers = + typename pmacc::traits::HasIdentifiers::type; + + using type = SpeciesHasIdentifiers; + }; + + } // namespace traits + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/XrayScattering.kernel b/include/picongpu/plugins/xrayScattering/XrayScattering.kernel new file mode 100644 index 0000000000..e28212a8c8 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/XrayScattering.kernel @@ -0,0 +1,163 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/XrayScattering.hpp" + +#include +#include +#include +#include + +#include + + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + /** Kernel for xrayScattering calculation. + * + * @tparam T_numWorkers Number of virtual workers on block. + */ + template + struct KernelXrayScattering + { + /** Kernel function. + * + * @param acc alpaka accelerator + * @param cellsGrid Dimensions of BORDER + CORE in cells, not super + * cells. + * @param densityBoxGPU Data box of the density device storage, shifted + * to exclude 1st GUARD. + * @param globalOffset Offset from the global to the local domain. + * @param fieldPos TmpField in cell position. + * @param amplitudeBox Device side data box of the output Buffer. + * @param totalNumVectors Number of scattering vectors to process. + * @param scatteringVectors Scattering vectors to process. + * @param probingBeam Probing beam characterization. + * @param currentStep Current simulation step. 
+ */ + template< + typename T_Acc, + typename T_DensityBoxGPU, + typename T_FieldPos, + typename T_DBox, + typename T_TotalNumVectors, + typename T_ScatteringVectors, + typename T_ProbingBeam> + DINLINE void operator()( + T_Acc const& acc, + DataSpace cellsGrid, + T_DensityBoxGPU densityBoxGPU, + DataSpace globalOffset, + T_FieldPos fieldPos, + T_DBox amplitudeBox, + T_TotalNumVectors const totalNumVectors, + T_ScatteringVectors scatteringVectors, + T_ProbingBeam probingBeam, + uint32_t currentStep, + uint32_t totalSimulationCells + + ) const + { + constexpr uint32_t blockSize + = pmacc::traits::GetNumWorkers::type::value>::value; + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t const workerIdx = cupla::threadIdx(acc).x; + uint32_t const blockIdxLin = cupla::blockIdx(acc).x; + + using complex_X = pmacc::math::Complex; + using namespace pmacc::mappings::threads; + using namespace pmacc; + // Storage for positions in the beam coordinate system. + PMACC_SMEM(acc, positions, memory::Array); + // Storage for (form factor density * beam intensity factor). + PMACC_SMEM(acc, densities, memory::Array); + + uint32_t const linAccessBlockBegin = blockIdxLin * blockSize; + + ForEachIdx>{workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + // Each thread reads one field value and saves it together + // with its position, in UNIT_LENGTH, in the global domain. + DataSpace const cellPosition( + DataSpaceOperations::map(cellsGrid, linearIdx + linAccessBlockBegin)); + + DataSpace const cellGlobalPosition(cellPosition + globalOffset); + floatD_X fieldGlobalPosition = precisionCast(cellGlobalPosition) + fieldPos()[0]; + fieldGlobalPosition *= cellSize.shrink(); + + float_X density = densityBoxGPU(cellPosition)[0]; + // Save the cell position in the beam coordinate system. 
+ float3_X position_b = probingBeam.coordinateTransform(currentStep, fieldGlobalPosition); + float_X beamFactor = probingBeam(position_b); + // Store the cell value of the function that has to be + // Fourier transformed. + densities[linearIdx] = density * beamFactor / totalSimulationCells; + // Store position in the beam comoving system + // 3rd component is not needed anymore since q_z = 0. + positions[linearIdx] = position_b.shrink(); + + // Wait for all threads on the block to finish. + cupla::__syncthreads(acc); + + // Calculate the density fourier transform: + // Loop over q-vectors in frequency space: + // Each worker process every numWorkers vector. + for(uint32_t qLoopIdx = workerIdx; qLoopIdx < totalNumVectors; qLoopIdx += numWorkers) + { + float2_X q = scatteringVectors[qLoopIdx]; + complex_X amplitude(0.0); + // Loop over all previously loaded cells: + // This is a volume integral over the local domain + // in the beam coordinate system. + for(uint32_t rLoopIdx = 0; rLoopIdx < blockSize; rLoopIdx++) + { + float_X dotqr; + density = densities[rLoopIdx]; + float2_X position = positions[rLoopIdx]; + dotqr = pmacc::math::dot(position, q); + dotqr *= -1.0; + amplitude += pmacc::math::euler(density, dotqr); + } // end loop over positions + // Add the super cell contribution to the output. + // Avoid racing conditions between blocks. 
+ cupla::atomicAdd( + acc, + &(amplitudeBox[qLoopIdx].get_real()), + amplitude.get_real(), + ::alpaka::hierarchy::Blocks{}); + cupla::atomicAdd( + acc, + &(amplitudeBox[qLoopIdx].get_imag()), + amplitude.get_imag(), + ::alpaka::hierarchy::Blocks{}); + } // end loop over scattering directions + } // end lambda function body + ); + } + }; + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/XrayScatteringWriter.hpp b/include/picongpu/plugins/xrayScattering/XrayScatteringWriter.hpp new file mode 100644 index 0000000000..f7f6573423 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/XrayScatteringWriter.hpp @@ -0,0 +1,396 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + //! Specifies plugin functioning mode. Mirrored or chunked output possible. + enum class OutputMemoryLayout + { + Mirror, + Distribute + }; + + + //! Specifies complex number component. + enum class Component + { + Real, + Imag + }; + + + //! Maps a linear index to a 2D cell position vector. 
+ HINLINE std::vector map2d(pmacc::math::Vector const& size, uint64_t pos) + { + auto const y(pos % size.y()); + auto const x(pos / size.y()); + return std::vector{x, y}; + } + + + //! Converts a pmacc Vector to an std::Vector. + template + HINLINE std::vector asStandardVector(pmacc::math::Vector const& vec) + { + std::vector res; + res.reserve(DIM); + for(unsigned i = 0; i < DIM; ++i) + { + res.push_back(vec[i]); + } + return res; + } + + + /** Output writer for the xrayScattering plugin. + * + * Handles either a serial, in the mirrored output mode, or a parallel, in + * the distributed (chunked) mode, data writing. Data is saved in the + * openPMD standard using the openPMD API. + * @tparam T_ValueType Type of the values stored in the output. + */ + template + struct XrayScatteringWriter + { + private: + //! A pointer to an openPMD API Series object + std::unique_ptr<::openPMD::Series> openPMDSeries; + //! MPI Communicator for the parallel data write + MPI_Comm mpiCommunicator; + std::string const fileName, fileExtension, dir; + std::string const compressionMethod; + //! Functioning mode + OutputMemoryLayout outputMemoryLayout; + //! Output dimensions + pmacc::math::UInt64 const globalExtent; + //! OpenPMD type specifier for the ValueType + ::openPMD::Datatype datatype; + //! Output SI unit + const float_64 unit; + //! GridSpacing + float2_X const gridSpacing; + + + public: + /** Initializes a XrayScatteringWriter object. + * + * @param fileName Output file name, without the extensions. + * @param fileExtension File extension, specifies the API backend. + * @param dir Where to save the output file. + * @param outputMemoryLayout Functioning mode. + * @param compressionMethod + * @param globalExtent Output dimensions. 
+ */ + HINLINE XrayScatteringWriter( + std::string const fileName, + std::string const fileExtension, + std::string const dir, + OutputMemoryLayout outputMemoryLayout, + std::string const compressionMethod, + pmacc::math::UInt64 const globalExtent, + float2_X const gridSpacing, + float_64 const unit, + uint32_t const totalSimulationCells) + : fileName(fileName) + , dir(dir) + , fileExtension(fileExtension) + , outputMemoryLayout(outputMemoryLayout) + , compressionMethod(compressionMethod) + , globalExtent(globalExtent) + , gridSpacing(gridSpacing) + , unit(unit) + { + if(outputMemoryLayout == OutputMemoryLayout::Distribute) + { + // Set the MPI communicator. + GridController& gc = Environment::get().GridController(); + __getTransactionEvent().waitForFinished(); + mpiCommunicator = MPI_COMM_NULL; + MPI_CHECK(MPI_Comm_dup(gc.getCommunicator().getMPIComm(), &mpiCommunicator)); + } + + datatype = ::openPMD::determineDatatype(); + // Create the output file. + openSeries(::openPMD::Access::CREATE); + openPMDSeries->setMeshesPath("scatteringData"); + openPMDSeries->setAttribute("totalSimulationCells", totalSimulationCells); + closeSeries(); + } + + virtual ~XrayScatteringWriter() + { + if(outputMemoryLayout == OutputMemoryLayout::Distribute) + { + if(mpiCommunicator != MPI_COMM_NULL) + { + // avoid deadlock between not finished pmacc tasks and mpi + // blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&(mpiCommunicator))); + } + } + } + + private: + HINLINE bool isADIOS1() const + { +#if openPMD_HAVE_ADIOS1 && !openPMD_HAVE_ADIOS2 + return this->fileExtension == "bp"; +#else + return false; +#endif + } + + /** Opens an openPMD Series in a given access mode. + * + * @param at OpenPMD API access type. + */ + HINLINE void openSeries(::openPMD::Access at) + { + if(!openPMDSeries) + { + std::string fullName = dir + '/' + fileName + "." 
+ fileExtension; + log("XrayScatteringWriter: Opening file: %1%") % fullName; + + if(outputMemoryLayout == OutputMemoryLayout::Distribute) + { + // Open a series for a parallel write. + openPMDSeries = std::make_unique<::openPMD::Series>(fullName, at, mpiCommunicator); + } + else + { + // Open a series for a serial write. + openPMDSeries = std::make_unique<::openPMD::Series>(fullName, at); + } + + log("XrayScatteringWriter: Successfully opened file: %1%") % fullName; + } + else + { + throw std::runtime_error("XrayScatteringWriter: Tried opening a Series while old " + "Series was still active."); + } + } + + HINLINE void closeSeries() + { + if(openPMDSeries) + { + log("XrayScatteringWriter: Closing " + "file: %1%") + % fileName; + openPMDSeries.reset(); + if(outputMemoryLayout == OutputMemoryLayout::Distribute) + { + MPI_Barrier(mpiCommunicator); + } + log("XrayScatteringWriter: successfully closed file: %1%") % fileName; + } + else + { + throw std::runtime_error("XrayScatteringWriter: Tried closing a Series that was not" + " active."); + } + } + + + /** Prepare an openPMD mesh for the amplitude. 
+ * @param currentStep + */ + HINLINE ::openPMD::Mesh prepareMesh(uint32_t const currentStep) + { + ::openPMD::Iteration iteration = openPMDSeries->iterations[currentStep]; + ::openPMD::Mesh mesh = iteration.meshes["amplitude"]; + mesh.setGridSpacing(asStandardVector(gridSpacing)); + // 1/angstrom to 1/meter conversion + mesh.setGridUnitSI(1e10); + mesh.setAxisLabels(std::vector{"q_x", "q_y"}); + return mesh; + } + + + /** + * @param currentStep + * @param component Component to write, either real or imaginary + */ + HINLINE ::openPMD::MeshRecordComponent prepareMRC(Component component, ::openPMD::Mesh& mesh) + { + const std::string name_lookup_tpl[] = {"x", "y"}; + ::openPMD::MeshRecordComponent mrc = mesh[name_lookup_tpl[static_cast(component)]]; + + std::vector shape = asStandardVector(globalExtent); + ::openPMD::Dataset dataset{datatype, std::move(shape)}; + + if(isADIOS1()) + { + dataset.transform = compressionMethod; + } + else + { + dataset.compression = compressionMethod; + } + mrc.resetDataset(std::move(dataset)); + mrc.setUnitSI(unit); + return mrc; + } + + public: + /** Write complex numbers to the whole output array. + * + * @param currentStep Current simulation step. + * @param realVec Vector containing the real parts of the complex + * numbers. + * @param imagVec Vector containing the imaginary parts of the + * complex numbers. 
+ */ + HINLINE void operator()( + uint32_t const currentStep, + std::vector& realVec, + std::vector& imagVec) + { + openSeries(::openPMD::Access::READ_WRITE); + + ::openPMD::Mesh mesh = prepareMesh(currentStep); + ::openPMD::MeshRecordComponent mrc_real = prepareMRC(Component::Real, mesh); + ::openPMD::MeshRecordComponent mrc_imag = prepareMRC(Component::Imag, mesh); + + + mrc_real.storeChunk( + ::openPMD::shareRaw(&realVec[0]), + ::openPMD::Offset(DIM2, 0u), + asStandardVector(globalExtent)); + mrc_imag.storeChunk( + ::openPMD::shareRaw(&imagVec[0]), + ::openPMD::Offset(DIM2, 0u), + asStandardVector(globalExtent)); + openPMDSeries->flush(); + + // Avoid deadlock between not finished pmacc tasks and mpi calls in + // openPMD. + __getTransactionEvent().waitForFinished(); + // Close openPMD Series, most likely the actual write point. + closeSeries(); + } + + + /** Write complex numbers to a part of the output array. + * + * @param currentStep Current simulation step. + * @param extent1D The length of the contiguous part of the output + * that is the write destination (1D access). + * @param offset1D The linear (1D access) offset to the first datum + * in the write destination. + * @param realVec Vector containing the real parts of the complex + * numbers. + * @param imagVec Vector containing the imaginary parts of the + * complex numbers. + */ + HINLINE void operator()( + uint32_t const currentStep, + uint64_t extent1D, + uint64_t offset1D, + std::vector& realVec, + std::vector& imagVec) + { + openSeries(::openPMD::Access::READ_WRITE); + + // Get openPMD mesh record components for the real and imaginary + // parts. 
+ ::openPMD::Mesh mesh = prepareMesh(currentStep); + ::openPMD::MeshRecordComponent mrc_real = prepareMRC(Component::Real, mesh); + ::openPMD::MeshRecordComponent mrc_imag = prepareMRC(Component::Imag, mesh); + + // Register chunks to write: + // Since the extent1D and offset1D are indices used in a linear + // access to the array (along last axis, C-order), they don't always + // describe a rectangle in the 2D output space. For that reason it + // is in general not possible to specify the write extend with a + // 2D vector as it is required by the API. Here the output + // destination is split into 3 parts. Two, not full, rows one at + // the begining and one ad the end of the chunk and a rectangular + // chunk in between. + // + + std::vector offset(2); + std::vector extent(2); + // First line. + // Map the beginning of the output chunk. + offset = map2d(globalExtent, offset1D); + + // The first line has not always the maximum possible length. + uint64_t firstLineLength = globalExtent[1] - offset[1]; + // Set the extent vector. + extent = std::vector{1, firstLineLength}; + // Register chunks for imag and real components. + mrc_real.storeChunk(::openPMD::shareRaw(&realVec[0]), offset, extent); + mrc_imag.storeChunk(::openPMD::shareRaw(&imagVec[0]), offset, extent); + + // Middle chunk. + // These lines have the full length. + uint64_t numFullLines = (extent1D - firstLineLength) / globalExtent[1]; + extent[0] = numFullLines; + extent[1] = globalExtent[1]; + // Offset to the middle chunk. + uint64_t localOffset = firstLineLength; + offset = map2d(globalExtent, offset1D + localOffset); + // Register the middle chunk. + mrc_real.storeChunk(::openPMD::shareRaw(&realVec[localOffset]), offset, extent); + mrc_imag.storeChunk(::openPMD::shareRaw(&imagVec[localOffset]), offset, extent); + + // Last line: + // Find out the length of the last line in the 1D chunk. 
+ uint64_t lastLineLength((extent1D - firstLineLength - numFullLines * globalExtent[1])); + if(lastLineLength != 0) + { + localOffset = firstLineLength + numFullLines * globalExtent[1]; + offset = map2d(globalExtent, offset1D + localOffset); + extent[0] = 1; + extent[1] = lastLineLength; + mrc_real.storeChunk(::openPMD::shareRaw(&realVec[localOffset]), offset, extent); + mrc_imag.storeChunk(::openPMD::shareRaw(&imagVec[localOffset]), offset, extent); + } + openPMDSeries->flush(); + // Avoid deadlock between not finished pmacc tasks and mpi calls in + // openPMD. + __getTransactionEvent().waitForFinished(); + // Close the openPMD Series, most likely the actual write point. + closeSeries(); + } + }; + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/AxisSwap.hpp b/include/picongpu/plugins/xrayScattering/beam/AxisSwap.hpp new file mode 100644 index 0000000000..d4fed2e57b --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/AxisSwap.hpp @@ -0,0 +1,71 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + /** Swaps vector axes and multiplies the result with an integer vector. + * + * When the integer vector contains only 1 and -1 values, this swap + * correspond to a vector rotation that consists only out of right angle + * subrotations. + * + * @tparam axis0 Which old axis (0,1 or 2) is the new first axis (0). + * @tparam axis1 Which old axis (0,1 or 2) is the new second axis (1). + * @tparam axis2 Which old axis (0,1 or 2) is the new third axis (2). + * @tparam a0 Integer vector first component. + * @tparam a1 Integer vector second component. + * @tparam a2 Integer vector third component. + */ + template + struct AxisSwap + { + //! Performs the axis swap and the multiplication. + static HDINLINE float3_X rotate(float3_X const& vec) + { + return float3_X(a0 * vec[axis0], a1 * vec[axis1], a2 * vec[axis2]); + } + + //! Performs the reversed operation (back rotation). + static HDINLINE float3_X reverse(float3_X const& vec) + { + PMACC_ASSERT(a0 != 0); + PMACC_ASSERT(a1 != 0); + PMACC_ASSERT(a2 != 0); + + float3_X result; + result[axis0] = vec[0] / a0; + result[axis1] = vec[1] / a1; + result[axis2] = vec[2] / a2; + return result; + } + }; + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/CoordinateTransform.hpp b/include/picongpu/plugins/xrayScattering/beam/CoordinateTransform.hpp new file mode 100644 index 0000000000..806672a776 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/CoordinateTransform.hpp @@ -0,0 +1,146 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/beam/Side.hpp" +#include "picongpu/plugins/xrayScattering/beam/SecondaryRotation.hpp" +#include "picongpu/param/xrayScattering.param" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + //! Get the global domain size as a 3D vector in 3D and 2D simulations. + template + HINLINE float3_X getDomainSize(); + + // For 3D simulations: + template<> + HINLINE float3_X getDomainSize() + { + DataSpace globalDomainSize = Environment::get().SubGrid().getGlobalDomain().size; + return precisionCast(globalDomainSize); + } + + // For 2D simulations: + template<> + HINLINE float3_X getDomainSize() + { + auto globalDomainSize = Environment::get().SubGrid().getGlobalDomain().size; + return float3_X(globalDomainSize[0], globalDomainSize[1], 0.0); + } + + + /** Defines a coordinate transform from the PIC system into the beam system. + * + * @tparam T_Side Side from which the probing beam is shot at the target. + * @tparam T_SecondaryRotation Rotation of the beam propagation direction. + */ + template + struct CoordinateTransform + { + using Side = T_Side; + using SecondaryRotation = T_SecondaryRotation; + + + HINLINE CoordinateTransform() + { + // TODO: Fix the translation in the coordinate transform. 
The + // position in the beam system is wrongly calculated. + // Orientation is correct. + /* + using namespace picongpu::plugins::xrayScattering::beam; + // Find the coordinate system translation: + // Starting in the beam coordinate system. + // Transverse(to the beam propagation direction) offset from the + // initial position (the middle of the simulation box side). + float2_X offsetTrans_b + { + BEAM_OFFSET[ 0 ] / UNIT_LENGTH, + BEAM_OFFSET[ 1 ] / UNIT_LENGTH + }; + // Offset along the propagation direction, defined by the beam + // delay. + float_X offsetParallel_b = beamDelay_SI / UNIT_TIME * + SPEED_OF_LIGHT; + // Complete offset from the initial position. + float3_X offsetFromMiddlePoint_b + { + offsetTrans_b[ 0 ], + offsetTrans_b[ 1 ], + -1 * offsetParallel_b + }; + + // Move to the PIC coordinate system. + offsetFromMiddlePoint_b = SecondaryRotation::ReverseOperation:: + rotate( offsetFromMiddlePoint_b ); + float3_X offsetFromMiddlePoint_s = Side::FirstRotation::reverse( + offsetFromMiddlePoint_b ); + + // Find the initial position in the PIC coordinate system. + float3_X toMiddlePoint_s = cellSize * getDomainSize< simDim >( ); + for ( uint32_t ii = 0; ii < 3; ii++ ) + { + toMiddlePoint_s[ ii ] *= Side::beamStartPosition[ ii ]; + } + // Combine both translations. + translationVector_s = toMiddlePoint_s + offsetFromMiddlePoint_s; + */ + } + + + /** Transforms a vector from the PIC system to the beam comoving system. + * + * @param currentStep Current simulation step. + * @param position_s A 3D vector in the PIC coordinate system. + */ + HDINLINE float3_X operator()(uint32_t const& currentStep, float3_X const& position_s) + { + // TODO: Uncomment after fixing the translation. + float3_X result = position_s; /* - translationVector_s; + result[ 2 ] -= currentStep * DELTA_T * SPEED_OF_LIGHT; + */ + result = Side::FirstRotation::rotate(result); + result = SecondaryRotation::rotate(result); + return result; + } + + + //! Wrapper for 2D vectors. 
+ HDINLINE float3_X operator()(uint32_t const& currentStep, float2_X const& position_s) + { + float3_X pos{position_s[0], position_s[1], 0.0}; // pad the missing z component with zero + return (*this)(currentStep, pos); // binds to the float3_X const& overload + } + + + private: + // TODO: Uncomment after fixing the translation. + // PMACC_ALIGN( translationVector_s, float3_X ); + }; + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/ProbingBeam.hpp b/include/picongpu/plugins/xrayScattering/beam/ProbingBeam.hpp new file mode 100644 index 0000000000..970199d2af --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/ProbingBeam.hpp @@ -0,0 +1,67 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/beam/CoordinateTransform.hpp" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + /** Defines the probing beam characteristic. + * + * @tparam T_BeamProfile Beam transverse profile. + * @tparam T_BeamShape Beam temporal shape. + * @tparam T_CoordinateTransform Coordinate transform from the pic + * coordinate system to the beam coordinate system.
+ */ + template + struct ProbingBeam + { + using BeamProfile = T_BeamProfile; + using BeamShape = T_BeamShape; + PMACC_ALIGN(coordinateTransform, T_CoordinateTransform); + + HINLINE ProbingBeam() : coordinateTransform(){}; + + /** Calculates the probing amplitude at a given position. + * @param position_b Position in the beam comoving coordinate system + * (x, y, z__at_t_0 - c*t). + * @returns Probing wave amplitude scaling at position_b. + */ + HDINLINE float_X operator()(float3_X const& position_b) + { + float_X profileFactor = BeamProfile::getFactor(position_b[0], position_b[1]); + + float_X beamTime = position_b[2] / SPEED_OF_LIGHT; + float_X shapeFactor = BeamShape::getFactor(beamTime); + + return profileFactor * shapeFactor; + } + }; + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/SecondaryRotation.hpp b/include/picongpu/plugins/xrayScattering/beam/SecondaryRotation.hpp new file mode 100644 index 0000000000..cd1a2c0f3f --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/SecondaryRotation.hpp @@ -0,0 +1,101 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +#include + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + /** Defines a coordinate system rotation. + * + * The whole rotation consists of two rotations --- first by the yaw angle + * and then by a the pitch angle. + * + * @tparam T_ParamClass Param class defining the angles. + */ + template + struct SecondaryRotation : T_ParamClass + { + using Params = T_ParamClass; + struct ReversedAngles + { + static constexpr float_X yawAngle = -1.0_X * Params::yawAngle; + static constexpr float_X pitchAngle = -1.0_X * Params::pitchAngle; + } reversedAngles; + + using ReverseOperation = SecondaryRotation; + + private: + static constexpr float_X xAngle = Params::yawAngle; + static constexpr float_X yAngle = Params::pitchAngle; + + //! X axis rotation (yaw angle). + static HDINLINE void xRotation(float3_X& vec) + { + /* A coordinate change for a vector is equal to the inverse + * of its basis transform. When the beam is rotated its coordinate + * system rotates as well. So the coordinate transfer to such + * a rotated basis is just a rotation by the opposite angle. + */ + float_X cos; + float_X sin; + pmacc::math::sincos(-1.0_X * xAngle, sin, cos); + float_X y = vec[1] * cos - vec[2] * sin; + float_X z = vec[1] * sin + vec[2] * cos; + vec[1] = y; + vec[2] = z; + } + + + //! Y axis rotation (pitch angle). + static HDINLINE void yRotation(float3_X& vec) + { + float_X cos; + float_X sin; + pmacc::math::sincos(-1.0_X * yAngle, sin, cos); + float_X x = vec[0] * cos + vec[2] * sin; + float_X z = -1.0_X * vec[0] * sin + vec[2] * cos; + vec[0] = x; + vec[2] = z; + } + + public: + //! Coordinate transform into the rotated coordinate system. 
+ static HDINLINE float3_X rotate(float3_X const& vec) + { + float3_X result = vec; + yRotation(result); + xRotation(result); + return result; + } + }; + + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/Side.hpp b/include/picongpu/plugins/xrayScattering/beam/Side.hpp new file mode 100644 index 0000000000..94a5a73ef3 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/Side.hpp @@ -0,0 +1,103 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/beam/AxisSwap.hpp" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + /* This file defines the possible base beam orientations. + * + * Example: X Side + * The beam propagates along the x axis ( PIC coordinate system). + * The base position of the beam coordinate system (0,0,0) point it + * the beam system is placed at in the middle of the x_PIC=0 plane. + * That is at (0, 0.5 * Y, 0.5 * Z), where Y and Z are the lengths of + * the simulation box sides along y_PIC and z_PIC axes. + * Therefore beamStartPosition= ( 0.0, 0.5, 0.5 ) for the XSide. 
+ * + * AxisSwap defines the base rotation of the + * coordinate system. First three integers set how the 3 directions + * (x, y, z) in the PIC system correspond to the ones in the beam + * system. The last 3 numbers are the relative orientations. For XSide: + * AxisSwap< 2, 1, 0, -1, 1, 1 > says: + * * x_beam = - z_PIC, + * * y_beam = y_PIC, + * * z_beam = x_PIC, + */ + + //! Probing along the PIC x basis vector. + struct XSide + { + static constexpr float_X beamStartPosition[3] = {0.0, 0.5, 0.5}; + using FirstRotation = AxisSwap<2, 1, 0, -1, 1, 1>; + }; + + + //! Probing against the PIC x basis vector. + struct XRSide + { + static constexpr float_X beamStartPosition[3] = {1.0, 0.5, 0.5}; + using FirstRotation = AxisSwap<2, 1, 0, -1, -1, -1>; + }; + + + //! Probing along the PIC y basis vector. + struct YSide + { + static constexpr float_X beamStartPosition[3] = {0.5, 0.0, 0.5}; + using FirstRotation = AxisSwap<2, 0, 1, -1, -1, 1>; + }; + + + //! Probing against the PIC y basis vector. + struct YRSide + { + static constexpr float_X beamStartPosition[3] = {0.5, 1.0, 0.5}; + using FirstRotation = AxisSwap<2, 0, 1, -1, 1, -1>; + }; + + + //! Probing along the PIC z basis vector. + struct ZSide + { + static constexpr float_X beamStartPosition[3] = {0.5, 0.5, 0.0}; + using FirstRotation = AxisSwap<1, 0, 2, -1, 1, 1>; + }; + + + //! Probing against the PIC z basis vector. 
+ struct ZRSide + { + static constexpr float_X beamStartPosition[3] = {0.5, 0.5, 1.0}; // middle of the far z_PIC boundary plane + using FirstRotation = AxisSwap<1, 0, 2, -1, -1, -1>; + }; + + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/XrayScatteringBeam.hpp b/include/picongpu/plugins/xrayScattering/beam/XrayScatteringBeam.hpp new file mode 100644 index 0000000000..57c7042e85 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/XrayScatteringBeam.hpp @@ -0,0 +1,50 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/beam/CoordinateTransform.hpp" +#include "picongpu/plugins/xrayScattering/beam/ProbingBeam.hpp" +#include "picongpu/plugins/xrayScattering/beam/beamProfiles/profiles.hpp" +#include "picongpu/plugins/xrayScattering/beam/beamShapes/shapes.hpp" +#include "picongpu/param/xrayScattering.param" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + // TODO: Move this back to the param file after fixing the coordinate + // transform.
+ constexpr float_X BEAM_OFFSET[2] = {0.0, 0.0}; + constexpr float_X BEAM_DELAY_SI = 0.0; + using BeamProfile = beamProfiles::ConstProfile; + using BeamShape = beamShapes::ConstShape; + + using BeamCoordinates = CoordinateTransform>; + using XrayScatteringBeam = ProbingBeam; + + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/beamProfiles/ConstProfile.hpp b/include/picongpu/plugins/xrayScattering/beam/beamProfiles/ConstProfile.hpp new file mode 100644 index 0000000000..9319d4ca5c --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/beamProfiles/ConstProfile.hpp @@ -0,0 +1,46 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + namespace beamProfiles + { + //! Homogeneous beam profile. 
+ struct ConstProfile + { + static HDINLINE constexpr float_X getFactor(const float_X& positionX, const float_X& positionY) + { + return float_X(1.0); // same factor everywhere; positionX and positionY are not read + } + }; + } // namespace beamProfiles + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/beamProfiles/GaussianProfile.hpp b/include/picongpu/plugins/xrayScattering/beam/beamProfiles/GaussianProfile.hpp new file mode 100644 index 0000000000..f6bc2876ee --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/beamProfiles/GaussianProfile.hpp @@ -0,0 +1,57 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + namespace beamProfiles + { + /** Gaussian beam transverse profile. + * + * @tparam T_ParamClass Param class defining sigmaX_SI and sigmaY_SI (@f$ \sigma_x @f$, @f$ \sigma_y @f$).
+ */ + template + struct GaussianProfile : public T_ParamClass + { + using ParamClass = T_ParamClass; + + static HDINLINE float_X getFactor(float_X const& x, float_X const& y) + { + constexpr float_X s_x = ParamClass::sigmaX_SI / UNIT_LENGTH; + constexpr float_X s_y = ParamClass::sigmaY_SI / UNIT_LENGTH; + float_X const tmp_x = x / s_x; // runtime value: x is a function argument, so not constexpr + float_X const tmp_y = y / s_y; // runtime value: y is a function argument, so not constexpr + float_X exponent = -0.5_X * (tmp_x * tmp_x + tmp_y * tmp_y); + return math::exp(exponent); // exp(-0.5 * ((x / s_x)^2 + (y / s_y)^2)) + } + }; + } // namespace beamProfiles + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/beamProfiles/profiles.hpp b/include/picongpu/plugins/xrayScattering/beam/beamProfiles/profiles.hpp new file mode 100644 index 0000000000..a11cea20c0 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/beamProfiles/profiles.hpp @@ -0,0 +1,23 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see .
+ */ + +#pragma once + +#include "picongpu/plugins/xrayScattering/beam/beamProfiles/ConstProfile.hpp" +#include "picongpu/plugins/xrayScattering/beam/beamProfiles/GaussianProfile.hpp" diff --git a/include/picongpu/plugins/xrayScattering/beam/beamShapes/ConstShape.hpp b/include/picongpu/plugins/xrayScattering/beam/beamShapes/ConstShape.hpp new file mode 100644 index 0000000000..fa4dbfc7d9 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/beamShapes/ConstShape.hpp @@ -0,0 +1,46 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + namespace beamShapes + { + //! Beam intensity homogeneous along the propagation direction. 
+ struct ConstShape + { + static HDINLINE constexpr float_X getFactor(const float_X& time) + { + return 1.0_X; + } + }; + } // namespace beamShapes + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/plugins/xrayScattering/beam/beamShapes/shapes.hpp b/include/picongpu/plugins/xrayScattering/beam/beamShapes/shapes.hpp new file mode 100644 index 0000000000..f93aaa73cd --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/beam/beamShapes/shapes.hpp @@ -0,0 +1,22 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/plugins/xrayScattering/beam/beamShapes/ConstShape.hpp" diff --git a/include/picongpu/plugins/xrayScattering/xrayScatteringUtilities.hpp b/include/picongpu/plugins/xrayScattering/xrayScatteringUtilities.hpp new file mode 100644 index 0000000000..6f61a27074 --- /dev/null +++ b/include/picongpu/plugins/xrayScattering/xrayScatteringUtilities.hpp @@ -0,0 +1,109 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include +#include +#include + +#include +#include + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + template + std::vector extractReal(Buffer, DIM1>& complexBuffer) + { + std::vector realValues; + auto size = complexBuffer.getCurrentSize(); + auto dataBox = complexBuffer.getDataBox(); + realValues.reserve(size); + for(uint32_t ii = 0; ii < size; ii++) + { + realValues.push_back(dataBox[ii].get_real()); + } + return realValues; + } + + template + std::vector extractImag(Buffer, DIM1>& complexBuffer) + { + std::vector imagValues; + auto size = complexBuffer.getCurrentSize(); + auto dataBox = complexBuffer.getDataBox(); + imagValues.reserve(size); + for(uint32_t ii = 0; ii < size; ii++) + { + imagValues.push_back(dataBox[ii].get_imag()); + } + return imagValues; + } + + template + std::vector extractReal(std::vector> const& complexVec) + { + std::vector realValues; + realValues.reserve(complexVec.size()); + + std::transform( + std::begin(complexVec), + std::end(complexVec), + std::back_inserter(realValues), + [](pmacc::math::Complex const& data) { return data.get_real(); }); + return realValues; + } + + template + std::vector extractImag(std::vector> const& complexVec) + { + std::vector imagValues; + imagValues.reserve(complexVec.size()); + + std::transform( + std::begin(complexVec), + std::end(complexVec), + std::back_inserter(imagValues), + [](pmacc::math::Complex const& data) { return data.get_imag(); }); + return imagValues; + } + + template + void copyVectorToBuffer(std::vector const& vec, Buffer& buffer) 
+ { + if(buffer.getCurrentSize() == vec.size()) + { + auto dataBox = buffer.getDataBox(); + for(std::size_t ii = 0; ii < vec.size(); ii++) + { + dataBox[ii] = vec[ii]; + } + } + else + throw std::runtime_error("XrayScattering: Tried to copy a vector" + " to a Buffer of a different size"); + } + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/include/picongpu/pmacc_renamings.hpp b/include/picongpu/pmacc_renamings.hpp index 98d6aefa50..49169388eb 100644 --- a/include/picongpu/pmacc_renamings.hpp +++ b/include/picongpu/pmacc_renamings.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,4 +22,4 @@ #include -#define CONST_VECTOR(type,dim,name,...) PMACC_CONST_VECTOR(type,dim,name,__VA_ARGS__) +#define CONST_VECTOR(type, dim, name, ...) PMACC_CONST_VECTOR(type, dim, name, __VA_ARGS__) diff --git a/include/picongpu/random/seed/ISeed.hpp b/include/picongpu/random/seed/ISeed.hpp index 6a2ca4ff9c..e711751610 100644 --- a/include/picongpu/random/seed/ISeed.hpp +++ b/include/picongpu/random/seed/ISeed.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PIConGPU. * @@ -26,26 +26,25 @@ namespace picongpu { -namespace random -{ -namespace seed -{ - /** seed generator interface wrapper - * - * Generated seed is equal on all ranks and can be used together with an - * rank unique seed to initialize a random number generator. - * Depending of the generator T_SeedFunctor the seed is reproducible or - * or changed with each program execution. 
- */ - template< typename T_SeedFunctor = seed::Value< 42 > > - struct ISeed + namespace random { - uint32_t - operator()() const + namespace seed { - return T_SeedFunctor{}(); - } - }; -} // namespace seed -} // namespace random + /** seed generator interface wrapper + * + * Generated seed is equal on all ranks and can be used together with a + * rank-unique seed to initialize a random number generator. + * Depending on the generator T_SeedFunctor the seed is reproducible + * or changes with each program execution. + */ + template> + struct ISeed + { + uint32_t operator()() const + { + return T_SeedFunctor{}(); + } + }; + } // namespace seed + } // namespace random } // namespace picongpu diff --git a/include/picongpu/random/seed/Seed.cpp b/include/picongpu/random/seed/Seed.cpp index ce7c86171a..8c4c92deb6 100644 --- a/include/picongpu/random/seed/Seed.cpp +++ b/include/picongpu/random/seed/Seed.cpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PIConGPU. * @@ -27,42 +27,33 @@ namespace picongpu { -namespace random -{ -namespace seed -{ - - uint32_t - FromTime::operator()() const + namespace random { - auto now = std::chrono::system_clock::now(); - uint32_t now_ms = std::chrono::time_point_cast< std::chrono::milliseconds >( now ).
- time_since_epoch().count(); - - // receive time from rank zero - MPI_Bcast( - &now_ms, - 1, - MPI_UINT32_T, - 0, - MPI_COMM_WORLD - ); - - return now_ms; - } - - uint32_t - FromEnvironment::operator()() const - { - char* seedStr = nullptr; - uint32_t seed = 0; - seedStr = std::getenv( "PIC_SEED" ); - if( seedStr ) - seed = std::stoi( seedStr ); - - return seed; - } - -} // namespace seed -} // namespace random + namespace seed + { + uint32_t FromTime::operator()() const + { + auto now = std::chrono::system_clock::now(); + uint32_t now_ms + = std::chrono::time_point_cast(now).time_since_epoch().count(); + + // receive time from rank zero + MPI_Bcast(&now_ms, 1, MPI_UINT32_T, 0, MPI_COMM_WORLD); + + return now_ms; + } + + uint32_t FromEnvironment::operator()() const + { + char* seedStr = nullptr; + uint32_t seed = 0; // returned unchanged when PIC_SEED is not set + seedStr = std::getenv("PIC_SEED"); + if(seedStr) + seed = static_cast<uint32_t>(std::stoul(seedStr)); // stoul: stoi throws above INT_MAX + + return seed; + } + + } // namespace seed + } // namespace random } // namespace picongpu diff --git a/include/picongpu/random/seed/Seed.hpp b/include/picongpu/random/seed/Seed.hpp index ff3a91b997..58f1948f61 100644 --- a/include/picongpu/random/seed/Seed.hpp +++ b/include/picongpu/random/seed/Seed.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PIConGPU. * @@ -24,47 +24,43 @@ namespace picongpu { -namespace random -{ -namespace seed -{ - - /** constant seed - * - * The seed is equal on each program program start. - */ - template< uint32_t T_constSeedValue > - struct Value + namespace random { - uint32_t - operator()() const + namespace seed { - return T_constSeedValue; - } - }; + /** constant seed + * + * The seed is equal on each program start. + */ + template + struct Value + { + uint32_t operator()() const + { + return T_constSeedValue; + } + }; - /** time dependant seed - * - * The seed is derived from the current system time. - * The seed is different with each program start.
- */ - struct FromTime - { - uint32_t - operator()() const; - }; + /** time dependant seed + * + * The seed is derived from the current system time. + * The seed is different with each program start. + */ + struct FromTime + { + uint32_t operator()() const; + }; - /** read the seed from the environment - * - * Read the seed from the environment variable `PIC_SEED`. - * If `PIC_SEED` is not defined zero will be returned. - */ - struct FromEnvironment - { - uint32_t - operator()() const; - }; + /** read the seed from the environment + * + * Read the seed from the environment variable `PIC_SEED`. + * If `PIC_SEED` is not defined zero will be returned. + */ + struct FromEnvironment + { + uint32_t operator()() const; + }; -} // namespace seed -} // namespace random + } // namespace seed + } // namespace random } // namespace picongpu diff --git a/include/picongpu/simulation/control/DomainAdjuster.hpp b/include/picongpu/simulation/control/DomainAdjuster.hpp index 7b9e33e13b..a621201d85 100644 --- a/include/picongpu/simulation/control/DomainAdjuster.hpp +++ b/include/picongpu/simulation/control/DomainAdjuster.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -45,7 +45,6 @@ namespace picongpu */ class DomainAdjuster { - public: /** constructor * @@ -56,16 +55,15 @@ namespace picongpu * @param movingWindowEnabled if moving window is enabled */ DomainAdjuster( - DataSpace< simDim > const & numDevices, - DataSpace< simDim > const & mpiPosition, - DataSpace< simDim > const & isPeriodic, - bool const movingWindowEnabled - ) : - m_numDevices( numDevices), - m_mpiPosition( mpiPosition ), - m_isPeriodic( isPeriodic ), - m_movingWindowEnabled( movingWindowEnabled ), - m_isMaster( mpiPosition == DataSpace< simDim >::create( 0 ) ) + DataSpace const& numDevices, + DataSpace const& mpiPosition, + DataSpace const& isPeriodic, + bool const movingWindowEnabled) + : m_numDevices(numDevices) + , m_mpiPosition(mpiPosition) + , m_isPeriodic(isPeriodic) + , m_movingWindowEnabled(movingWindowEnabled) + , m_isMaster(mpiPosition == DataSpace::create(0)) { } @@ -78,29 +76,26 @@ namespace picongpu * @param[out] localDomainOffset local offset [in cells] relative to the origin of the global domain */ void operator()( - DataSpace< simDim > & globalDomainSize, - DataSpace< simDim > & localDomainSize, - DataSpace< simDim > & localDomainOffset - ) + DataSpace& globalDomainSize, + DataSpace& localDomainSize, + DataSpace& localDomainOffset) { m_globalDomainSize = globalDomainSize; m_localDomainSize = localDomainSize; - for( uint32_t d = 0; d < simDim; ++d ) + for(uint32_t d = 0; d < simDim; ++d) { - multipleOfSuperCell( d ); - minThreeSuperCells( d ); - greaterEqualThanAbsorber( d ); - deriveGlobalDomainSize( d ); - updateLocalDomainOffset( d ); + multipleOfSuperCell(d); + minThreeSuperCells(d); + greaterEqualThanAbsorber(d); + deriveGlobalDomainSize(d); + updateLocalDomainOffset(d); } - if( globalDomainSize != m_globalDomainSize || localDomainSize != m_localDomainSize ) + if(globalDomainSize != m_globalDomainSize || localDomainSize != m_localDomainSize) { - std::cout << " new grid size (global|local|offset): " << - m_globalDomainSize.toString() << 
"|" << - m_localDomainSize.toString() << "|" << - m_localDomainOffset.toString() << std::endl; + std::cout << " new grid size (global|local|offset): " << m_globalDomainSize.toString() << "|" + << m_localDomainSize.toString() << "|" << m_localDomainOffset.toString() << std::endl; } // write results back @@ -120,7 +115,6 @@ namespace picongpu } private: - /** update local domain offset * * Share the local domain size with all MPI ranks and calculate the offset of the @@ -128,76 +122,70 @@ namespace picongpu * * @param dim dimension to update */ - void updateLocalDomainOffset( size_t const dim ) + void updateLocalDomainOffset(size_t const dim) { - pmacc::GridController< simDim > & gc = pmacc::Environment< simDim >::get( ).GridController( ); + pmacc::GridController& gc = pmacc::Environment::get().GridController(); - int mpiPos( gc.getPosition( )[ dim ] ); + int mpiPos(gc.getPosition()[dim]); int numMpiRanks = gc.getGlobalSize(); // gather mpi position in the direction we are checking - std::vector< int > mpiPositions( numMpiRanks ); - MPI_CHECK( MPI_Allgather( + std::vector mpiPositions(numMpiRanks); + MPI_CHECK(MPI_Allgather( &mpiPos, 1, MPI_INT, mpiPositions.data(), 1, MPI_INT, - gc.getCommunicator().getMPIComm() - )); + gc.getCommunicator().getMPIComm())); // gather local sizes in the direction we are checking - std::vector< uint64_t > allLocalSizes( numMpiRanks ); - uint64_t lSize = static_cast< uint64_t >( m_localDomainSize[ dim ] ); - MPI_CHECK( MPI_Allgather( + std::vector allLocalSizes(numMpiRanks); + uint64_t lSize = static_cast(m_localDomainSize[dim]); + MPI_CHECK(MPI_Allgather( &lSize, 1, MPI_UINT64_T, allLocalSizes.data(), 1, MPI_UINT64_T, - gc.getCommunicator().getMPIComm() - )); + gc.getCommunicator().getMPIComm())); uint64_t offset = 0u; - for( size_t i = 0u; i < mpiPositions.size(); ++i ) + for(size_t i = 0u; i < mpiPositions.size(); ++i) { - if( mpiPositions[ i ] < mpiPos ) - offset += allLocalSizes[ i ]; + if(mpiPositions[i] < mpiPos) + offset += 
allLocalSizes[i]; } /* since we are not doing independent reduces per slice we need * to adjust the offset result by dividing with the number of * MPI ranks in all other dimensions. */ - offset /= static_cast< uint64_t >( m_numDevices.productOfComponents() / m_numDevices[ dim ] ); - m_localDomainOffset[ dim ] = static_cast< int >( offset ); - + offset /= static_cast(m_numDevices.productOfComponents() / m_numDevices[dim]); + m_localDomainOffset[dim] = static_cast(offset); } /** ensure that the local size is a multiple of the supercell size * * @param dim dimension to update */ - void multipleOfSuperCell( size_t const dim ) + void multipleOfSuperCell(size_t const dim) { - int const sCellSize = SuperCellSize::toRT()[ dim ]; + int const sCellSize = SuperCellSize::toRT()[dim]; // round up to full supercells - int const validLocalSize = - ( ( m_localDomainSize[ dim ] + sCellSize - 1 ) / sCellSize ) * - sCellSize; + int const validLocalSize = ((m_localDomainSize[dim] + sCellSize - 1) / sCellSize) * sCellSize; - if( validLocalSize != m_localDomainSize[ dim ] ) + if(validLocalSize != m_localDomainSize[dim]) { showMessage( dim, "Local grid size is not a multiple of supercell size.", - m_localDomainSize[ dim ], - validLocalSize - ); + m_localDomainSize[dim], + validLocalSize); - m_localDomainSize[ dim ] = validLocalSize; + m_localDomainSize[dim] = validLocalSize; } } @@ -207,21 +195,20 @@ namespace picongpu * * @param dim dimension to update */ - void minThreeSuperCells( size_t const dim ) + void minThreeSuperCells(size_t const dim) { - int numSuperCells = m_localDomainSize[ dim ] / SuperCellSize::toRT()[ dim ]; + int numSuperCells = m_localDomainSize[dim] / SuperCellSize::toRT()[dim]; - if( numSuperCells < 3 ) + if(numSuperCells < 3) { - int newLocalDomainSize = 3 * SuperCellSize::toRT()[ dim ]; + int newLocalDomainSize = 3 * SuperCellSize::toRT()[dim]; showMessage( dim, "Local grid size is not containing at least 3 supercells.", - m_localDomainSize[ dim ], - 
newLocalDomainSize - ); + m_localDomainSize[dim], + newLocalDomainSize); - m_localDomainSize[ dim ] = newLocalDomainSize; + m_localDomainSize[dim] = newLocalDomainSize; } } @@ -232,49 +219,42 @@ namespace picongpu * * @param dim dimension to update */ - void greaterEqualThanAbsorber( size_t const dim ) + void greaterEqualThanAbsorber(size_t const dim) { - int validLocalSize = m_localDomainSize[ dim ]; + int validLocalSize = m_localDomainSize[dim]; - bool const isAbsorberEnabled = !m_isPeriodic[ dim ]; - bool const isBoundaryDevice = ( m_mpiPosition[ dim ] == 0 || m_mpiPosition[ dim ] == m_numDevices[ dim ] - 1 ); - if( isAbsorberEnabled && isBoundaryDevice ) + bool const isAbsorberEnabled = !m_isPeriodic[dim]; + bool const isBoundaryDevice = (m_mpiPosition[dim] == 0 || m_mpiPosition[dim] == m_numDevices[dim] - 1); + if(isAbsorberEnabled && isBoundaryDevice) { - size_t boundary = m_mpiPosition[ dim ] == 0u ? 0u : 1u; - int maxAbsorberCells = fields::absorber::numCells[ dim ][ boundary ]; + size_t boundary = m_mpiPosition[dim] == 0u ? 
0u : 1u; + int maxAbsorberCells = fields::absorber::numCells[dim][boundary]; - if( m_movingWindowEnabled && dim == 1u ) + if(m_movingWindowEnabled && dim == 1u) { /* since the device changes their position during the simulation * the negative and positive absorber cells must fit into the domain */ - maxAbsorberCells = static_cast< int >( - std::max( - fields::absorber::numCells[ dim ][ 0 ], - fields::absorber::numCells[ dim ][ 1 ] - ) - ); + maxAbsorberCells = static_cast( + std::max(fields::absorber::numCells[dim][0], fields::absorber::numCells[dim][1])); } - if( m_localDomainSize[ dim ] < maxAbsorberCells ) + if(m_localDomainSize[dim] < maxAbsorberCells) { - int const sCellSize = SuperCellSize::toRT()[ dim ]; + int const sCellSize = SuperCellSize::toRT()[dim]; // round up to full supercells - validLocalSize = - ( ( maxAbsorberCells + sCellSize - 1 ) / sCellSize ) * - sCellSize; + validLocalSize = ((maxAbsorberCells + sCellSize - 1) / sCellSize) * sCellSize; } - if( validLocalSize != m_localDomainSize[ dim ] ) + if(validLocalSize != m_localDomainSize[dim]) { showMessage( dim, "Local grid size must be greater or equal than the largest absorber.", - m_localDomainSize[ dim ], - validLocalSize - ); + m_localDomainSize[dim], + validLocalSize); - m_localDomainSize[ dim ] = validLocalSize; + m_localDomainSize[dim] = validLocalSize; } } } @@ -287,43 +267,39 @@ namespace picongpu * * @param dim dimension to update */ - void deriveLocalDomainSize( size_t const dim ) + void deriveLocalDomainSize(size_t const dim) { - if( m_movingWindowEnabled && dim == 1u ) + if(m_movingWindowEnabled && dim == 1u) { - pmacc::mpi::MPIReduce mpiReduce; int globalMax; mpiReduce( pmacc::nvidia::functors::Max(), &globalMax, - &m_localDomainSize[ dim ], + &m_localDomainSize[dim], 1, - pmacc::mpi::reduceMethods::AllReduce() - ); + pmacc::mpi::reduceMethods::AllReduce()); int globalMin; mpiReduce( pmacc::nvidia::functors::Min(), &globalMin, - &m_localDomainSize[ dim ], + &m_localDomainSize[dim], 1, 
- pmacc::mpi::reduceMethods::AllReduce() - ); + pmacc::mpi::reduceMethods::AllReduce()); // local size must be equal for all devices in y direction - if( m_isMaster && globalMax != globalMin ) + if(m_isMaster && globalMax != globalMin) { showMessage( dim, "Local grid size must be equal for all devices because moving window is enabled.", - m_localDomainSize[ dim ], - globalMax - ); + m_localDomainSize[dim], + globalMax); } - m_localDomainSize[ dim ] = globalMax; + m_localDomainSize[dim] = globalMax; } } @@ -333,49 +309,40 @@ namespace picongpu * * @param dim dimension to update */ - void deriveGlobalDomainSize( size_t const dim ) + void deriveGlobalDomainSize(size_t const dim) { uint64_t validGlobalGridSize = 0u; - deriveLocalDomainSize( dim ); + deriveLocalDomainSize(dim); - if( m_movingWindowEnabled && dim == 1u ) + if(m_movingWindowEnabled && dim == 1u) { // the local sizes in slide direction must be equal sized - validGlobalGridSize = static_cast< uint64_t >( m_localDomainSize[ dim ] * m_numDevices[ dim ] ); + validGlobalGridSize = static_cast(m_localDomainSize[dim] * m_numDevices[dim]); } else { - uint64_t localDomainSize = static_cast< uint64_t >( m_localDomainSize[ dim ] ); + uint64_t localDomainSize = static_cast(m_localDomainSize[dim]); pmacc::mpi::MPIReduce mpiReduce; mpiReduce( pmacc::nvidia::functors::Add(), &validGlobalGridSize, &localDomainSize, 1, - pmacc::mpi::reduceMethods::AllReduce() - ); + pmacc::mpi::reduceMethods::AllReduce()); /* since we are not doing independent reduces per slice we need * to adjust the reduce result by dividing the sizes of all other dimensions * we are not check within the method call */ - validGlobalGridSize /= static_cast< uint64_t >( - m_numDevices.productOfComponents() / m_numDevices[ dim ] - ); - + validGlobalGridSize /= static_cast(m_numDevices.productOfComponents() / m_numDevices[dim]); } - if( m_isMaster && validGlobalGridSize != static_cast< uint64_t >( m_globalDomainSize[ dim ] ) ) + if(m_isMaster && 
validGlobalGridSize != static_cast(m_globalDomainSize[dim])) { - showMessage( - dim, - "Invalid global grid size.", - m_globalDomainSize[ dim ], - validGlobalGridSize - ); + showMessage(dim, "Invalid global grid size.", m_globalDomainSize[dim], validGlobalGridSize); } - m_globalDomainSize[ dim ] = static_cast< int >( validGlobalGridSize ); + m_globalDomainSize[dim] = static_cast(validGlobalGridSize); } /** print a message to the user @@ -387,38 +354,30 @@ namespace picongpu * @param currentSize current domain size in the given direction * @param updatedSize updated/corrected domain size for the given dimension */ - void showMessage( - size_t const dim, - std::string const & msg, - int const currentSize, - int const updatedSize - ) const + void showMessage(size_t const dim, std::string const& msg, int const currentSize, int const updatedSize) const { /**! lookup table to translate a dimension index into a name * * \warning `= { { ... } }` is not required by the c++11 standard but * is necessary for g++ 4.9 */ - std::array< char, 3 > const dimNames = { { 'x', 'y', 'z' } }; + std::array const dimNames = {{'x', 'y', 'z'}}; - if( m_validateOnly ) + if(m_validateOnly) throw std::runtime_error( - std::string( "Dimension " ) + dimNames[ dim ] + ": " + - msg + " Suggestion: set " + std::to_string( currentSize ) + - " to " + std::to_string( updatedSize ) - ); + std::string("Dimension ") + dimNames[dim] + ": " + msg + " Suggestion: set " + + std::to_string(currentSize) + " to " + std::to_string(updatedSize)); else - std::cout << "Dimension " << dimNames[ dim ] << ": " << - msg << " Auto adjust from " << - currentSize << " to " << updatedSize << std::endl; + std::cout << "Dimension " << dimNames[dim] << ": " << msg << " Auto adjust from " << currentSize + << " to " << updatedSize << std::endl; } - DataSpace< simDim > m_globalDomainSize; - DataSpace< simDim > m_localDomainSize; - DataSpace< simDim > m_localDomainOffset; - DataSpace< simDim > const m_numDevices; - DataSpace< 
simDim > const m_mpiPosition; - DataSpace< simDim > const m_isPeriodic; + DataSpace m_globalDomainSize; + DataSpace m_localDomainSize; + DataSpace m_localDomainOffset; + DataSpace const m_numDevices; + DataSpace const m_mpiPosition; + DataSpace const m_isPeriodic; bool const m_movingWindowEnabled; bool const m_isMaster; diff --git a/include/picongpu/simulation/control/ISimulationStarter.hpp b/include/picongpu/simulation/control/ISimulationStarter.hpp index 4584f20bea..23b7021f2f 100644 --- a/include/picongpu/simulation/control/ISimulationStarter.hpp +++ b/include/picongpu/simulation/control/ISimulationStarter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -32,7 +32,6 @@ namespace picongpu class ISimulationStarter : public IPlugin { public: - virtual ~ISimulationStarter() { } @@ -43,7 +42,7 @@ namespace picongpu * * @return true if no error else false */ - virtual ArgsParser::Status parseConfigs(int argc, char **argv) = 0; + virtual ArgsParser::Status parseConfigs(int argc, char** argv) = 0; /*start simulation * is called after parsConfig and pluginLoad @@ -60,4 +59,4 @@ namespace picongpu // nothing to do here } }; -} +} // namespace picongpu diff --git a/include/picongpu/simulation/control/MovingWindow.hpp b/include/picongpu/simulation/control/MovingWindow.hpp index 0f73cef019..71fd0b4aee 100644 --- a/include/picongpu/simulation/control/MovingWindow.hpp +++ b/include/picongpu/simulation/control/MovingWindow.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, Alexander Debus +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, Alexander Debus * * This file is part of PIConGPU. * @@ -26,424 +26,414 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -/** - * Singleton class managing the moving window, slides. - * Can be used to create window views on the grid. 
- */ -class MovingWindow -{ -private: - - MovingWindow() = default; - - MovingWindow(MovingWindow& cc); - - void getCurrentSlideInfo(uint32_t currentStep, bool *doSlide, float_64 *offsetFirstGPU) + /** + * Singleton class managing the moving window, slides. + * Can be used to create window views on the grid. + */ + class MovingWindow { - if (doSlide) - *doSlide = false; + private: + MovingWindow() = default; - if (offsetFirstGPU) - *offsetFirstGPU = 0.0; + MovingWindow(MovingWindow& cc); - if (slidingWindowEnabled) + void getCurrentSlideInfo(uint32_t currentStep, bool* doSlide, float_64* offsetFirstGPU) { - /* Sliding stayed enabled but if we reach the end step where we should stop sliding - * the moving window is freezed. - * All offsets will stay constant until the end of the simulation. - */ - if (currentStep >= endSlidingOnStep) - currentStep = endSlidingOnStep; + if(doSlide) + *doSlide = false; - const SubGrid& subGrid = Environment::get().SubGrid(); + if(offsetFirstGPU) + *offsetFirstGPU = 0.0; - /* speed of the moving window */ - const float_64 windowMovingSpeed = float_64(SPEED_OF_LIGHT); + if(slidingWindowEnabled) + { + /* Sliding stayed enabled but if we reach the end step where we should stop sliding + * the moving window is freezed. + * All offsets will stay constant until the end of the simulation. 
+ */ + if(currentStep >= endSlidingOnStep) + currentStep = endSlidingOnStep; - /* defines in which direction the window moves - * - * 0 == x, 1 == y , 2 == z direction - * - * note: currently only y direction is supported - */ - const uint32_t moveDirection = 1; + const SubGrid& subGrid = Environment::get().SubGrid(); - /* the moving window is smaller than the global domain by exactly one - * GPU (local domain size) - * \todo calculation of the globalWindowSizeInMoveDirection is constant should be - * only done once in it's own central object/api - */ - const uint32_t globalWindowSizeInMoveDirection = - subGrid.getGlobalDomain().size[moveDirection] - subGrid.getLocalDomain().size[moveDirection]; + /* speed of the moving window */ + const float_64 windowMovingSpeed = float_64(SPEED_OF_LIGHT); - const uint32_t gpuNumberOfCellsInMoveDirection = subGrid.getLocalDomain().size[moveDirection]; + /* defines in which direction the window moves + * + * 0 == x, 1 == y , 2 == z direction + * + * note: currently only y direction is supported + */ + const uint32_t moveDirection = 1; - /* unit PIConGPU length */ - const float_64 cellSizeInMoveDirection = float_64(cellSize[moveDirection]); + /* the moving window is smaller than the global domain by exactly one + * GPU (local domain size) + * \todo calculation of the globalWindowSizeInMoveDirection is constant should be + * only done once in it's own central object/api + */ + const uint32_t globalWindowSizeInMoveDirection + = subGrid.getGlobalDomain().size[moveDirection] - subGrid.getLocalDomain().size[moveDirection]; - const float_64 deltaWayPerStep = (windowMovingSpeed * float_64(DELTA_T)); + const uint32_t gpuNumberOfCellsInMoveDirection = subGrid.getLocalDomain().size[moveDirection]; - /* How many cells the virtual particle with speed of light is pushed forward - * at the begin of the simulation. - * The number of cells is round up thus we avoid window moves and slides - * depends on half cells. 
- */ - const uint32_t virtualParticleInitialStartCell = math::ceil( - float_64(globalWindowSizeInMoveDirection) * (float_64(1.0) - movePoint) - ); + /* unit PIConGPU length */ + const float_64 cellSizeInMoveDirection = float_64(cellSize[moveDirection]); - /* Is the time step when the virtual particle **passed** the GPU next to the last - * in the current to the next step - */ - const uint32_t firstSlideStep = math::ceil( - float_64(subGrid.getGlobalDomain().size[moveDirection] - virtualParticleInitialStartCell) * - cellSizeInMoveDirection / deltaWayPerStep - ) - 1; - - /* way which the virtual particle must move before the window begins - * to move the first time [in pic length] */ - const float_64 wayToFirstMove = - float_64(globalWindowSizeInMoveDirection - virtualParticleInitialStartCell) * - cellSizeInMoveDirection; - /* Is the time step when the virtual particle **passed** the moving window - * in the current to the next step - * Signed type of firstMoveStep to allow for edge case movePoint = 0.0 - * for a moving window right from the start of the simulation. 
- */ - const int32_t firstMoveStep = math::ceil( - wayToFirstMove / deltaWayPerStep - ) - 1; + const float_64 deltaWayPerStep = (windowMovingSpeed * float_64(DELTA_T)); - if (firstMoveStep <= int32_t(currentStep) ) - { - /* calculate the current position of the virtual particle */ - const float_64 virtualParticleWayPassed = - deltaWayPerStep * float_64(currentStep); - const uint32_t virtualParticleWayPassedInCells = uint32_t( - math::floor(virtualParticleWayPassed / cellSizeInMoveDirection) - ); - const uint32_t virtualParticlePositionInCells = - virtualParticleWayPassedInCells + virtualParticleInitialStartCell; - - /* calculate the position of the virtual particle after the current step is calculated */ - const float_64 nextVirtualParticleWayPassed = - deltaWayPerStep * float_64(currentStep + 1); - const uint32_t nextVirtualParticleWayPassedInCells = - uint32_t(math::floor(nextVirtualParticleWayPassed / cellSizeInMoveDirection)); - /* This position is used to detect the point in time where the virtual particle - * moves over a GPU border. + /* How many cells the virtual particle with speed of light is pushed forward + * at the begin of the simulation. + * The number of cells is round up thus we avoid window moves and slides + * depends on half cells. 
*/ - const uint32_t nextVirtualParticlePositionInCells = - nextVirtualParticleWayPassedInCells + virtualParticleInitialStartCell; + const uint32_t virtualParticleInitialStartCell + = math::ceil(float_64(globalWindowSizeInMoveDirection) * (float_64(1.0) - movePoint)); - /* within the to be simulated time step (currentStep -> currentStep+1) - * the virtual particle will have reached at least the position - * of the cell behind the end of the initial global domain - * (also true for all later time steps) + /* Is the time step when the virtual particle **passed** the GPU next to the last + * in the current to the next step */ - const bool endOfInitialGlobalDomain = firstSlideStep <= currentStep; - - /* virtual particle will pass a GPU border during the current - * (to be simulated) time step + const uint32_t firstSlideStep + = math::ceil( + float_64(subGrid.getGlobalDomain().size[moveDirection] - virtualParticleInitialStartCell) + * cellSizeInMoveDirection / deltaWayPerStep) + - 1; + + /* way which the virtual particle must move before the window begins + * to move the first time [in pic length] */ + const float_64 wayToFirstMove + = float_64(globalWindowSizeInMoveDirection - virtualParticleInitialStartCell) + * cellSizeInMoveDirection; + /* Is the time step when the virtual particle **passed** the moving window + * in the current to the next step + * Signed type of firstMoveStep to allow for edge case movePoint = 0.0 + * for a moving window right from the start of the simulation. 
*/ - const bool virtualParticlePassesGPUBorder = - (nextVirtualParticlePositionInCells % gpuNumberOfCellsInMoveDirection) < - (virtualParticlePositionInCells % gpuNumberOfCellsInMoveDirection); + const int32_t firstMoveStep = math::ceil(wayToFirstMove / deltaWayPerStep) - 1; - if (endOfInitialGlobalDomain && virtualParticlePassesGPUBorder) + if(firstMoveStep <= int32_t(currentStep)) { - incrementSlideCounter(currentStep); - if (doSlide) - *doSlide = true; - } + /* calculate the current position of the virtual particle */ + const float_64 virtualParticleWayPassed = deltaWayPerStep * float_64(currentStep); + const uint32_t virtualParticleWayPassedInCells + = uint32_t(math::floor(virtualParticleWayPassed / cellSizeInMoveDirection)); + const uint32_t virtualParticlePositionInCells + = virtualParticleWayPassedInCells + virtualParticleInitialStartCell; + + /* calculate the position of the virtual particle after the current step is calculated */ + const float_64 nextVirtualParticleWayPassed = deltaWayPerStep * float_64(currentStep + 1); + const uint32_t nextVirtualParticleWayPassedInCells + = uint32_t(math::floor(nextVirtualParticleWayPassed / cellSizeInMoveDirection)); + /* This position is used to detect the point in time where the virtual particle + * moves over a GPU border. 
+ */ + const uint32_t nextVirtualParticlePositionInCells + = nextVirtualParticleWayPassedInCells + virtualParticleInitialStartCell; - /* valid range for the offset is [0;GPU number of cells in move direction) */ - if (offsetFirstGPU) - { - /* since the moving window in PIConGPU always starts on the - * first plane (3D) / row (2D) of GPUs in move direction, this - * calculation is equal to the globalWindow.offset in move direction - * - * note: also works with windowMovingSpeed > c + /* within the to be simulated time step (currentStep -> currentStep+1) + * the virtual particle will have reached at least the position + * of the cell behind the end of the initial global domain + * (also true for all later time steps) + */ + const bool endOfInitialGlobalDomain = firstSlideStep <= currentStep; + + /* virtual particle will pass a GPU border during the current + * (to be simulated) time step */ - *offsetFirstGPU = nextVirtualParticlePositionInCells % gpuNumberOfCellsInMoveDirection; + const bool virtualParticlePassesGPUBorder + = (nextVirtualParticlePositionInCells % gpuNumberOfCellsInMoveDirection) + < (virtualParticlePositionInCells % gpuNumberOfCellsInMoveDirection); + + if(endOfInitialGlobalDomain && virtualParticlePassesGPUBorder) + { + incrementSlideCounter(currentStep); + if(doSlide) + *doSlide = true; + } + + /* valid range for the offset is [0;GPU number of cells in move direction) */ + if(offsetFirstGPU) + { + /* since the moving window in PIConGPU always starts on the + * first plane (3D) / row (2D) of GPUs in move direction, this + * calculation is equal to the globalWindow.offset in move direction + * + * note: also works with windowMovingSpeed > c + */ + *offsetFirstGPU = nextVirtualParticlePositionInCells % gpuNumberOfCellsInMoveDirection; + } } } } - } - - /** increment slide counter - * - * It is allowed to call this function more than once per time step - * The function takes care that the counter is only incremented once - * per simulation step - * - * 
@param current simulation step - */ - void incrementSlideCounter(const uint32_t currentStep) - { - // do not slide twice in one simulation step - if (isSlidingWindowActive( currentStep ) && lastSlideStep < currentStep) + /** increment slide counter + * + * It is allowed to call this function more than once per time step + * The function takes care that the counter is only incremented once + * per simulation step + * + * @param current simulation step + */ + void incrementSlideCounter(const uint32_t currentStep) { - slideCounter++; - lastSlideStep = currentStep; + // do not slide twice in one simulation step + if(isSlidingWindowActive(currentStep) && lastSlideStep < currentStep) + { + slideCounter++; + lastSlideStep = currentStep; + } } - } - /** true is sliding window is activated - * - * How long the window is sliding is defined with endSlidingOnStep. - */ - bool slidingWindowEnabled = false; - - /** Defines when to start sliding the window - * - * A virtual photon starts at t=0 at the lower end (min y) of the global - * simulation box in the positive y direction. The window sliding starts at - * the moment of time when the particle covers the movePoint ratio of the - * global moving window size in the y direction. - * - * Note that with the moving window enabled, there is an additional "hidden" - * row of local domains (and devices simulating them) at the y-front. - * Therefore, the global moving window size in the y direction is the global - * domain size minus a local domain size (which is required to be the same - * for all domains). - * - * So, in short, the window starts sliding in time required to pass the - * distance of movePoint * (global window size in y) when moving with - * the speed of light. - * - * Setting movePoint to 0.0 makes the window start sliding at the start - * of a simulation, and setting it to 1.0 makes it start sliding when the - * virtual photon reaches the start of the "hidden" row of local domains. 
- * It is permitted to use values outside of the [0.0, 1.0] interval to - * achieve the effects of "pre-movement" and "delayed movement", however - * this might complicate the setup and so not recommended unless essential. - */ - float_64 movePoint; - - /** current number of slides since start of simulation */ - uint32_t slideCounter = 0u; - - /** - * last simulation step with slide - * used to prevent multiple slides per simulation step - */ - uint32_t lastSlideStep = 0u; + /** true is sliding window is activated + * + * How long the window is sliding is defined with endSlidingOnStep. + */ + bool slidingWindowEnabled = false; - //! time step where the sliding window is stopped - uint32_t endSlidingOnStep = 0u; + /** Defines when to start sliding the window + * + * A virtual photon starts at t=0 at the lower end (min y) of the global + * simulation box in the positive y direction. The window sliding starts at + * the moment of time when the particle covers the movePoint ratio of the + * global moving window size in the y direction. + * + * Note that with the moving window enabled, there is an additional "hidden" + * row of local domains (and devices simulating them) at the y-front. + * Therefore, the global moving window size in the y direction is the global + * domain size minus a local domain size (which is required to be the same + * for all domains). + * + * So, in short, the window starts sliding in time required to pass the + * distance of movePoint * (global window size in y) when moving with + * the speed of light. + * + * Setting movePoint to 0.0 makes the window start sliding at the start + * of a simulation, and setting it to 1.0 makes it start sliding when the + * virtual photon reaches the start of the "hidden" row of local domains. + * It is permitted to use values outside of the [0.0, 1.0] interval to + * achieve the effects of "pre-movement" and "delayed movement", however + * this might complicate the setup and so not recommended unless essential. 
+ */ + float_64 movePoint; -public: + /** current number of slides since start of simulation */ + uint32_t slideCounter = 0u; - /** Set window move point which defines when to start sliding the window - * - * See declaration of movePoint for a detailed explanation. - * - * @param point ratio of the global window size - */ - void setMovePoint(float_64 const point) - { - movePoint = point; - } + /** + * last simulation step with slide + * used to prevent multiple slides per simulation step + */ + uint32_t lastSlideStep = 0u; - /** - * Set step where the simulation stops the moving window - * - * @param step 0 means no sliding window, else sliding is enabled until step is reached. - */ - void setEndSlideOnStep(int32_t step) - { - // maybe we have a underflow in the cast, this is fine because it results in a very large number - const uint32_t maxSlideStep = static_cast(step); - if ( maxSlideStep < lastSlideStep) - throw std::runtime_error("It is not allowed to stop the moving window in the past."); + //! time step where the sliding window is stopped + uint32_t endSlidingOnStep = 0u; - endSlidingOnStep = maxSlideStep; + public: + /** Set window move point which defines when to start sliding the window + * + * See declaration of movePoint for a detailed explanation. + * + * @param point ratio of the global window size + */ + void setMovePoint(float_64 const point) + { + movePoint = point; + } - static bool firstCall = true; - /* Disable or enable sliding window only in the first call. - * Later changes of step will not influence if the sliding window is activated. + /** + * Set step where the simulation stops the moving window + * + * @param step 0 means no sliding window, else sliding is enabled until step is reached. 
*/ - if (firstCall && endSlidingOnStep != 0u) - slidingWindowEnabled = true; + void setEndSlideOnStep(int32_t step) + { + // maybe we have a underflow in the cast, this is fine because it results in a very large number + const uint32_t maxSlideStep = static_cast(step); + if(maxSlideStep < lastSlideStep) + throw std::runtime_error("It is not allowed to stop the moving window in the past."); - firstCall = false; - } + endSlidingOnStep = maxSlideStep; - /** - * Set the number of already performed moving window slides - * - * @param slides number of slides - * @param currentStep current simulation timestep - */ - void setSlideCounter(uint32_t slides,uint32_t currentStep) - { - slideCounter = slides; - /* ensure that we will not change the slide counter with `incrementSlideCounter()` - * in the same time step again - */ - lastSlideStep = currentStep; - } + static bool firstCall = true; + /* Disable or enable sliding window only in the first call. + * Later changes of step will not influence if the sliding window is activated. + */ + if(firstCall && endSlidingOnStep != 0u) + slidingWindowEnabled = true; - /** - * Return the number of slides since start of simulation. - * If slide occurs in \p currentStep, it is included in the result. 
- * - * @param currentStep current simulation step - * @return number of slides - */ - uint32_t getSlideCounter(uint32_t currentStep) - { - getCurrentSlideInfo(currentStep, nullptr, nullptr); - return slideCounter; - } + firstCall = false; + } - /** - * Returns if sliding window is enabled - * - * @return true if enabled, false otherwise - */ - bool isEnabled() const - { - return slidingWindowEnabled; - } + /** + * Set the number of already performed moving window slides + * + * @param slides number of slides + * @param currentStep current simulation timestep + */ + void setSlideCounter(uint32_t slides, uint32_t currentStep) + { + slideCounter = slides; + /* ensure that we will not change the slide counter with `incrementSlideCounter()` + * in the same time step again + */ + lastSlideStep = currentStep; + } - /** - * Returns if the window can move in the current step - * - * @return false, if Moving window is activated (isEnabled() == true) but already stopped. - * true if moving windows is enabled and simulation step is smaller than - */ - bool isSlidingWindowActive(const uint32_t currenStep) const - { - return isEnabled() && currenStep < endSlidingOnStep; - } + /** + * Return the number of slides since start of simulation. + * If slide occurs in \p currentStep, it is included in the result. + * + * @param currentStep current simulation step + * @return number of slides + */ + uint32_t getSlideCounter(uint32_t currentStep) + { + getCurrentSlideInfo(currentStep, nullptr, nullptr); + return slideCounter; + } - /** - * Return if a slide occurs in the current simulation step. 
- * - * @param currentStep current simulation step - * @return true if slide in current step, false otherwise - */ - bool slideInCurrentStep(uint32_t currentStep) - { - bool doSlide = false; + /** + * Returns if sliding window is enabled + * + * @return true if enabled, false otherwise + */ + bool isEnabled() const + { + return slidingWindowEnabled; + } - if (slidingWindowEnabled) + /** + * Returns if the window can move in the current step + * + * @return false, if Moving window is activated (isEnabled() == true) but already stopped. + * true if moving windows is enabled and simulation step is smaller than + */ + bool isSlidingWindowActive(const uint32_t currenStep) const { - getCurrentSlideInfo(currentStep, &doSlide, nullptr); + return isEnabled() && currenStep < endSlidingOnStep; } - return doSlide; - } + /** + * Return if a slide occurs in the current simulation step. + * + * @param currentStep current simulation step + * @return true if slide in current step, false otherwise + */ + bool slideInCurrentStep(uint32_t currentStep) + { + bool doSlide = false; - /** - * Return true if this is a 'bottom' GPU (y position is y_size - 1), false otherwise - * only set if sliding window is active - */ - bool isBottomGPU(void) const - { - Mask comm_mask = Environment::get().GridController().getCommunicationMask(); - return !comm_mask.isSet(BOTTOM); - } + if(slidingWindowEnabled) + { + getCurrentSlideInfo(currentStep, &doSlide, nullptr); + } - /** - * Returns an instance of MovingWindow - * - * @return an instance - */ - static MovingWindow& getInstance() - { - static MovingWindow instance; - return instance; - } + return doSlide; + } - /** - * Return a window which describes the global and local moving window - * - * @param currentStep current simulation step - * @return moving window - */ - Window getWindow(uint32_t currentStep) - { - const SubGrid& subGrid = Environment::get().SubGrid(); + /** + * Return true if this is a 'bottom' GPU (y position is y_size - 1), false 
otherwise + * only set if sliding window is active + */ + bool isBottomGPU(void) const + { + Mask comm_mask = Environment::get().GridController().getCommunicationMask(); + return !comm_mask.isSet(BOTTOM); + } - /* Without moving window, the selected window spans the whole global domain. - * \see https://github.com/ComputationalRadiationPhysics/picongpu/wiki/PIConGPU-domain-definitions + /** + * Returns an instance of MovingWindow * - * The window's global offset is therefore zero inside the global domain. - * The window's global and local size are equal to the SubGrid quantities. - * The local window offset is the offset within the global window which - * is equal to the local domain offset of the GPU. + * @return an instance */ - Window window; - window.localDimensions = subGrid.getLocalDomain(); - window.globalDimensions = Selection(subGrid.getGlobalDomain().size); - - /* moving window can only slide in y direction */ - if (slidingWindowEnabled) + static MovingWindow& getInstance() { - /* the moving window is smaller than the global domain by exactly one - * GPU (local domain size) in moving (y) direction - */ - window.globalDimensions.size.y() -= subGrid.getLocalDomain().size.y(); + static MovingWindow instance; + return instance; + } - float_64 offsetFirstGPU = 0.0; - getCurrentSlideInfo(currentStep, nullptr, &offsetFirstGPU); + /** + * Return a window which describes the global and local moving window + * + * @param currentStep current simulation step + * @return moving window + */ + Window getWindow(uint32_t currentStep) + { + const SubGrid& subGrid = Environment::get().SubGrid(); - /* while moving, the windows global offset within the global domain is between 0 - * and smaller than the local domain's size in y. + /* Without moving window, the selected window spans the whole global domain. 
+ * \see https://github.com/ComputationalRadiationPhysics/picongpu/wiki/PIConGPU-domain-definitions + * + * The window's global offset is therefore zero inside the global domain. + * The window's global and local size are equal to the SubGrid quantities. + * The local window offset is the offset within the global window which + * is equal to the local domain offset of the GPU. */ - window.globalDimensions.offset.y() = offsetFirstGPU; - - /* set top/bottom if there are no communication partners - * for this GPU in the respective direction */ - const Mask comm_mask = Environment::get().GridController().getCommunicationMask(); - const bool isTopGpu = !comm_mask.isSet(TOP); - const bool isBottomGpu = !comm_mask.isSet(BOTTOM); + Window window; + window.localDimensions = subGrid.getLocalDomain(); + window.globalDimensions = Selection(subGrid.getGlobalDomain().size); - if (isTopGpu) + /* moving window can only slide in y direction */ + if(slidingWindowEnabled) { - /* the windows local offset within the global window is reduced - * by the global window offset within the global domain + /* the moving window is smaller than the global domain by exactly one + * GPU (local domain size) in moving (y) direction */ - window.localDimensions.size.y() -= offsetFirstGPU; - } - else - { - window.localDimensions.offset.y() = subGrid.getLocalDomain().offset.y() - offsetFirstGPU; - if (isBottomGpu) + window.globalDimensions.size.y() -= subGrid.getLocalDomain().size.y(); + + float_64 offsetFirstGPU = 0.0; + getCurrentSlideInfo(currentStep, nullptr, &offsetFirstGPU); + + /* while moving, the windows global offset within the global domain is between 0 + * and smaller than the local domain's size in y. 
+ */ + window.globalDimensions.offset.y() = offsetFirstGPU; + + /* set top/bottom if there are no communication partners + * for this GPU in the respective direction */ + const Mask comm_mask = Environment::get().GridController().getCommunicationMask(); + const bool isTopGpu = !comm_mask.isSet(TOP); + const bool isBottomGpu = !comm_mask.isSet(BOTTOM); + + if(isTopGpu) { - window.localDimensions.size.y() = offsetFirstGPU; + /* the windows local offset within the global window is reduced + * by the global window offset within the global domain + */ + window.localDimensions.size.y() -= offsetFirstGPU; + } + else + { + window.localDimensions.offset.y() = subGrid.getLocalDomain().offset.y() - offsetFirstGPU; + if(isBottomGpu) + { + window.localDimensions.size.y() = offsetFirstGPU; + } } } - } - - return window; - } - - /** - * Return a window which describes the global and local domain - * - * @param currentStep current simulation step - * @return window over global/local domain - */ - Window getDomainAsWindow(uint32_t currentStep) const - { - const SubGrid& subGrid = Environment::get().SubGrid(); - Window window; - window.localDimensions = subGrid.getLocalDomain(); - window.globalDimensions = Selection(subGrid.getGlobalDomain().size); + return window; + } - return window; - } + /** + * Return a window which describes the global and local domain + * + * @param currentStep current simulation step + * @return window over global/local domain + */ + Window getDomainAsWindow(uint32_t currentStep) const + { + const SubGrid& subGrid = Environment::get().SubGrid(); + Window window; -}; + window.localDimensions = subGrid.getLocalDomain(); + window.globalDimensions = Selection(subGrid.getGlobalDomain().size); -} //namespace picongpu + return window; + } + }; +} // namespace picongpu diff --git a/include/picongpu/simulation/control/MySimulation.hpp b/include/picongpu/simulation/control/MySimulation.hpp deleted file mode 100644 index ff9b536d5a..0000000000 --- 
a/include/picongpu/simulation/control/MySimulation.hpp +++ /dev/null @@ -1,731 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, - * Richard Pausch, Alexander Debus, Marco Garten, - * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include "picongpu/simulation_defines.hpp" -#include "picongpu/versionFormat.hpp" -#include "picongpu/random/seed/ISeed.hpp" - -#include -#include -#include -#include -#include "picongpu/simulation/control/MovingWindow.hpp" -#include -#include - -#include "picongpu/fields/FieldE.hpp" -#include "picongpu/fields/FieldB.hpp" -#include "picongpu/fields/FieldJ.hpp" -#include "picongpu/fields/FieldTmp.hpp" -#include "picongpu/fields/MaxwellSolver/Solvers.hpp" -#include "picongpu/fields/MaxwellSolver/YeePML/Field.hpp" -#include "picongpu/fields/background/cellwiseOperation.hpp" -#include "picongpu/initialization/IInitPlugin.hpp" -#include "picongpu/initialization/ParserGridDistribution.hpp" -#include "picongpu/particles/Manipulate.hpp" -#include "picongpu/particles/manipulators/manipulators.hpp" -#include "picongpu/particles/filter/filter.hpp" -#include "picongpu/particles/flylite/NonLTE.tpp" -#include 
"picongpu/simulation/control/DomainAdjuster.hpp" -#include "picongpu/simulation/stage/Bremsstrahlung.hpp" -#include "picongpu/simulation/stage/CurrentBackground.hpp" -#include "picongpu/simulation/stage/CurrentDeposition.hpp" -#include "picongpu/simulation/stage/CurrentInterpolationAndAdditionToEMF.hpp" -#include "picongpu/simulation/stage/CurrentReset.hpp" -#include "picongpu/simulation/stage/FieldBackground.hpp" -#include "picongpu/simulation/stage/MomentumBackup.hpp" -#include "picongpu/simulation/stage/ParticleIonization.hpp" -#include "picongpu/simulation/stage/ParticlePush.hpp" -#include "picongpu/simulation/stage/PopulationKinetics.hpp" -#include "picongpu/simulation/stage/SynchrotronRadiation.hpp" -#include -#include - -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp" -# include "picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp" -#endif - -#include "picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp" - -#include -#include -#include -#include - -#include -#include - -#include -#include "picongpu/particles/ParticlesFunctors.hpp" -#include "picongpu/particles/InitFunctors.hpp" -#if( PMACC_CUDA_ENABLED == 1 ) -# include -#endif -#include -#include -#include "picongpu/particles/traits/HasIonizersWithRNG.hpp" -#include - -#include -#include -#include - - -namespace picongpu -{ -using namespace pmacc; - -/** - * Global simulation controller class. - * - * Initialises simulation data and defines the simulation steps - * for each iteration. 
- * - * @tparam DIM the dimension (2-3) for the simulation - */ -class MySimulation : public SimulationHelper -{ -public: - - /** - * Constructor - */ - MySimulation() : - myFieldSolver(nullptr), - cellDescription(nullptr), - initialiserController(nullptr), - slidingWindow(false), - windowMovePoint(0.0), - endSlidingOnStep(-1), - showVersionOnce(false) - { - } - - virtual void pluginRegisterHelp(po::options_description& desc) - { - SimulationHelper::pluginRegisterHelp(desc); - desc.add_options() - ("versionOnce", po::value(&showVersionOnce)->zero_tokens(), "print version information once and start") - - ("devices,d", po::value > (&devices)->multitoken(), "number of devices in each dimension") - - ("grid,g", po::value > (&gridSize)->multitoken(), - "size of the simulation grid") - - ("gridDist", po::value > (&gridDistribution)->multitoken(), - "Regex to describe the static distribution of the cells for each device," - "default: equal distribution over all devices\n" - " example:\n" - " -d 2 4 1\n" - " -g 128 192 12\n" - " --gridDist \"64{2}\" \"64,32{2},64\"\n") - - ("periodic", po::value > (&periodic)->multitoken(), - "specifying whether the grid is periodic (1) or not (0) in each dimension, default: no periodic dimensions") - - ("moving,m", po::value(&slidingWindow)->zero_tokens(), "enable sliding/moving window") - /* For now we still use the compile-time movePoint variable to set - * the default value and provide backward compatibility - */ - ("windowMovePoint", po::value(&windowMovePoint)->default_value(movePoint), - "ratio of the global window size in y which defines when to " - "start sliding the window. 
" - "The window starts sliding at the time required to pass the " - "distance of windowMovePoint * (global window size in y) " - "when moving with the speed of light") - ("stopWindow", po::value(&endSlidingOnStep)->default_value(-1), - "stops the window at stimulation step, " - "-1 means that window is never stopping") - ("autoAdjustGrid", po::value(&autoAdjustGrid)->default_value(true), - "auto adjust the grid size if PIConGPU conditions are not fulfilled"); - } - - std::string pluginGetName() const - { - return "PIConGPU"; - } - - virtual void pluginLoad() - { - //fill periodic with 0 - while (periodic.size() < 3) - periodic.push_back(0); - - // check on correct number of devices. fill with default value 1 for missing dimensions - if (devices.size() > 3) - { - std::cerr << "Invalid number of devices.\nuse [-d dx=1 dy=1 dz=1]" << std::endl; - } - else - while (devices.size() < 3) - devices.push_back(1); - - // check on correct grid size. fill with default grid size value 1 for missing 3. dimension - if (gridSize.size() < 2 || gridSize.size() > 3) - { - std::cerr << "Invalid or missing grid size.\nuse -g width height [depth=1]" << std::endl; - } - else - if (gridSize.size() == 2) - gridSize.push_back(1); - - if (slidingWindow && devices[1] == 1) - { - std::cerr << "Invalid configuration. 
Can't use moving window with one device in Y direction" << std::endl; - } - - DataSpace gridSizeGlobal; - DataSpace gpus; - DataSpace isPeriodic; - - for (uint32_t i = 0; i < simDim; ++i) - { - gridSizeGlobal[i] = gridSize[i]; - gpus[i] = devices[i]; - isPeriodic[i] = periodic[i]; - } - - Environment::get().initDevices(gpus, isPeriodic); - pmacc::GridController< simDim > & gc = pmacc::Environment::get().GridController(); - - DataSpace myGPUpos(gc.getPosition()); - - if( gc.getGlobalRank() == 0 ) - { - if( showVersionOnce ) - { - void( getSoftwareVersions( std::cout ) ); - } - } - - // calculate the number of local grid cells and - // the local cell offset to the global box - for (uint32_t dim = 0; dim < gridDistribution.size() && dim < simDim; ++dim) - { - // parse string - ParserGridDistribution parserGD(gridDistribution.at(dim)); - - // verify number of blocks and devices in dimension match - parserGD.verifyDevices(gpus[dim]); - - // calculate local grid points & offset - gridSizeLocal[dim] = parserGD.getLocalSize(myGPUpos[dim]); - } - // by default: use an equal distributed box for all omitted params - for (uint32_t dim = gridDistribution.size(); dim < simDim; ++dim) - { - gridSizeLocal[dim] = gridSizeGlobal[dim] / gpus[dim]; - } - - DataSpace gridOffset; - - DomainAdjuster domainAdjuster( - gpus, - myGPUpos, - isPeriodic, - slidingWindow - ); - - if(!autoAdjustGrid) - domainAdjuster.validateOnly(); - - domainAdjuster(gridSizeGlobal, gridSizeLocal, gridOffset); - - Environment::get().initGrids(gridSizeGlobal, gridSizeLocal, gridOffset); - - if( !slidingWindow ) - { - windowMovePoint = 0.0; - endSlidingOnStep = 0; - } - MovingWindow::getInstance().setMovePoint(windowMovePoint); - MovingWindow::getInstance().setEndSlideOnStep(endSlidingOnStep); - - log ("rank %1%; localsize %2%; localoffset %3%;") % - myGPUpos.toString() % gridSizeLocal.toString() % gridOffset.toString(); - - SimulationHelper::pluginLoad(); - - GridLayout layout(gridSizeLocal, GuardSize::toRT() * 
SuperCellSize::toRT()); - cellDescription = new MappingDesc(layout.getDataSpace(), DataSpace(GuardSize::toRT())); - - if (gc.getGlobalRank() == 0) - { - if (MovingWindow::getInstance().isEnabled()) - log ("Sliding Window is ON"); - else - log ("Sliding Window is OFF"); - } - } - - virtual void pluginUnload() - { - DataConnector &dc = Environment<>::get().DataConnector(); - - SimulationHelper::pluginUnload(); - - __delete(myFieldSolver); - - /** unshare all registered ISimulationData sets - * - * @todo can be removed as soon as our Environment learns to shutdown in - * a distinct order, e.g. DataConnector before CUDA context - */ - dc.clean(); - - __delete(cellDescription); - } - - void notify(uint32_t) - { - - } - - virtual void init() - { - namespace nvmem = pmacc::nvidia::memory; - - DataConnector &dc = Environment<>::get().DataConnector(); - initFields(dc); - - // create field solver - this->myFieldSolver = new fields::Solver(*cellDescription); - - // Initialize random number generator and synchrotron functions, if there are synchrotron or bremsstrahlung Photons - using AllSynchrotronPhotonsSpecies = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - synchrotronPhotons<> - >::type; - using AllBremsstrahlungPhotonsSpecies = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - bremsstrahlungPhotons<> - >::type; - - // create factory for the random number generator - const uint32_t userSeed = random::seed::ISeed< random::SeedGenerator >{}(); - const uint32_t seed = std::hash{}( - std::to_string( userSeed ) - ); - - using RNGFactory = pmacc::random::RNGProvider< simDim, random::Generator >; - auto rngFactory = pmacc::memory::makeUnique< RNGFactory >( - Environment::get().SubGrid().getLocalDomain().size - ); - if (Environment::get().GridController().getGlobalRank() == 0) - { - log("used Random Number Generator: %1% seed: %2%") % - rngFactory->getName() % - userSeed; - } - - // init and share random number generator - 
pmacc::GridController& gridCon = pmacc::Environment::get().GridController(); - rngFactory->init( gridCon.getScalarPosition() ^ seed ); - dc.consume( std::move( rngFactory ) ); - - // Initialize synchrotron functions, if there are synchrotron photon species - if(!bmpl::empty::value) - { - this->synchrotronFunctions.init(); - } -#if( PMACC_CUDA_ENABLED == 1 ) - // Initialize bremsstrahlung lookup tables, if there are species containing bremsstrahlung photons - if(!bmpl::empty::value) - { - meta::ForEach< - AllBremsstrahlungPhotonsSpecies, - particles::bremsstrahlung::FillScaledSpectrumMap< bmpl::_1 > - > fillScaledSpectrumMap; - fillScaledSpectrumMap(this->scaledBremsstrahlungSpectrumMap); - - this->bremsstrahlungPhotonAngle.init(); - } - - /* Create an empty allocator. This one is resized after all exchanges - * for particles are created */ - deviceHeap.reset(new DeviceHeap(0)); -#endif - - /* Allocate helper fields for FLYlite population kinetics for atomic physics - * (histograms, rate matrix, etc.) 
- */ - using AllFlyLiteIons = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - populationKinetics<> - >::type; - - meta::ForEach< - AllFlyLiteIons, - particles::CallPopulationKineticsInit< bmpl::_1 >, - bmpl::_1 - > initPopulationKinetics; - initPopulationKinetics( - gridSizeLocal - ); - - // Allocate and initialize particle species with all left-over memory below - meta::ForEach< VectorAllSpecies, particles::CreateSpecies > createSpeciesMemory; - createSpeciesMemory( deviceHeap, cellDescription ); - - size_t freeGpuMem(0); - Environment<>::get().MemoryInfo().getMemoryInfo(&freeGpuMem); - if(freeGpuMem < reservedGpuMemorySize) - { - pmacc::log< picLog::MEMORY > ("%1% MiB free memory < %2% MiB required reserved memory") - % (freeGpuMem / 1024 / 1024) % (reservedGpuMemorySize / 1024 / 1024) ; - std::stringstream msg; - msg << "Cannot reserve " - << (reservedGpuMemorySize / 1024 / 1024) << " MiB as there is only " - << (freeGpuMem / 1024 / 1024) << " MiB free device memory left"; - throw std::runtime_error(msg.str()); - } - -#if( PMACC_CUDA_ENABLED == 1 ) - size_t heapSize = freeGpuMem - reservedGpuMemorySize; - - if( Environment<>::get().MemoryInfo().isSharedMemoryPool() ) - { - heapSize /= 2; - log ("Shared RAM between GPU and host detected - using only half of the 'device' memory."); - } - else - log ("RAM is NOT shared between GPU and host."); - - // initializing the heap for particles - deviceHeap->destructiveResize(heapSize); - auto mallocMCBuffer = pmacc::memory::makeUnique< MallocMCBuffer >( deviceHeap ); - dc.consume( std::move( mallocMCBuffer ) ); -#endif - meta::ForEach< VectorAllSpecies, particles::LogMemoryStatisticsForSpecies > logMemoryStatisticsForSpecies; - logMemoryStatisticsForSpecies( deviceHeap ); - - Environment<>::get().MemoryInfo().getMemoryInfo(&freeGpuMem); - log ("free mem after all mem is allocated %1% MiB") % (freeGpuMem / 1024 / 1024); - - IdProvider::init(); - -#if( PMACC_CUDA_ENABLED == 1 ) - /* add CUDA streams to 
the StreamController for concurrent execution */ - Environment<>::get().StreamController().addStreams(6); -#endif - } - - virtual uint32_t fillSimulation() - { - /* assume start (restart in initialiserController might change that) */ - uint32_t step = 0; - - /* set slideCounter properties for PIConGPU MovingWindow: assume start - * (restart in initialiserController might change this again) - */ - MovingWindow::getInstance().setSlideCounter(0, 0); - /* Update MPI domain decomposition: will also update SubGrid domain - * information such as local offsets in y-direction - */ - GridController &gc = Environment::get().GridController(); - gc.setStateAfterSlides(0); - - DataConnector &dc = Environment<>::get().DataConnector(); - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName(), true ); - - /* fill all objects registed in DataConnector */ - if (initialiserController) - { - initialiserController->printInformation(); - if (this->restartRequested) - { - /* we do not require '--checkpoint.restart.step' if a master checkpoint file is found */ - if (this->restartStep < 0) - { - std::vector checkpoints = readCheckpointMasterFile(); - - if (checkpoints.empty()) - { - throw std::runtime_error( - "Restart failed. You must provide the '--checkpoint.restart.step' argument. See picongpu --help." - ); - } else - this->restartStep = checkpoints.back(); - } - - initialiserController->restart((uint32_t)this->restartStep, this->restartDirectory); - step = this->restartStep; - - /** restore background fields in GUARD - * - * loads the outer GUARDS of the global domain for absorbing/open boundary condtions - * - * @todo as soon as we add GUARD fields to the checkpoint data, e.g. 
for PML boundary - * conditions, this section needs to be removed - */ - cellwiseOperation::CellwiseOperation< GUARD > guardBGField( *cellDescription ); - namespace nvfct = pmacc::nvidia::functors; - guardBGField( fieldE, nvfct::Add(), FieldBackgroundE( fieldE->getUnit() ), - step, FieldBackgroundE::InfluenceParticlePusher ); - guardBGField( fieldB, nvfct::Add(), FieldBackgroundB( fieldB->getUnit() ), - step, FieldBackgroundB::InfluenceParticlePusher ); - } - else - { - initialiserController->init(); - meta::ForEach< particles::InitPipeline, particles::CallFunctor > initSpecies; - initSpecies( step ); - } - } - - size_t freeGpuMem(0u); - Environment<>::get().MemoryInfo().getMemoryInfo(&freeGpuMem); - log ("free mem after all particles are initialized %1% MiB") % (freeGpuMem / 1024 / 1024); - - // generate valid GUARDS (overwrite) - EventTask eRfieldE = fieldE->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(eRfieldE); - EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(eRfieldB); - - dc.releaseData( FieldE::getName() ); - dc.releaseData( FieldB::getName() ); - - return step; - } - - /** - * Run one simulation step. 
- * - * @param currentStep iteration number of the current step - */ - virtual void runOneStep(uint32_t currentStep) - { - using namespace simulation::stage; - MomentumBackup{ }( currentStep ); - ParticleIonization{ *cellDescription }( currentStep ); - PopulationKinetics{ }( currentStep ); - SynchrotronRadiation{ - *cellDescription, - synchrotronFunctions - }( currentStep ); -#if( PMACC_CUDA_ENABLED == 1 ) - Bremsstrahlung{ - *cellDescription, - scaledBremsstrahlungSpectrumMap, - bremsstrahlungPhotonAngle - }( currentStep ); -#endif - EventTask commEvent; - ParticlePush{ }( currentStep, commEvent ); - FieldBackground{ *cellDescription }( currentStep, nvidia::functors::Sub( ) ); - myFieldSolver->update_beforeCurrent( currentStep ); - CurrentReset{ }( currentStep ); - __setTransactionEvent( commEvent ); - CurrentBackground{ *cellDescription }( currentStep ); - CurrentDeposition{ }( currentStep ); - CurrentInterpolationAndAdditionToEMF{ }( currentStep ); - myFieldSolver->update_afterCurrent( currentStep ); - } - - virtual void movingWindowCheck(uint32_t currentStep) - { - if (MovingWindow::getInstance().slideInCurrentStep(currentStep)) - { - slide(currentStep); - } - - /* do not double-add background field on restarts - * (contained in checkpoint data) - */ - bool addBgFields = true; - if( this->restartRequested ) - { - if( this->restartStep == int32_t(currentStep) ) - addBgFields = false; - } - - if( addBgFields ) - { - /** add background field: the movingWindowCheck is just at the start - * of a time step before all the plugins are called (and the step - * itself is performed for this time step). - * Hence the background field is visible for all plugins - * in between the time steps. 
- */ - simulation::stage::FieldBackground{ *cellDescription }( - currentStep, nvidia::functors::Add( ) - ); - } - } - - virtual void resetAll(uint32_t currentStep) - { - resetFields( currentStep ); - meta::ForEach< - VectorAllSpecies, - particles::CallReset< bmpl::_1 > - > resetParticles; - resetParticles( currentStep ); - } - - void slide(uint32_t currentStep) - { - GridController& gc = Environment::get().GridController(); - - if (gc.slide()) - { - log ("slide in step %1%") % currentStep; - resetAll(currentStep); - initialiserController->slide(currentStep); - meta::ForEach< particles::InitPipeline, particles::CallFunctor< bmpl::_1 > > initSpecies; - initSpecies( currentStep ); - } - } - - virtual void setInitController(IInitPlugin *initController) - { - - PMACC_ASSERT(initController != nullptr); - this->initialiserController = initController; - } - - MappingDesc* getMappingDescription() - { - return cellDescription; - } - -protected: - - std::shared_ptr deviceHeap; - - fields::Solver* myFieldSolver; - -#if( PMACC_CUDA_ENABLED == 1 ) - // creates lookup tables for the bremsstrahlung effect - // map - std::map scaledBremsstrahlungSpectrumMap; - particles::bremsstrahlung::GetPhotonAngle bremsstrahlungPhotonAngle; -#endif - - // Synchrotron functions (used in synchrotronPhotons module) - particles::synchrotronPhotons::SynchrotronFunctions synchrotronFunctions; - - // output classes - - IInitPlugin* initialiserController; - - MappingDesc* cellDescription; - - // layout parameter - std::vector devices; - std::vector gridSize; - /** Without guards */ - DataSpace gridSizeLocal; - std::vector periodic; - - std::vector gridDistribution; - - bool slidingWindow; - int32_t endSlidingOnStep; - float_64 windowMovePoint; - bool showVersionOnce; - bool autoAdjustGrid = true; - -private: - - void initFields( DataConnector& dataConnector ) - { - using pmacc::memory::makeUnique; - auto fieldB = makeUnique< FieldB >( *cellDescription ); - dataConnector.consume( std::move( fieldB ) ); 
- auto fieldE = makeUnique< FieldE >( *cellDescription ); - dataConnector.consume( std::move( fieldE ) ); - auto fieldJ = makeUnique< FieldJ >( *cellDescription ); - dataConnector.consume( std::move( fieldJ ) ); - for( uint32_t slot = 0; slot < fieldTmpNumSlots; ++slot) - { - auto fieldTmp = makeUnique< FieldTmp >( *cellDescription, slot ); - dataConnector.consume( std::move( fieldTmp ) ); - } - } - - /** Reset all fields - * - * @param currentStep iteration number of the current step - */ - void resetFields( uint32_t const currentStep ) - { - auto resetField = [currentStep]( std::string const name ) - { - DataConnector & dc = Environment<>::get().DataConnector(); - auto const fieldExists = dc.hasId( name ); - if( fieldExists ) - { - using FieldHelper = SimulationFieldHelper< MappingDesc >; - auto field = std::dynamic_pointer_cast< FieldHelper >( - dc.get< ISimulationData >( name, true ) - ); - if( field ) - field->reset( currentStep ); - dc.releaseData( name ); - } - }; - - /* @todo for now the list of fields is hardcoded here, a more generic - * solution would require changes to design of DataConnector. - * FieldJ and FieldTmp are effectively cleared each time iteration and - * so do not need a reset. 
- */ - std::array< std::string, 4 > const fieldNames{ { - FieldE::getName(), - FieldB::getName(), - fields::maxwellSolver::yeePML::FieldE::getName(), - fields::maxwellSolver::yeePML::FieldB::getName() - } }; - std::for_each( - fieldNames.cbegin(), - fieldNames.cend(), - resetField - ); - } - -}; -} /* namespace picongpu */ - -#include "picongpu/fields/Fields.tpp" -#include "picongpu/particles/synchrotronPhotons/SynchrotronFunctions.tpp" - -#if( PMACC_CUDA_ENABLED == 1 ) -# include "picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp" -# include "picongpu/particles/bremsstrahlung/ScaledSpectrum.tpp" -#endif diff --git a/include/picongpu/simulation/control/Simulation.hpp b/include/picongpu/simulation/control/Simulation.hpp new file mode 100644 index 0000000000..e72140f204 --- /dev/null +++ b/include/picongpu/simulation/control/Simulation.hpp @@ -0,0 +1,719 @@ +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, + * Richard Pausch, Alexander Debus, Marco Garten, + * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include "picongpu/simulation_defines.hpp" +#include "picongpu/versionFormat.hpp" +#include "picongpu/random/seed/ISeed.hpp" + +#include +#include +#include +#include +#include "picongpu/simulation/control/MovingWindow.hpp" +#include +#include + +#include "picongpu/fields/FieldE.hpp" +#include "picongpu/fields/FieldB.hpp" +#include "picongpu/fields/FieldJ.hpp" +#include "picongpu/fields/FieldTmp.hpp" +#include "picongpu/fields/MaxwellSolver/Solvers.hpp" +#include "picongpu/fields/MaxwellSolver/YeePML/Field.hpp" +#include "picongpu/fields/background/cellwiseOperation.hpp" +#include "picongpu/initialization/IInitPlugin.hpp" +#include "picongpu/initialization/ParserGridDistribution.hpp" +#include "picongpu/particles/Manipulate.hpp" +#include "picongpu/particles/manipulators/manipulators.hpp" +#include "picongpu/particles/filter/filter.hpp" +#include "picongpu/particles/flylite/NonLTE.tpp" +#include "picongpu/simulation/control/DomainAdjuster.hpp" +#include "picongpu/simulation/stage/Bremsstrahlung.hpp" +#include "picongpu/simulation/stage/CurrentBackground.hpp" +#include "picongpu/simulation/stage/CurrentDeposition.hpp" +#include "picongpu/simulation/stage/CurrentInterpolationAndAdditionToEMF.hpp" +#include "picongpu/simulation/stage/CurrentReset.hpp" +#include "picongpu/simulation/stage/FieldBackground.hpp" +#include "picongpu/simulation/stage/MomentumBackup.hpp" +#include "picongpu/simulation/stage/ParticleIonization.hpp" +#include "picongpu/simulation/stage/ParticlePush.hpp" +#include "picongpu/simulation/stage/PopulationKinetics.hpp" +#include "picongpu/simulation/stage/SynchrotronRadiation.hpp" +#include +#include + +#if(PMACC_CUDA_ENABLED == 1) +# include "picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp" +# include "picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp" +#endif + +#include 
"picongpu/particles/synchrotronPhotons/SynchrotronFunctions.hpp" + +#include +#include +#include +#include + +#include +#include + +#include +#include "picongpu/particles/ParticlesFunctors.hpp" +#include "picongpu/particles/InitFunctors.hpp" +#include +#include +#include +#include + +#include +#include +#include + + +namespace picongpu +{ + using namespace pmacc; + + /** + * Global simulation controller class. + * + * Initialises simulation data and defines the simulation steps + * for each iteration. + * + * @tparam DIM the dimension (2-3) for the simulation + */ + class Simulation : public SimulationHelper + { + public: + /** + * Constructor + */ + Simulation() + : myFieldSolver(nullptr) + , cellDescription(nullptr) + , initialiserController(nullptr) + , slidingWindow(false) + , windowMovePoint(0.0) + , endSlidingOnStep(-1) + , showVersionOnce(false) + { + } + + virtual void pluginRegisterHelp(po::options_description& desc) + { + SimulationHelper::pluginRegisterHelp(desc); + desc.add_options()( + "versionOnce", + po::value(&showVersionOnce)->zero_tokens(), + "print version information once and start") + + ("devices,d", + po::value>(&devices)->multitoken(), + "number of devices in each dimension") + + ("grid,g", + po::value>(&gridSize)->multitoken(), + "size of the simulation grid") + + ("gridDist", + po::value>(&gridDistribution)->multitoken(), + "Regex to describe the static distribution of the cells for each device," + "default: equal distribution over all devices\n" + " example:\n" + " -d 2 4 1\n" + " -g 128 192 12\n" + " --gridDist \"64{2}\" \"64,32{2},64\"\n") + + ("periodic", + po::value>(&periodic)->multitoken(), + "specifying whether the grid is periodic (1) or not (0) in each dimension, default: no " + "periodic dimensions") + + ("moving,m", + po::value(&slidingWindow)->zero_tokens(), + "enable sliding/moving window") + /* For now we still use the compile-time movePoint variable to set + * the default value and provide backward compatibility + */ + 
("windowMovePoint", + po::value(&windowMovePoint)->default_value(movePoint), + "ratio of the global window size in y which defines when to " + "start sliding the window. " + "The window starts sliding at the time required to pass the " + "distance of windowMovePoint * (global window size in y) " + "when moving with the speed of light")( + "stopWindow", + po::value(&endSlidingOnStep)->default_value(-1), + "stops the window at stimulation step, " + "-1 means that window is never stopping")( + "autoAdjustGrid", + po::value(&autoAdjustGrid)->default_value(true), + "auto adjust the grid size if PIConGPU conditions are not fulfilled"); + } + + std::string pluginGetName() const + { + return "PIConGPU"; + } + + virtual void pluginLoad() + { + // fill periodic with 0 + while(periodic.size() < 3) + periodic.push_back(0); + + // check on correct number of devices. fill with default value 1 for missing dimensions + if(devices.size() > 3) + { + std::cerr << "Invalid number of devices.\nuse [-d dx=1 dy=1 dz=1]" << std::endl; + } + else + while(devices.size() < 3) + devices.push_back(1); + + // check on correct grid size. fill with default grid size value 1 for missing 3. dimension + if(gridSize.size() < 2 || gridSize.size() > 3) + { + std::cerr << "Invalid or missing grid size.\nuse -g width height [depth=1]" << std::endl; + } + else if(gridSize.size() == 2) + gridSize.push_back(1); + + if(slidingWindow && devices[1] == 1) + { + std::cerr << "Invalid configuration. 
Can't use moving window with one device in Y direction" + << std::endl; + } + + DataSpace gridSizeGlobal; + DataSpace gpus; + DataSpace isPeriodic; + + for(uint32_t i = 0; i < simDim; ++i) + { + gridSizeGlobal[i] = gridSize[i]; + gpus[i] = devices[i]; + isPeriodic[i] = periodic[i]; + } + + Environment::get().initDevices(gpus, isPeriodic); + pmacc::GridController& gc = pmacc::Environment::get().GridController(); + + DataSpace myGPUpos(gc.getPosition()); + + if(gc.getGlobalRank() == 0) + { + if(showVersionOnce) + { + void(getSoftwareVersions(std::cout)); + } + } + + // calculate the number of local grid cells and + // the local cell offset to the global box + for(uint32_t dim = 0; dim < gridDistribution.size() && dim < simDim; ++dim) + { + // parse string + ParserGridDistribution parserGD(gridDistribution.at(dim)); + + // verify number of blocks and devices in dimension match + parserGD.verifyDevices(gpus[dim]); + + // calculate local grid points & offset + gridSizeLocal[dim] = parserGD.getLocalSize(myGPUpos[dim]); + } + // by default: use an equal distributed box for all omitted params + for(uint32_t dim = gridDistribution.size(); dim < simDim; ++dim) + { + gridSizeLocal[dim] = gridSizeGlobal[dim] / gpus[dim]; + } + + DataSpace gridOffset; + + DomainAdjuster domainAdjuster(gpus, myGPUpos, isPeriodic, slidingWindow); + + if(!autoAdjustGrid) + domainAdjuster.validateOnly(); + + domainAdjuster(gridSizeGlobal, gridSizeLocal, gridOffset); + + Environment::get().initGrids(gridSizeGlobal, gridSizeLocal, gridOffset); + + if(!slidingWindow) + { + windowMovePoint = 0.0; + endSlidingOnStep = 0; + } + MovingWindow::getInstance().setMovePoint(windowMovePoint); + MovingWindow::getInstance().setEndSlideOnStep(endSlidingOnStep); + + log("rank %1%; localsize %2%; localoffset %3%;") % myGPUpos.toString() + % gridSizeLocal.toString() % gridOffset.toString(); + + SimulationHelper::pluginLoad(); + + GridLayout layout(gridSizeLocal, GuardSize::toRT() * SuperCellSize::toRT()); + 
cellDescription = new MappingDesc(layout.getDataSpace(), DataSpace(GuardSize::toRT())); + + if(gc.getGlobalRank() == 0) + { + if(MovingWindow::getInstance().isEnabled()) + log("Sliding Window is ON"); + else + log("Sliding Window is OFF"); + } + } + + virtual void pluginUnload() + { + DataConnector& dc = Environment<>::get().DataConnector(); + + SimulationHelper::pluginUnload(); + + __delete(myFieldSolver); + + /** unshare all registered ISimulationData sets + * + * @todo can be removed as soon as our Environment learns to shutdown in + * a distinct order, e.g. DataConnector before CUDA context + */ + dc.clean(); + + __delete(cellDescription); + } + + void notify(uint32_t) + { + } + + virtual void init() + { + namespace nvmem = pmacc::nvidia::memory; + + // This has to be called before initFields() + currentInterpolationAndAdditionToEMF.init(); + + DataConnector& dc = Environment<>::get().DataConnector(); + initFields(dc); + + // create field solver + this->myFieldSolver = new fields::Solver(*cellDescription); + + // Initialize random number generator and synchrotron functions, if there are synchrotron or bremsstrahlung + // Photons + using AllSynchrotronPhotonsSpecies = + typename pmacc::particles::traits::FilterByFlag>::type; + using AllBremsstrahlungPhotonsSpecies = + typename pmacc::particles::traits::FilterByFlag>::type; + + // create factory for the random number generator + const uint32_t userSeed = random::seed::ISeed{}(); + const uint32_t seed = std::hash{}(std::to_string(userSeed)); + + using RNGFactory = pmacc::random::RNGProvider; + auto rngFactory = std::make_unique(Environment::get().SubGrid().getLocalDomain().size); + if(Environment::get().GridController().getGlobalRank() == 0) + { + log("used Random Number Generator: %1% seed: %2%") % rngFactory->getName() % userSeed; + } + + // init and share random number generator + pmacc::GridController& gridCon = pmacc::Environment::get().GridController(); + rngFactory->init(gridCon.getScalarPosition() ^ seed); 
+ dc.consume(std::move(rngFactory)); + + // Initialize synchrotron functions, if there are synchrotron photon species + if(!bmpl::empty::value) + { + this->synchrotronFunctions.init(); + } +#if(PMACC_CUDA_ENABLED == 1) + // Initialize bremsstrahlung lookup tables, if there are species containing bremsstrahlung photons + if(!bmpl::empty::value) + { + meta::ForEach< + AllBremsstrahlungPhotonsSpecies, + particles::bremsstrahlung::FillScaledSpectrumMap> + fillScaledSpectrumMap; + fillScaledSpectrumMap(this->scaledBremsstrahlungSpectrumMap); + + this->bremsstrahlungPhotonAngle.init(); + } +#endif + +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + auto nativeCudaStream = cupla::manager::Stream::get().stream(0); + /* Create an empty allocator. This one is resized after all exchanges + * for particles are created */ + deviceHeap.reset( + + new DeviceHeap(cupla::manager::Device::get().current(), nativeCudaStream, 0u)); + cuplaStreamSynchronize(0); +#endif + + /* Allocate helper fields for FLYlite population kinetics for atomic physics + * (histograms, rate matrix, etc.) 
+ */ + using AllFlyLiteIons = + typename pmacc::particles::traits::FilterByFlag>::type; + + meta::ForEach, bmpl::_1> + initPopulationKinetics; + initPopulationKinetics(gridSizeLocal); + + // Allocate and initialize particle species with all left-over memory below + meta::ForEach> createSpeciesMemory; + createSpeciesMemory(deviceHeap, cellDescription); + + size_t freeGpuMem(0); + Environment<>::get().MemoryInfo().getMemoryInfo(&freeGpuMem); + if(freeGpuMem < reservedGpuMemorySize) + { + pmacc::log("%1% MiB free memory < %2% MiB required reserved memory") + % (freeGpuMem / 1024 / 1024) % (reservedGpuMemorySize / 1024 / 1024); + std::stringstream msg; + msg << "Cannot reserve " << (reservedGpuMemorySize / 1024 / 1024) << " MiB as there is only " + << (freeGpuMem / 1024 / 1024) << " MiB free device memory left"; + throw std::runtime_error(msg.str()); + } + +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + size_t heapSize = freeGpuMem - reservedGpuMemorySize; + + if(Environment<>::get().MemoryInfo().isSharedMemoryPool()) + { + heapSize /= 2; + log( + "Shared RAM between GPU and host detected - using only half of the 'device' memory."); + } + else + log("RAM is NOT shared between GPU and host."); + + // initializing the heap for particles + deviceHeap->destructiveResize( + cupla::manager::Device::get().current(), + nativeCudaStream, + heapSize); + cuplaStreamSynchronize(0); + + auto mallocMCBuffer = std::make_unique>(deviceHeap); + dc.consume(std::move(mallocMCBuffer)); + +#endif + + meta::ForEach> + logMemoryStatisticsForSpecies; + logMemoryStatisticsForSpecies(deviceHeap); + + Environment<>::get().MemoryInfo().getMemoryInfo(&freeGpuMem); + log("free mem after all mem is allocated %1% MiB") % (freeGpuMem / 1024 / 1024); + + IdProvider::init(); + +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + /* add CUDA streams to the StreamController for concurrent execution */ + Environment<>::get().StreamController().addStreams(6); +#endif + } + + virtual uint32_t fillSimulation() + { + /* assume 
start (restart in initialiserController might change that) */ + uint32_t step = 0; + + /* set slideCounter properties for PIConGPU MovingWindow: assume start + * (restart in initialiserController might change this again) + */ + MovingWindow::getInstance().setSlideCounter(0, 0); + /* Update MPI domain decomposition: will also update SubGrid domain + * information such as local offsets in y-direction + */ + GridController& gc = Environment::get().GridController(); + gc.setStateAfterSlides(0); + + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + + /* fill all objects registed in DataConnector */ + if(initialiserController) + { + initialiserController->printInformation(); + if(this->restartRequested) + { + /* we do not require '--checkpoint.restart.step' if a master checkpoint file is found */ + if(this->restartStep < 0) + { + std::vector checkpoints = readCheckpointMasterFile(); + + if(checkpoints.empty()) + { + throw std::runtime_error("Restart failed. You must provide the " + "'--checkpoint.restart.step' argument. See picongpu --help."); + } + else + this->restartStep = checkpoints.back(); + } + + initialiserController->restart((uint32_t) this->restartStep, this->restartDirectory); + step = this->restartStep; + + /** restore background fields in GUARD + * + * loads the outer GUARDS of the global domain for absorbing/open boundary condtions + * + * @todo as soon as we add GUARD fields to the checkpoint data, e.g. 
for PML boundary + * conditions, this section needs to be removed + */ + cellwiseOperation::CellwiseOperation guardBGField(*cellDescription); + namespace nvfct = pmacc::nvidia::functors; + guardBGField( + fieldE, + nvfct::Add(), + FieldBackgroundE(fieldE->getUnit()), + step, + FieldBackgroundE::InfluenceParticlePusher); + guardBGField( + fieldB, + nvfct::Add(), + FieldBackgroundB(fieldB->getUnit()), + step, + FieldBackgroundB::InfluenceParticlePusher); + } + else + { + initialiserController->init(); + meta::ForEach> initSpecies; + initSpecies(step); + } + } + + size_t freeGpuMem(0u); + Environment<>::get().MemoryInfo().getMemoryInfo(&freeGpuMem); + log("free mem after all particles are initialized %1% MiB") % (freeGpuMem / 1024 / 1024); + + // generate valid GUARDS (overwrite) + EventTask eRfieldE = fieldE->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(eRfieldE); + EventTask eRfieldB = fieldB->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(eRfieldB); + + dc.releaseData(FieldE::getName()); + dc.releaseData(FieldB::getName()); + + return step; + } + + /** + * Run one simulation step. 
+ * + * @param currentStep iteration number of the current step + */ + virtual void runOneStep(uint32_t currentStep) + { + using namespace simulation::stage; + MomentumBackup{}(currentStep); + CurrentReset{}(currentStep); + ParticleIonization{*cellDescription}(currentStep); + PopulationKinetics{}(currentStep); + SynchrotronRadiation{*cellDescription, synchrotronFunctions}(currentStep); +#if(PMACC_CUDA_ENABLED == 1) + Bremsstrahlung{*cellDescription, scaledBremsstrahlungSpectrumMap, bremsstrahlungPhotonAngle}(currentStep); +#endif + EventTask commEvent; + ParticlePush{}(currentStep, commEvent); + FieldBackground{*cellDescription}(currentStep, nvidia::functors::Sub()); + myFieldSolver->update_beforeCurrent(currentStep); + __setTransactionEvent(commEvent); + CurrentBackground{*cellDescription}(currentStep); + CurrentDeposition{}(currentStep); + currentInterpolationAndAdditionToEMF(currentStep); + myFieldSolver->update_afterCurrent(currentStep); + } + + virtual void movingWindowCheck(uint32_t currentStep) + { + if(MovingWindow::getInstance().slideInCurrentStep(currentStep)) + { + slide(currentStep); + } + + /* do not double-add background field on restarts + * (contained in checkpoint data) + */ + bool addBgFields = true; + if(this->restartRequested) + { + if(this->restartStep == int32_t(currentStep)) + addBgFields = false; + } + + if(addBgFields) + { + /** add background field: the movingWindowCheck is just at the start + * of a time step before all the plugins are called (and the step + * itself is performed for this time step). + * Hence the background field is visible for all plugins + * in between the time steps. 
+ */ + simulation::stage::FieldBackground{*cellDescription}(currentStep, nvidia::functors::Add()); + } + } + + virtual void resetAll(uint32_t currentStep) + { + resetFields(currentStep); + meta::ForEach> resetParticles; + resetParticles(currentStep); + } + + void slide(uint32_t currentStep) + { + GridController& gc = Environment::get().GridController(); + + if(gc.slide()) + { + log("slide in step %1%") % currentStep; + resetAll(currentStep); + initialiserController->slide(currentStep); + meta::ForEach> initSpecies; + initSpecies(currentStep); + } + } + + virtual void setInitController(IInitPlugin* initController) + { + PMACC_ASSERT(initController != nullptr); + this->initialiserController = initController; + } + + MappingDesc* getMappingDescription() + { + return cellDescription; + } + + protected: + std::shared_ptr deviceHeap; + + fields::Solver* myFieldSolver; + simulation::stage::CurrentInterpolationAndAdditionToEMF currentInterpolationAndAdditionToEMF; + +#if(PMACC_CUDA_ENABLED == 1) + // creates lookup tables for the bremsstrahlung effect + // map + std::map scaledBremsstrahlungSpectrumMap; + particles::bremsstrahlung::GetPhotonAngle bremsstrahlungPhotonAngle; +#endif + + // Synchrotron functions (used in synchrotronPhotons module) + particles::synchrotronPhotons::SynchrotronFunctions synchrotronFunctions; + + // output classes + + IInitPlugin* initialiserController; + + MappingDesc* cellDescription; + + // layout parameter + std::vector devices; + std::vector gridSize; + /** Without guards */ + DataSpace gridSizeLocal; + std::vector periodic; + + std::vector gridDistribution; + + bool slidingWindow; + int32_t endSlidingOnStep; + float_64 windowMovePoint; + bool showVersionOnce; + bool autoAdjustGrid = true; + + private: + void initFields(DataConnector& dataConnector) + { + auto fieldB = std::make_unique(*cellDescription); + dataConnector.consume(std::move(fieldB)); + auto fieldE = std::make_unique(*cellDescription); + dataConnector.consume(std::move(fieldE)); 
+ auto fieldJ = std::make_unique(*cellDescription); + dataConnector.consume(std::move(fieldJ)); + for(uint32_t slot = 0; slot < fieldTmpNumSlots; ++slot) + { + auto fieldTmp = std::make_unique(*cellDescription, slot); + dataConnector.consume(std::move(fieldTmp)); + } + } + + /** Reset all fields + * + * @param currentStep iteration number of the current step + */ + void resetFields(uint32_t const currentStep) + { + auto resetField = [currentStep](std::string const name) { + DataConnector& dc = Environment<>::get().DataConnector(); + auto const fieldExists = dc.hasId(name); + if(fieldExists) + { + using FieldHelper = SimulationFieldHelper; + auto field = std::dynamic_pointer_cast(dc.get(name, true)); + if(field) + field->reset(currentStep); + dc.releaseData(name); + } + }; + + /* @todo for now the list of fields is hardcoded here, a more generic + * solution would require changes to design of DataConnector. + * FieldJ and FieldTmp are effectively cleared each time iteration and + * so do not need a reset. 
+ */ + std::array const fieldNames{ + {FieldE::getName(), + FieldB::getName(), + fields::maxwellSolver::yeePML::FieldE::getName(), + fields::maxwellSolver::yeePML::FieldB::getName()}}; + std::for_each(fieldNames.cbegin(), fieldNames.cend(), resetField); + } + }; +} /* namespace picongpu */ + +#include "picongpu/fields/Fields.tpp" +#include "picongpu/particles/synchrotronPhotons/SynchrotronFunctions.tpp" + +#if(PMACC_CUDA_ENABLED == 1) +# include "picongpu/particles/bremsstrahlung/Bremsstrahlung.tpp" +# include "picongpu/particles/bremsstrahlung/ScaledSpectrum.tpp" +#endif diff --git a/include/picongpu/simulation/control/SimulationStarter.hpp b/include/picongpu/simulation/control/SimulationStarter.hpp index 808d7b2c3f..a98bfe1371 100644 --- a/include/picongpu/simulation/control/SimulationStarter.hpp +++ b/include/picongpu/simulation/control/SimulationStarter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. 
* @@ -49,8 +49,8 @@ namespace picongpu MappingDesc* mappingDesc; - public: + public: SimulationStarter() : mappingDesc(nullptr) { simulationClass = new SimulationClass(); @@ -75,7 +75,7 @@ namespace picongpu { PluginConnector& pluginConnector = Environment<>::get().PluginConnector(); pluginConnector.loadPlugins(); - log ("Startup"); + log("Startup"); simulationClass->setInitController(initClass); simulationClass->startSimulation(); } @@ -88,7 +88,7 @@ namespace picongpu { } - ArgsParser::Status parseConfigs(int argc, char **argv) + ArgsParser::Status parseConfigs(int argc, char** argv) { ArgsParser& ap = ArgsParser::getInstance(); PluginConnector& pluginConnector = Environment<>::get().PluginConnector(); @@ -108,8 +108,7 @@ namespace picongpu // setup all boost::program_options and add to ArgsParser BoostOptionsList options = pluginConnector.registerHelp(); - for (BoostOptionsList::const_iterator iter = options.begin(); - iter != options.end(); ++iter) + for(BoostOptionsList::const_iterator iter = options.begin(); iter != options.end(); ++iter) { ap.addOptions(*iter); } @@ -117,8 +116,8 @@ namespace picongpu // parse environment variables, config files and command line return ap.parse(argc, argv); } - protected: + protected: void pluginLoad() { simulationClass->load(); @@ -135,17 +134,16 @@ namespace picongpu pluginClass->unload(); simulationClass->unload(); } - private: - void printStartParameters(int argc, char **argv) + private: + void printStartParameters(int argc, char** argv) { std::cout << "Start Parameters: "; - for (int i = 0; i < argc; ++i) + for(int i = 0; i < argc; ++i) { std::cout << argv[i] << " "; } std::cout << std::endl; } }; -} - +} // namespace picongpu diff --git a/include/picongpu/simulation/control/Window.hpp b/include/picongpu/simulation/control/Window.hpp index 2f3c5354dd..fa43c20d92 100644 --- a/include/picongpu/simulation/control/Window.hpp +++ b/include/picongpu/simulation/control/Window.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene 
Widera, Felix Schmitt +/* Copyright 2013-2021 Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * @@ -24,21 +24,20 @@ namespace picongpu { -using namespace pmacc; + using namespace pmacc; -/** - * Window describes sizes and offsets. - * - * For a detailed description of windows, see the PIConGPU wiki page: - * https://github.com/ComputationalRadiationPhysics/picongpu/wiki/PIConGPU-domain-definitions - */ -struct Window -{ - /* Dimensions (size/offset) of the global virtual window over all GPUs */ - Selection globalDimensions; - - /* Dimensions (size/offset) of the local virtual window on this GPU */ - Selection localDimensions; -}; -} + /** + * Window describes sizes and offsets. + * + * For a detailed description of windows, see the PIConGPU wiki page: + * https://github.com/ComputationalRadiationPhysics/picongpu/wiki/PIConGPU-domain-definitions + */ + struct Window + { + /* Dimensions (size/offset) of the global virtual window over all GPUs */ + Selection globalDimensions; + /* Dimensions (size/offset) of the local virtual window on this GPU */ + Selection localDimensions; + }; +} // namespace picongpu diff --git a/include/picongpu/simulation/stage/Bremsstrahlung.hpp b/include/picongpu/simulation/stage/Bremsstrahlung.hpp index b6d3d4e34a..25052afe46 100644 --- a/include/picongpu/simulation/stage/Bremsstrahlung.hpp +++ b/include/picongpu/simulation/stage/Bremsstrahlung.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -22,97 +22,79 @@ #pragma once // Bremsstrahlung is available only with CUDA -#if( PMACC_CUDA_ENABLED == 1 ) +#if(PMACC_CUDA_ENABLED == 1) -#include "picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp" -#include "picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp" -#include 
"picongpu/particles/ParticlesFunctors.hpp" +# include "picongpu/particles/bremsstrahlung/PhotonEmissionAngle.hpp" +# include "picongpu/particles/bremsstrahlung/ScaledSpectrum.hpp" +# include "picongpu/particles/ParticlesFunctors.hpp" -#include -#include +# include +# include -#include -#include +# include +# include namespace picongpu { -namespace simulation -{ -namespace stage -{ - - /** Functor for the stage of the PIC loop computing Bremsstrahlung - * - * Only affects particle species with the bremsstrahlungIons attribute. - */ - class Bremsstrahlung + namespace simulation { - public: - - using ScaledSpectrumMap = std::map< - float_X, - particles::bremsstrahlung::ScaledSpectrum - >; - - /** Create a Bremsstrahlung functor - * - * Having this in constructor is a temporary solution. - * - * @param cellDescription mapping for kernels - * @param scaledSpectrumMap initialized spectrum lookup table - * @param photonAngle initialized photon angle lookup table - */ - Bremsstrahlung( - MappingDesc const cellDescription, - ScaledSpectrumMap & scaledSpectrumMap, - particles::bremsstrahlung::GetPhotonAngle & photonAngle - ): - cellDescription( cellDescription ), - scaledSpectrumMap( scaledSpectrumMap ), - photonAngle( photonAngle ) - { - } - - /** Ionize particles - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const + namespace stage { - using pmacc::particles::traits::FilterByFlag; - using SpeciesWithBremsstrahlung = typename FilterByFlag - < - VectorAllSpecies, - bremsstrahlungIons< > - >::type; - pmacc::meta::ForEach< - SpeciesWithBremsstrahlung, - particles::CallBremsstrahlung< bmpl::_1 > - > particleBremsstrahlung; - particleBremsstrahlung( - cellDescription, - step, - scaledSpectrumMap, - photonAngle - ); - } - - private: - - //! Mapping for kernels - MappingDesc cellDescription; - - //! Loopup table: atomic number -> scaled bremsstrahlung spectrum - ScaledSpectrumMap & scaledSpectrumMap; - - //! 
Loopup table for photon angle - particles::bremsstrahlung::GetPhotonAngle & photonAngle; - - }; - -} // namespace stage -} // namespace simulation + /** Functor for the stage of the PIC loop computing Bremsstrahlung + * + * Only affects particle species with the bremsstrahlungIons attribute. + */ + class Bremsstrahlung + { + public: + using ScaledSpectrumMap = std::map; + + /** Create a Bremsstrahlung functor + * + * Having this in constructor is a temporary solution. + * + * @param cellDescription mapping for kernels + * @param scaledSpectrumMap initialized spectrum lookup table + * @param photonAngle initialized photon angle lookup table + */ + Bremsstrahlung( + MappingDesc const cellDescription, + ScaledSpectrumMap& scaledSpectrumMap, + particles::bremsstrahlung::GetPhotonAngle& photonAngle) + : cellDescription(cellDescription) + , scaledSpectrumMap(scaledSpectrumMap) + , photonAngle(photonAngle) + { + } + + /** Ionize particles + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using pmacc::particles::traits::FilterByFlag; + using SpeciesWithBremsstrahlung = + typename FilterByFlag>::type; + pmacc::meta::ForEach> + particleBremsstrahlung; + particleBremsstrahlung(cellDescription, step, scaledSpectrumMap, photonAngle); + } + + private: + //! Mapping for kernels + MappingDesc cellDescription; + + //! Loopup table: atomic number -> scaled bremsstrahlung spectrum + ScaledSpectrumMap& scaledSpectrumMap; + + //! 
Loopup table for photon angle + particles::bremsstrahlung::GetPhotonAngle& photonAngle; + }; + + } // namespace stage + } // namespace simulation } // namespace picongpu #endif // ( PMACC_CUDA_ENABLED == 1 ) diff --git a/include/picongpu/simulation/stage/CurrentBackground.hpp b/include/picongpu/simulation/stage/CurrentBackground.hpp index 3d43e292a3..64abe3b7fe 100644 --- a/include/picongpu/simulation/stage/CurrentBackground.hpp +++ b/include/picongpu/simulation/stage/CurrentBackground.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -34,57 +34,49 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - //! Functor for the stage of the PIC loop applying current background - class CurrentBackground + namespace simulation { - public: - - /** Create a current background functor - * - * Having this in constructor is a temporary solution. - * - * @param cellDescription mapping for kernels - */ - CurrentBackground( MappingDesc const cellDescription ): - cellDescription( cellDescription ) + namespace stage { - } - - /** Add the current background to the current density - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const - { - using namespace pmacc; - DataConnector & dc = Environment< >::get( ).DataConnector( ); - auto & fieldJ = *dc.get< FieldJ >( FieldJ::getName( ), true ); - using CurrentBackground = cellwiseOperation::CellwiseOperation< - type::CORE + type::BORDER - >; - CurrentBackground currentBackground( cellDescription ); - currentBackground( - &fieldJ, - nvidia::functors::Add( ), - FieldBackgroundJ( fieldJ.getUnit() ), - step, - FieldBackgroundJ::activated - ); - dc.releaseData( FieldJ::getName( ) ); - } - - private: + //! 
Functor for the stage of the PIC loop applying current background + class CurrentBackground + { + public: + /** Create a current background functor + * + * Having this in constructor is a temporary solution. + * + * @param cellDescription mapping for kernels + */ + CurrentBackground(MappingDesc const cellDescription) : cellDescription(cellDescription) + { + } - //! Mapping for kernels - MappingDesc cellDescription; + /** Add the current background to the current density + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using namespace pmacc; + DataConnector& dc = Environment<>::get().DataConnector(); + auto& fieldJ = *dc.get(FieldJ::getName(), true); + using CurrentBackground = cellwiseOperation::CellwiseOperation; + CurrentBackground currentBackground(cellDescription); + currentBackground( + &fieldJ, + nvidia::functors::Add(), + FieldBackgroundJ(fieldJ.getUnit()), + step, + FieldBackgroundJ::activated); + dc.releaseData(FieldJ::getName()); + } - }; + private: + //! 
Mapping for kernels + MappingDesc cellDescription; + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/CurrentDeposition.hpp b/include/picongpu/simulation/stage/CurrentDeposition.hpp index 597d13ab81..7725072338 100644 --- a/include/picongpu/simulation/stage/CurrentDeposition.hpp +++ b/include/picongpu/simulation/stage/CurrentDeposition.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -35,65 +35,52 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ -namespace detail -{ - - template< - typename T_SpeciesType, - typename T_Area - > - struct CurrentDeposition + namespace simulation { - using SpeciesType = T_SpeciesType; - using FrameType = typename SpeciesType::FrameType; - - HINLINE void operator( )( - const uint32_t currentStep, - FieldJ & fieldJ, - pmacc::DataConnector & dc - ) const + namespace stage { - auto species = dc.get< SpeciesType >( FrameType::getName(), true ); - fieldJ.computeCurrent< T_Area::value, SpeciesType >( *species, currentStep ); - dc.releaseData( FrameType::getName() ); - } - }; + namespace detail + { + template + struct CurrentDeposition + { + using SpeciesType = T_SpeciesType; + using FrameType = typename SpeciesType::FrameType; -} // namespace detail + HINLINE void operator()(const uint32_t currentStep, FieldJ& fieldJ, pmacc::DataConnector& dc) const + { + auto species = dc.get(FrameType::getName(), true); + fieldJ.computeCurrent(*species, currentStep); + dc.releaseData(FrameType::getName()); + } + }; - //! 
Functor for the stage of the PIC loop performing current deposition - struct CurrentDeposition - { - /** Compute the current created by particles and add it to the current - * density - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const - { - using namespace pmacc; - DataConnector & dc = Environment< >::get( ).DataConnector( ); - auto & fieldJ = *dc.get< FieldJ >( FieldJ::getName( ), true ); - using SpeciesWithCurrentSolver = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - current< > - >::type; - meta::ForEach< - SpeciesWithCurrentSolver, - detail::CurrentDeposition< - bmpl::_1, - bmpl::int_< type::CORE + type::BORDER > - > - > depositCurrent; - depositCurrent( step, fieldJ, dc ); - dc.releaseData( FieldJ::getName( ) ); - } - }; + } // namespace detail + + //! Functor for the stage of the PIC loop performing current deposition + struct CurrentDeposition + { + /** Compute the current created by particles and add it to the current + * density + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using namespace pmacc; + DataConnector& dc = Environment<>::get().DataConnector(); + auto& fieldJ = *dc.get(FieldJ::getName(), true); + using SpeciesWithCurrentSolver = + typename pmacc::particles::traits::FilterByFlag>::type; + meta::ForEach< + SpeciesWithCurrentSolver, + detail::CurrentDeposition>> + depositCurrent; + depositCurrent(step, fieldJ, dc); + dc.releaseData(FieldJ::getName()); + } + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/CurrentInterpolationAndAdditionToEMF.hpp b/include/picongpu/simulation/stage/CurrentInterpolationAndAdditionToEMF.hpp index 5a3a3c8b82..42ab2504a4 100644 --- a/include/picongpu/simulation/stage/CurrentInterpolationAndAdditionToEMF.hpp +++ 
b/include/picongpu/simulation/stage/CurrentInterpolationAndAdditionToEMF.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -35,75 +35,108 @@ #include #include +#include +#include namespace picongpu { -namespace simulation -{ -namespace stage -{ - - /** Functor for the stage of the PIC loop performing current interpolation - * and addition to grid values of the electromagnetic field - */ - struct CurrentInterpolationAndAdditionToEMF + namespace simulation { - /** Compute the current created by particles and add it to the current - * density - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const + namespace stage { - using namespace pmacc; - using SpeciesWithCurrentSolver = typename pmacc::particles::traits::FilterByFlag< - VectorAllSpecies, - current< > - >::type; - auto const numSpeciesWithCurrentSolver = - bmpl::size< SpeciesWithCurrentSolver >::type::value; - auto const existsCurrent = numSpeciesWithCurrentSolver > 0; - if( !existsCurrent ) - return; + /** Functor for the stage of the PIC loop performing current interpolation + * and addition to grid values of the electromagnetic field + */ + class CurrentInterpolationAndAdditionToEMF + { + public: + /** Initialize the current interpolation stage + * + * This method has to be called during initialization of the simulation. + * Before this method is called, the instance of CurrentInterpolation cannot be used safely. 
+ */ + void init() + { + // Convert compile-time current interpolation set in the field solver to a runtime value + using namespace currentInterpolation; + auto& interpolation = CurrentInterpolationInfo::get(); + if(std::is_same::value) + interpolation.kind = CurrentInterpolationInfo::Kind::None; + else if(std::is_same::value) + interpolation.kind = CurrentInterpolationInfo::Kind::Binomial; + else + throw std::runtime_error("Unsupported current interpolation type used in the field solver"); + } - DataConnector & dc = Environment< >::get( ).DataConnector( ); - auto & fieldJ = *dc.get< FieldJ >( FieldJ::getName( ), true ); - auto eRecvCurrent = fieldJ.asyncCommunication( __getTransactionEvent() ); - using CurrentInterpolation = fields::Solver::CurrentInterpolation; - CurrentInterpolation currentInterpolation; - using Margin = traits::GetMargin< CurrentInterpolation >; - DataSpace< simDim > const currentRecvLower( Margin::LowerMargin().toRT() ); - DataSpace< simDim > const currentRecvUpper( Margin::UpperMargin().toRT() ); + /** Compute the current created by particles and add it to the current density + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using namespace pmacc; + using SpeciesWithCurrentSolver = + typename pmacc::particles::traits::FilterByFlag>::type; + auto const numSpeciesWithCurrentSolver = bmpl::size::type::value; + auto const existsCurrent = numSpeciesWithCurrentSolver > 0; + if(!existsCurrent) + return; - /* without interpolation, we do not need to access the FieldJ GUARD - * and can therefore overlap communication of GUARD->(ADD)BORDER & computation of CORE */ - if( currentRecvLower == DataSpace< simDim >::create( 0 ) && - currentRecvUpper == DataSpace< simDim >::create( 0 ) - ) - { - fieldJ.addCurrentToEMF< type::CORE >( currentInterpolation ); - __setTransactionEvent( eRecvCurrent ); - fieldJ.addCurrentToEMF< type::BORDER >( currentInterpolation ); - } - else - { - /* in case we perform a current 
interpolation/filter, we need - * to access the BORDER area from the CORE (and the GUARD area - * from the BORDER) - * `fieldJ->asyncCommunication` first adds the neighbors' values - * to BORDER (send) and then updates the GUARD (receive) - * \todo split the last `receive` part in a separate method to - * allow already a computation of CORE */ - __setTransactionEvent( eRecvCurrent ); - fieldJ.addCurrentToEMF< - type::CORE + type::BORDER - >( currentInterpolation ); - } - dc.releaseData( FieldJ::getName( ) ); - } - }; + DataConnector& dc = Environment<>::get().DataConnector(); + auto& fieldJ = *dc.get(FieldJ::getName(), true); + auto eRecvCurrent = fieldJ.asyncCommunication(__getTransactionEvent()); + auto& interpolation = currentInterpolation::CurrentInterpolationInfo::get(); + auto const currentRecvLower = interpolation.getLowerMargin(); + auto const currentRecvUpper = interpolation.getUpperMargin(); + + /* without interpolation, we do not need to access the FieldJ GUARD + * and can therefore overlap communication of GUARD->(ADD)BORDER & computation of CORE + */ + if(currentRecvLower == DataSpace::create(0) + && currentRecvUpper == DataSpace::create(0)) + { + addCurrentToEMF(fieldJ); + __setTransactionEvent(eRecvCurrent); + addCurrentToEMF(fieldJ); + } + else + { + /* in case we perform a current interpolation/filter, we need + * to access the BORDER area from the CORE (and the GUARD area + * from the BORDER) + * `fieldJ->asyncCommunication` first adds the neighbors' values + * to BORDER (send) and then updates the GUARD (receive) + * \todo split the last `receive` part in a separate method to + * allow already a computation of CORE */ + __setTransactionEvent(eRecvCurrent); + addCurrentToEMF(fieldJ); + } + dc.releaseData(FieldJ::getName()); + } + + private: + /* Call addCurrentToEMF method of fieldJ for the given area + * + * This function performs a transition from the run-time realm of CurrentInterpolation into the + * template realm of fieldJ.addCurrentToEMF() 
operating with interpolation functors. + * + * @tparam T_area area to operate once + * + * @param fieldJ object representing the current field + */ + template + void addCurrentToEMF(FieldJ& fieldJ) const + { + using currentInterpolation::CurrentInterpolationInfo; + auto const kind = CurrentInterpolationInfo::get().kind; + if(kind == CurrentInterpolationInfo::Kind::None) + fieldJ.addCurrentToEMF(currentInterpolation::None{}); + else + fieldJ.addCurrentToEMF(currentInterpolation::Binomial{}); + } + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/CurrentReset.hpp b/include/picongpu/simulation/stage/CurrentReset.hpp index e0e008b4aa..94d6913f21 100644 --- a/include/picongpu/simulation/stage/CurrentReset.hpp +++ b/include/picongpu/simulation/stage/CurrentReset.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -31,29 +31,28 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - //! Functor for the stage of the PIC loop setting the current values to zero - struct CurrentReset + namespace simulation { - /** Set all current density values to zero - * - * @param step index of time iteration - */ - void operator( )( uint32_t const ) const + namespace stage { - using namespace pmacc; - DataConnector & dc = Environment< >::get( ).DataConnector( ); - auto & fieldJ = *dc.get< FieldJ >( FieldJ::getName( ), true ); - FieldJ::ValueType zeroJ( FieldJ::ValueType::create( 0._X ) ); - fieldJ.assign( zeroJ ); - dc.releaseData( FieldJ::getName( ) ); - } - }; - -} // namespace stage -} // namespace simulation + //! 
Functor for the stage of the PIC loop setting the current values to zero + struct CurrentReset + { + /** Set all current density values to zero + * + * @param step index of time iteration + */ + void operator()(uint32_t const) const + { + using namespace pmacc; + DataConnector& dc = Environment<>::get().DataConnector(); + auto& fieldJ = *dc.get(FieldJ::getName(), true); + FieldJ::ValueType zeroJ(FieldJ::ValueType::create(0._X)); + fieldJ.assign(zeroJ); + dc.releaseData(FieldJ::getName()); + } + }; + + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/FieldBackground.hpp b/include/picongpu/simulation/stage/FieldBackground.hpp index 34454dedc3..cec8c49461 100644 --- a/include/picongpu/simulation/stage/FieldBackground.hpp +++ b/include/picongpu/simulation/stage/FieldBackground.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -34,70 +34,61 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - //! Functor for the stage of the PIC loop applying field background - class FieldBackground + namespace simulation { - public: - - /** Create a field background functor - * - * Having this in constructor is a temporary solution. 
- * - * @param cellDescription mapping for kernels - */ - FieldBackground( MappingDesc const cellDescription ): - cellDescription( cellDescription ) + namespace stage { - } - - /** Add the field background to the current density - * - * @tparam T_Functor functor type compatible to nvidia::functors - * - * @param step index of time iteration - * @param functor functor to apply to the background - */ - template< typename T_Functor > - void operator( )( uint32_t const step, T_Functor functor ) const - { - using namespace pmacc; - DataConnector & dc = Environment< >::get( ).DataConnector( ); - auto fieldE = dc.get< FieldE >( FieldE::getName( ), true ); - auto fieldB = dc.get< FieldB >( FieldB::getName( ), true ); - using Background = cellwiseOperation::CellwiseOperation< - CORE + BORDER + GUARD - >; - Background background( cellDescription ); - background( - fieldE, - functor, - FieldBackgroundE( fieldE->getUnit( ) ), - step, - FieldBackgroundE::InfluenceParticlePusher - ); - background( - fieldB, - functor, - FieldBackgroundB( fieldB->getUnit( ) ), - step, - FieldBackgroundB::InfluenceParticlePusher - ); - dc.releaseData( FieldE::getName( ) ); - dc.releaseData( FieldB::getName( ) ); - } - - private: + //! Functor for the stage of the PIC loop applying field background + class FieldBackground + { + public: + /** Create a field background functor + * + * Having this in constructor is a temporary solution. + * + * @param cellDescription mapping for kernels + */ + FieldBackground(MappingDesc const cellDescription) : cellDescription(cellDescription) + { + } - //! 
Mapping for kernels - MappingDesc cellDescription; + /** Add the field background to the current density + * + * @tparam T_Functor functor type compatible to nvidia::functors + * + * @param step index of time iteration + * @param functor functor to apply to the background + */ + template + void operator()(uint32_t const step, T_Functor functor) const + { + using namespace pmacc; + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldE = dc.get(FieldE::getName(), true); + auto fieldB = dc.get(FieldB::getName(), true); + using Background = cellwiseOperation::CellwiseOperation; + Background background(cellDescription); + background( + fieldE, + functor, + FieldBackgroundE(fieldE->getUnit()), + step, + FieldBackgroundE::InfluenceParticlePusher); + background( + fieldB, + functor, + FieldBackgroundB(fieldB->getUnit()), + step, + FieldBackgroundB::InfluenceParticlePusher); + dc.releaseData(FieldE::getName()); + dc.releaseData(FieldB::getName()); + } - }; + private: + //! Mapping for kernels + MappingDesc cellDescription; + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/MomentumBackup.hpp b/include/picongpu/simulation/stage/MomentumBackup.hpp index 615e2b31e6..75d0ec5609 100644 --- a/include/picongpu/simulation/stage/MomentumBackup.hpp +++ b/include/picongpu/simulation/stage/MomentumBackup.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -30,40 +30,31 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - /** Functor for the stage of the PIC loop copying particles' momentums - * to momentumPrev1 - * - * Only affects particle species with the momentumPrev1 attribute. 
- */ - struct MomentumBackup + namespace simulation { - /** Copy the momentums - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const + namespace stage { - using pmacc::particles::traits::FilterByIdentifier; - using SpeciesWithMomentumPrev1 = typename FilterByIdentifier< - VectorAllSpecies, - momentumPrev1 - >::type; - using CopyMomentum = particles::manipulators::unary::CopyAttribute< - momentumPrev1, - momentum - >; - particles::manipulate< - CopyMomentum, - SpeciesWithMomentumPrev1 - >( step ); - } - }; - -} // namespace stage -} // namespace simulation + /** Functor for the stage of the PIC loop copying particles' momentums + * to momentumPrev1 + * + * Only affects particle species with the momentumPrev1 attribute. + */ + struct MomentumBackup + { + /** Copy the momentums + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using pmacc::particles::traits::FilterByIdentifier; + using SpeciesWithMomentumPrev1 = + typename FilterByIdentifier::type; + using CopyMomentum = particles::manipulators::unary::CopyAttribute; + particles::manipulate(step); + } + }; + + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/ParticleIonization.hpp b/include/picongpu/simulation/stage/ParticleIonization.hpp index cf74d9b8fc..638af55f8c 100644 --- a/include/picongpu/simulation/stage/ParticleIonization.hpp +++ b/include/picongpu/simulation/stage/ParticleIonization.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -29,55 +29,44 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - /** Functor for the stage of the PIC loop performing particle ionization - * - * Only affects particle 
species with the ionizers attribute. - */ - class ParticleIonization + namespace simulation { - public: - - /** Create a particle ionization functor - * - * Having this in constructor is a temporary solution. - * - * @param cellDescription mapping for kernels - */ - ParticleIonization( MappingDesc const cellDescription ): - cellDescription( cellDescription ) + namespace stage { - } - - /** Ionize particles - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const - { - using pmacc::particles::traits::FilterByFlag; - using SpeciesWithIonizers = typename FilterByFlag< - VectorAllSpecies, - ionizers< > - >::type; - pmacc::meta::ForEach< - SpeciesWithIonizers, - particles::CallIonization< bmpl::_1 > - > particleIonization; - particleIonization( cellDescription, step ); - } - - private: + /** Functor for the stage of the PIC loop performing particle ionization + * + * Only affects particle species with the ionizers attribute. + */ + class ParticleIonization + { + public: + /** Create a particle ionization functor + * + * Having this in constructor is a temporary solution. + * + * @param cellDescription mapping for kernels + */ + ParticleIonization(MappingDesc const cellDescription) : cellDescription(cellDescription) + { + } - //! Mapping for kernels - MappingDesc cellDescription; + /** Ionize particles + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using pmacc::particles::traits::FilterByFlag; + using SpeciesWithIonizers = typename FilterByFlag>::type; + pmacc::meta::ForEach> particleIonization; + particleIonization(cellDescription, step); + } - }; + private: + //! 
Mapping for kernels + MappingDesc cellDescription; + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/ParticlePush.hpp b/include/picongpu/simulation/stage/ParticlePush.hpp index 51c5440f16..7d161200f8 100644 --- a/include/picongpu/simulation/stage/ParticlePush.hpp +++ b/include/picongpu/simulation/stage/ParticlePush.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -30,33 +30,28 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - //! Functor for the stage of the PIC loop performing particle push - struct ParticlePush + namespace simulation { - /** Push all particle species - * - * @param step index of time iteration - * @param[out] commEvent particle communication event - */ - void operator( )( uint32_t const step, pmacc::EventTask & commEvent ) const + namespace stage { - pmacc::EventTask initEvent = __getTransactionEvent( ); - pmacc::EventTask updateEvent; - particles::PushAllSpecies pushAllSpecies; - pushAllSpecies( - step, initEvent, - updateEvent, - commEvent - ); - __setTransactionEvent( updateEvent ); - } - }; - -} // namespace stage -} // namespace simulation + //! 
Functor for the stage of the PIC loop performing particle push + struct ParticlePush + { + /** Push all particle species + * + * @param step index of time iteration + * @param[out] commEvent particle communication event + */ + void operator()(uint32_t const step, pmacc::EventTask& commEvent) const + { + pmacc::EventTask initEvent = __getTransactionEvent(); + pmacc::EventTask updateEvent; + particles::PushAllSpecies pushAllSpecies; + pushAllSpecies(step, initEvent, updateEvent, commEvent); + __setTransactionEvent(updateEvent); + } + }; + + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/PopulationKinetics.hpp b/include/picongpu/simulation/stage/PopulationKinetics.hpp index dcdbae0e9b..435b327eb6 100644 --- a/include/picongpu/simulation/stage/PopulationKinetics.hpp +++ b/include/picongpu/simulation/stage/PopulationKinetics.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -29,38 +29,31 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - /** Functor for the stage of the PIC loop performing FLYlite population - * kinetics for atomic physics - * - * Only affects particle species with the populationKinetics attribute. 
- */ - struct PopulationKinetics + namespace simulation { - /** Perform FLYlite population kinetics for atomic physics - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const + namespace stage { - using pmacc::particles::traits::FilterByFlag; - using FlyLiteIons = typename FilterByFlag< - VectorAllSpecies, - populationKinetics< > - >::type; - pmacc::meta::ForEach< - FlyLiteIons, - particles::CallPopulationKinetics< bmpl::_1 >, - bmpl::_1 - > populationKinetics; - populationKinetics( step ); - } - }; + /** Functor for the stage of the PIC loop performing FLYlite population + * kinetics for atomic physics + * + * Only affects particle species with the populationKinetics attribute. + */ + struct PopulationKinetics + { + /** Perform FLYlite population kinetics for atomic physics + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using pmacc::particles::traits::FilterByFlag; + using FlyLiteIons = typename FilterByFlag>::type; + pmacc::meta::ForEach, bmpl::_1> + populationKinetics; + populationKinetics(step); + } + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation/stage/SynchrotronRadiation.hpp b/include/picongpu/simulation/stage/SynchrotronRadiation.hpp index 64c7a39a14..7617f4da75 100644 --- a/include/picongpu/simulation/stage/SynchrotronRadiation.hpp +++ b/include/picongpu/simulation/stage/SynchrotronRadiation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Richard Pausch, Alexander Debus, Marco Garten, * Benjamin Worpitz, Alexander Grund, Sergei Bastrakov * @@ -31,67 +31,54 @@ namespace picongpu { -namespace simulation -{ -namespace stage -{ - - /** Functor for the stage of the PIC loop computing synchrotron radiation - * - * Only 
affects particle species with the synchrotronPhotons attribute. - */ - class SynchrotronRadiation + namespace simulation { - public: - - /** Create a synchrotron radiation functor - * - * Having this in constructor is a temporary solution. - * - * @param cellDescription mapping for kernels - * @param functions initialized synchrotron functions - */ - SynchrotronRadiation( - MappingDesc const cellDescription, - particles::synchrotronPhotons::SynchrotronFunctions & functions - ): - cellDescription( cellDescription ), - functions( functions ) + namespace stage { - } - - /** Ionize particles - * - * @param step index of time iteration - */ - void operator( )( uint32_t const step ) const - { - using pmacc::particles::traits::FilterByFlag; - using SynchrotronPhotonsSpecies = typename FilterByFlag< - VectorAllSpecies, - synchrotronPhotons< > - >::type; - pmacc::meta::ForEach< - SynchrotronPhotonsSpecies, - particles::CallSynchrotronPhotons< bmpl::_1 > - > synchrotronRadiation; - synchrotronRadiation( - cellDescription, - step, - functions - ); - } - - private: + /** Functor for the stage of the PIC loop computing synchrotron radiation + * + * Only affects particle species with the synchrotronPhotons attribute. + */ + class SynchrotronRadiation + { + public: + /** Create a synchrotron radiation functor + * + * Having this in constructor is a temporary solution. + * + * @param cellDescription mapping for kernels + * @param functions initialized synchrotron functions + */ + SynchrotronRadiation( + MappingDesc const cellDescription, + particles::synchrotronPhotons::SynchrotronFunctions& functions) + : cellDescription(cellDescription) + , functions(functions) + { + } - //! 
Mapping for kernels - MappingDesc cellDescription; + /** Ionize particles + * + * @param step index of time iteration + */ + void operator()(uint32_t const step) const + { + using pmacc::particles::traits::FilterByFlag; + using SynchrotronPhotonsSpecies = + typename FilterByFlag>::type; + pmacc::meta::ForEach> + synchrotronRadiation; + synchrotronRadiation(cellDescription, step, functions); + } - //! Initialized synchrotron functions - particles::synchrotronPhotons::SynchrotronFunctions & functions; + private: + //! Mapping for kernels + MappingDesc cellDescription; - }; + //! Initialized synchrotron functions + particles::synchrotronPhotons::SynchrotronFunctions& functions; + }; -} // namespace stage -} // namespace simulation + } // namespace stage + } // namespace simulation } // namespace picongpu diff --git a/include/picongpu/simulation_classTypes.hpp b/include/picongpu/simulation_classTypes.hpp index abf9921976..2606037340 100644 --- a/include/picongpu/simulation_classTypes.hpp +++ b/include/picongpu/simulation_classTypes.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -34,7 +34,7 @@ namespace picongpu { using namespace pmacc; - //short name for access verbose types of picongpu + // short name for access verbose types of picongpu typedef PIConGPUVerbose picLog; -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/simulation_defines.hpp b/include/picongpu/simulation_defines.hpp index 1ab582d3fc..b0f5823f7f 100644 --- a/include/picongpu/simulation_defines.hpp +++ b/include/picongpu/simulation_defines.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -40,7 +40,7 @@ namespace picongpu #include #include -//load starter after all user extension +// load starter after all user extension #include #include @@ -49,5 +49,5 @@ namespace picongpu // ##### load unitless #include #include -//load starter after user extensions and all params are loaded +// load starter after user extensions and all params are loaded #include diff --git a/include/picongpu/simulation_types.hpp b/include/picongpu/simulation_types.hpp index ce46b4a3df..763dfbcd45 100644 --- a/include/picongpu/simulation_types.hpp +++ b/include/picongpu/simulation_types.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "picongpu/traits/GetMargin.hpp" #include @@ -33,41 +34,32 @@ namespace picongpu { + //! define all elements which can send and resive -//! define all elements which can send and resive + enum CommunicationTag + { + NO_COMMUNICATION = 0u, + FIELD_B = 1u, + FIELD_E = 2u, + FIELD_J = 3u, + FIELD_JRECV = 4u, + SPECIES_FIRSTTAG = 42u + }; -enum CommunicationTag -{ - NO_COMMUNICATION = 0u, - FIELD_B = 1u, - FIELD_E = 2u, - FIELD_J = 3u, - FIELD_JRECV = 4u, - SPECIES_FIRSTTAG = 42u -}; - - -//! defines field types some various methods (e.g. 
Laser::manipulate) - -enum FieldType -{ - FIELD_TYPE_E, FIELD_TYPE_B, FIELD_TYPE_TMP -}; - -namespace precision32Bit -{ -using precisionType = float; -} + namespace precision32Bit + { + using precisionType = float; + } -namespace precision64Bit -{ -using precisionType = double; -} + namespace precision64Bit + { + using precisionType = double; + } -namespace math = pmacc::algorithms::math; -using namespace pmacc::algorithms::precisionCast; -using namespace pmacc::algorithms::promoteType; -using namespace pmacc::traits; -using namespace picongpu::traits; + namespace math = cupla::device::math; + using namespace pmacc::algorithms::precisionCast; + using namespace pmacc::algorithms::promoteType; + using namespace pmacc::traits; + using namespace picongpu::traits; -} +} // namespace picongpu diff --git a/include/picongpu/traits/AdiosToPIC.hpp b/include/picongpu/traits/AdiosToPIC.hpp index 8d313c507c..9069fe644a 100644 --- a/include/picongpu/traits/AdiosToPIC.hpp +++ b/include/picongpu/traits/AdiosToPIC.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -23,20 +23,19 @@ namespace picongpu { + namespace traits + { + /** Convert an Adios type to a PIConGPU Type + * + * implements a public type as result of the trait + * + * @tparam T_AdiosType Adios data type + */ + template + struct AdiosToPIC; -namespace traits -{ - /** Convert an Adios type to a PIConGPU Type - * - * implements a public type as result of the trait - * - * @tparam T_AdiosType Adios data type - */ - template - struct AdiosToPIC; - -} //namespace traits + } // namespace traits -}// namespace picongpu +} // namespace picongpu #include "AdiosToPIC.tpp" diff --git a/include/picongpu/traits/AdiosToPIC.tpp b/include/picongpu/traits/AdiosToPIC.tpp index e82c6c8c3f..5274c7071b 100644 --- a/include/picongpu/traits/AdiosToPIC.tpp +++ b/include/picongpu/traits/AdiosToPIC.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Alexander Debus +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Alexander Debus * * This file is part of PIConGPU. * @@ -19,67 +19,65 @@ #pragma once -#if (ENABLE_ADIOS==1) -#include +#if(ENABLE_ADIOS == 1) +# include -#include "picongpu/simulation_defines.hpp" +# include "picongpu/simulation_defines.hpp" namespace picongpu { - -namespace traits -{ - - template<> - struct AdiosToPIC - { - typedef int16_t type; - }; - - template<> - struct AdiosToPIC + namespace traits { - typedef uint16_t type; - }; - - template<> - struct AdiosToPIC - { - typedef int32_t type; - }; - - template<> - struct AdiosToPIC - { - typedef uint32_t type; - }; - - template<> - struct AdiosToPIC - { - typedef int64_t type; - }; - - template<> - struct AdiosToPIC - { - typedef uint64_t type; - }; - - template<> - struct AdiosToPIC - { - typedef float_32 type; - }; - - template<> - struct AdiosToPIC - { - typedef float_64 type; - }; - -} //namespace traits - -}// namespace picongpu + template<> + struct AdiosToPIC + { + typedef int16_t type; + }; + + template<> + struct AdiosToPIC + { + typedef uint16_t type; + }; + + template<> + struct 
AdiosToPIC + { + typedef int32_t type; + }; + + template<> + struct AdiosToPIC + { + typedef uint32_t type; + }; + + template<> + struct AdiosToPIC + { + typedef int64_t type; + }; + + template<> + struct AdiosToPIC + { + typedef uint64_t type; + }; + + template<> + struct AdiosToPIC + { + typedef float_32 type; + }; + + template<> + struct AdiosToPIC + { + typedef float_64 type; + }; + + } // namespace traits + +} // namespace picongpu #endif // (ENABLE_ADIOS==1) diff --git a/include/picongpu/traits/FieldPosition.hpp b/include/picongpu/traits/FieldPosition.hpp index 6f8ae40bf8..d463db31a1 100644 --- a/include/picongpu/traits/FieldPosition.hpp +++ b/include/picongpu/traits/FieldPosition.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -24,14 +24,10 @@ namespace picongpu { -namespace traits -{ - template< - typename T_CellType, - typename T_Field, - uint32_t T_simDim = simDim - > - struct FieldPosition; + namespace traits + { + template + struct FieldPosition; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/traits/GetCellType.hpp b/include/picongpu/traits/GetCellType.hpp new file mode 100644 index 0000000000..a446c0ebbe --- /dev/null +++ b/include/picongpu/traits/GetCellType.hpp @@ -0,0 +1,46 @@ +/* Copyright 2020-2021 Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + + +namespace picongpu +{ + namespace traits + { + /** Trait for cell type of a field solver + * + * Defines the resulting type as ::type. + * By default falls back to T_FieldSolver::CellType. + * + * Note: it was originally indented to be put to a new namespace + * picongpu::fields::traits, but this was not possible due to conflicts + * with pmacc names lookup. + * + * @tparam T_FieldSolver field solver type + */ + template + struct GetCellType + { + //! Cell type, one of fields::cellType:: types + using type = typename T_FieldSolver::CellType; + }; + + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/traits/GetDataBoxType.hpp b/include/picongpu/traits/GetDataBoxType.hpp index 572feb8b3e..f94727257d 100644 --- a/include/picongpu/traits/GetDataBoxType.hpp +++ b/include/picongpu/traits/GetDataBoxType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,15 +22,15 @@ namespace picongpu { -namespace traits -{ -/** Get data box type of a buffer - * - * \tparam T_Type type from which you need the DataBoxType - * \treturn ::type - */ -template -struct GetDataBoxType; + namespace traits + { + /** Get data box type of a buffer + * + * \tparam T_Type type from which you need the DataBoxType + * \treturn ::type + */ + template + struct GetDataBoxType; -} //namespace traits -}// namespace picongpu + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/traits/GetMargin.hpp b/include/picongpu/traits/GetMargin.hpp index ffa0cde79d..ec82fceee8 100644 --- a/include/picongpu/traits/GetMargin.hpp +++ b/include/picongpu/traits/GetMargin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -23,37 +23,36 @@ namespace picongpu { - -namespace traits -{ -/**Get margin of a solver - * class must define a LowerMargin and UpperMargin for any valid solver - * - * \tparam Solver solver which needs ghost cells for solving a problem - * if solver not define `LowerMargin` and `UpperMargin` this trait (GetMargin) - * must be specialized - * \tparam SubSetName a optional name (id) if solver needs different ghost cells - * for different objects - */ -template -struct GetMargin -{ - using LowerMargin = typename Solver::LowerMargin; - using UpperMargin = typename Solver::UpperMargin; -}; - -template -struct GetLowerMargin -{ - typedef typename traits::GetMargin::LowerMargin type; -}; - -template -struct GetUpperMargin -{ - typedef typename traits::GetMargin::UpperMargin type; -}; - -} //namespace traits - -}// namespace picongpu + namespace traits + { + /**Get margin of a solver + * class must define a LowerMargin and UpperMargin for any valid solver + * + * \tparam Solver solver which needs ghost cells for solving a problem + * if solver not define `LowerMargin` and `UpperMargin` this trait (GetMargin) + * must be specialized + * \tparam SubSetName a optional name (id) if solver needs different ghost cells + * for different objects + */ + template + struct GetMargin + { + using LowerMargin = typename Solver::LowerMargin; + using UpperMargin = typename Solver::UpperMargin; + }; + + template + struct GetLowerMargin + { + typedef typename traits::GetMargin::LowerMargin type; + }; + + template + struct GetUpperMargin + { + typedef typename traits::GetMargin::UpperMargin type; + }; + + } // namespace traits + +} // namespace picongpu diff --git a/include/picongpu/traits/IsFieldDomainBound.hpp b/include/picongpu/traits/IsFieldDomainBound.hpp index e26ff54a52..76199b797d 100644 --- a/include/picongpu/traits/IsFieldDomainBound.hpp +++ b/include/picongpu/traits/IsFieldDomainBound.hpp @@ -1,4 +1,4 @@ -/* Copyright 2020 Sergei Bastrakov +/* Copyright 2020-2021 Sergei 
Bastrakov * * This file is part of PIConGPU. * @@ -26,20 +26,19 @@ namespace picongpu { -namespace traits -{ - - /** Whether a field is geometrically bound to the domain decomposition - * with respect to size, guard size, and offset - * - * Inherits std::true_type, std::false_type or a compatible type. - * - * @tparam T_Field field type - */ - template< typename T_Field > - struct IsFieldDomainBound : std::true_type + namespace traits { - }; + /** Whether a field is geometrically bound to the domain decomposition + * with respect to size, guard size, and offset + * + * Inherits std::true_type, std::false_type or a compatible type. + * + * @tparam T_Field field type + */ + template + struct IsFieldDomainBound : std::true_type + { + }; -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/traits/PICToAdios.hpp b/include/picongpu/traits/PICToAdios.hpp index b0a2550686..fed4a6ee36 100644 --- a/include/picongpu/traits/PICToAdios.hpp +++ b/include/picongpu/traits/PICToAdios.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt * * This file is part of PIConGPU. 
* @@ -23,19 +23,18 @@ namespace picongpu { + namespace traits + { + /** Convert a PIConGPU Type to an Adios data type + * + * \tparam T_Type Typename in PIConGPU + * \return \p ::type as public typedef of an Adios type + */ + template + struct PICToAdios; -namespace traits -{ - /** Convert a PIConGPU Type to an Adios data type - * - * \tparam T_Type Typename in PIConGPU - * \return \p ::type as public typedef of an Adios type - */ - template - struct PICToAdios; - -} //namespace traits + } // namespace traits -}// namespace picongpu +} // namespace picongpu #include "PICToAdios.tpp" diff --git a/include/picongpu/traits/PICToAdios.tpp b/include/picongpu/traits/PICToAdios.tpp index 2da5b1db4d..2581459f44 100644 --- a/include/picongpu/traits/PICToAdios.tpp +++ b/include/picongpu/traits/PICToAdios.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Alexander Debus +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Alexander Debus * * This file is part of PIConGPU. * @@ -19,124 +19,125 @@ #pragma once -#if (ENABLE_ADIOS==1) -#include +#if(ENABLE_ADIOS == 1) +# include -#include "picongpu/simulation_defines.hpp" -#include -#include +# include "picongpu/simulation_defines.hpp" +# include +# include namespace picongpu { - -namespace traits -{ - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_unsigned_byte) {} - - PMACC_STATIC_ASSERT_MSG( - sizeof(bool) == 1, - ADIOS_Plugin__Can_not_find_a_one_byte_representation_of_bool - ); - }; - - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_short) {} - }; - - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_unsigned_short) {} - }; - - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_integer) {} - }; - - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_unsigned_integer) {} - }; - - template<> - struct 
PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_long) {} - }; - - template<> - struct PICToAdios + namespace traits { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_unsigned_long) {} - }; - - /** Specialization for uint64_cu. - * If uint64_cu happens to be the same as uint64_t we use an unused dummy type - * to avoid duplicate specialization - */ - struct uint64_cu_unused_adios; - template<> - struct PICToAdios< - typename bmpl::if_< - typename bmpl::or_< - boost::is_same, - bmpl::bool_ - >::type, - uint64_cu_unused_adios, - uint64_cu - >::type - >: public PICToAdios - {}; - - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_real) {} - }; - - template<> - struct PICToAdios - { - ADIOS_DATATYPES type; - - PICToAdios() : - type(adios_double) {} - }; - -} //namespace traits - -}// namespace picongpu + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_unsigned_byte) + { + } + + PMACC_STATIC_ASSERT_MSG(sizeof(bool) == 1, ADIOS_Plugin__Can_not_find_a_one_byte_representation_of_bool); + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_short) + { + } + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_unsigned_short) + { + } + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_integer) + { + } + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_unsigned_integer) + { + } + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_long) + { + } + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_unsigned_long) + { + } + }; + + /** Specialization for uint64_cu. 
+ * If uint64_cu happens to be the same as uint64_t we use an unused dummy type + * to avoid duplicate specialization + */ + struct uint64_cu_unused_adios; + template<> + struct PICToAdios, bmpl::bool_>::type, + uint64_cu_unused_adios, + uint64_cu>::type> : public PICToAdios + { + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_real) + { + } + }; + + template<> + struct PICToAdios + { + ADIOS_DATATYPES type; + + PICToAdios() : type(adios_double) + { + } + }; + + } // namespace traits + +} // namespace picongpu #endif // (ENABLE_ADIOS==1) diff --git a/include/picongpu/traits/PICToOpenPMD.hpp b/include/picongpu/traits/PICToOpenPMD.hpp index 719347a7c0..35ca277eb5 100644 --- a/include/picongpu/traits/PICToOpenPMD.hpp +++ b/include/picongpu/traits/PICToOpenPMD.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -30,23 +30,23 @@ namespace picongpu { -namespace traits -{ - /** Reinterpret attributes for openPMD - * - * Currently, this conversion tables are used to translate the PIConGPU - * totalCellIdx (unitless cell index) to the openPMD positionOffset (length) - */ - template - struct OpenPMDName; - - template - struct OpenPMDUnit; - - template - struct OpenPMDUnitDimension; - -} // namespace traits + namespace traits + { + /** Reinterpret attributes for openPMD + * + * Currently, this conversion tables are used to translate the PIConGPU + * totalCellIdx (unitless cell index) to the openPMD positionOffset (length) + */ + template + struct OpenPMDName; + + template + struct OpenPMDUnit; + + template + struct OpenPMDUnitDimension; + + } // namespace traits } // namespace picongpu #include "PICToOpenPMD.tpp" diff --git a/include/picongpu/traits/PICToOpenPMD.tpp b/include/picongpu/traits/PICToOpenPMD.tpp index 3a0a780171..15e1ef3b33 100644 --- a/include/picongpu/traits/PICToOpenPMD.tpp +++ b/include/picongpu/traits/PICToOpenPMD.tpp @@ -1,4 +1,4 @@ 
-/* Copyright 2016-2020 Axel Huebl +/* Copyright 2016-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -21,103 +21,103 @@ namespace picongpu { -namespace traits -{ - /** Forward names that are identical in PIConGPU & openPMD - */ - template - struct OpenPMDName + namespace traits { - std::string operator()() const + /** Forward names that are identical in PIConGPU & openPMD + */ + template + struct OpenPMDName { - return T_Identifier::getName(); - } - }; + std::string operator()() const + { + return T_Identifier::getName(); + } + }; - /** Translate the totalCellIdx (unitless index) into the openPMD - * positionOffset (3D position vector, length) - */ - template<> - struct OpenPMDName - { - std::string operator()() const + /** Translate the totalCellIdx (unitless index) into the openPMD + * positionOffset (3D position vector, length) + */ + template<> + struct OpenPMDName { - return std::string("positionOffset"); - } - }; + std::string operator()() const + { + return std::string("positionOffset"); + } + }; - /** Translate the particleId (unitless, global) into the openPMD - * id (unitless, global) - */ - template<> - struct OpenPMDName - { - std::string operator()() const + /** Translate the particleId (unitless, global) into the openPMD + * id (unitless, global) + */ + template<> + struct OpenPMDName { - return std::string("id"); - } - }; + std::string operator()() const + { + return std::string("id"); + } + }; - /** Forward units that are identical in PIConGPU & openPMD - */ - template - struct OpenPMDUnit - { - std::vector operator()() const + /** Forward units that are identical in PIConGPU & openPMD + */ + template + struct OpenPMDUnit { - return Unit::get(); - } - }; + std::vector operator()() const + { + return Unit::get(); + } + }; - /** the totalCellIdx can be converted into a positionOffset - * until the beginning of the cell by multiplying with the component-wise - * cell size in SI - */ - template<> - struct OpenPMDUnit - { - std::vector 
operator()() const + /** the totalCellIdx can be converted into a positionOffset + * until the beginning of the cell by multiplying with the component-wise + * cell size in SI + */ + template<> + struct OpenPMDUnit { - std::vector unit(simDim); - /* cell positionOffset needs two transformations to get to SI: - cell begin -> dimensionless scaling to grid -> SI */ - for( uint32_t i=0; i < simDim; ++i ) - unit[i] = cellSize[i] * UNIT_LENGTH; + std::vector operator()() const + { + std::vector unit(simDim); + /* cell positionOffset needs two transformations to get to SI: + cell begin -> dimensionless scaling to grid -> SI */ + for(uint32_t i = 0; i < simDim; ++i) + unit[i] = cellSize[i] * UNIT_LENGTH; - return unit; - } - }; + return unit; + } + }; - /** Forward dimensionalities that are identical in PIConGPU & openPMD - */ - template - struct OpenPMDUnitDimension - { - std::vector operator()() const + /** Forward dimensionalities that are identical in PIConGPU & openPMD + */ + template + struct OpenPMDUnitDimension { - return UnitDimension::get(); - } - }; + std::vector operator()() const + { + return UnitDimension::get(); + } + }; - /** the openPMD positionOffset is an actual (vector) with a lengths that - * is added to the position (vector) attribute - */ - template<> - struct OpenPMDUnitDimension - { - std::vector operator()() const + /** the openPMD positionOffset is an actual (vector) with a lengths that + * is added to the position (vector) attribute + */ + template<> + struct OpenPMDUnitDimension { - /* L, M, T, I, theta, N, J - * - * positionOffset is in meter: m - * -> L - */ - std::vector unitDimension( NUnitDimension, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 1.0; + std::vector operator()() const + { + /* L, M, T, I, theta, N, J + * + * positionOffset is in meter: m + * -> L + */ + std::vector unitDimension(NUnitDimension, 0.0); + unitDimension.at(SIBaseUnits::length) = 1.0; - return unitDimension; - } - }; + return unitDimension; + } + }; -} // 
namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/traits/PICToSplash.hpp b/include/picongpu/traits/PICToSplash.hpp index 11238bdff3..3829be1850 100644 --- a/include/picongpu/traits/PICToSplash.hpp +++ b/include/picongpu/traits/PICToSplash.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -23,19 +23,18 @@ namespace picongpu { + namespace traits + { + /** Convert a PIConGPU Type to a Splash CollectionType + * + * \tparam T_Type Typename in PIConGPU + * \return \p ::type as public typedef of a Splash CollectionType + */ + template + struct PICToSplash; -namespace traits -{ - /** Convert a PIConGPU Type to a Splash CollectionType - * - * \tparam T_Type Typename in PIConGPU - * \return \p ::type as public typedef of a Splash CollectionType - */ - template - struct PICToSplash; - -} //namespace traits + } // namespace traits -}// namespace picongpu +} // namespace picongpu #include "PICToSplash.tpp" diff --git a/include/picongpu/traits/PICToSplash.tpp b/include/picongpu/traits/PICToSplash.tpp index a0e14055c0..ce7dec98c8 100644 --- a/include/picongpu/traits/PICToSplash.tpp +++ b/include/picongpu/traits/PICToSplash.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -19,99 +19,93 @@ #pragma once -#if (ENABLE_HDF5==1) -#include +#if(ENABLE_HDF5 == 1) +# include -#include "picongpu/simulation_defines.hpp" -#include -#include +# include "picongpu/simulation_defines.hpp" +# include +# include namespace picongpu { - -namespace traits -{ - - template<> - struct PICToSplash - { - typedef splash::ColTypeBool type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeFloat type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeDouble type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeInt16 type; - }; - - template<> - struct PICToSplash + namespace traits { - typedef splash::ColTypeUInt16 type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeInt32 type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeUInt32 type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeInt64 type; - }; - - template<> - struct PICToSplash - { - typedef splash::ColTypeUInt64 type; - }; - - /** Specialization for uint64_cu. 
- * If uint64_cu happens to be the same as uint64_t we use an unused dummy type - * to avoid duplicate specialization - */ - struct uint64_cu_unused_splash; - template<> - struct PICToSplash< - typename bmpl::if_< - typename bmpl::or_< - boost::is_same, - bmpl::bool_ - >::type, - uint64_cu_unused_splash, - uint64_cu - >::type - >: public PICToSplash - {}; - - template<> - struct PICToSplash - { - typedef splash::ColTypeDim type; - }; - -} //namespace traits - -}// namespace picongpu + template<> + struct PICToSplash + { + typedef splash::ColTypeBool type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeFloat type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeDouble type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeInt16 type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeUInt16 type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeInt32 type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeUInt32 type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeInt64 type; + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeUInt64 type; + }; + + /** Specialization for uint64_cu. 
+ * If uint64_cu happens to be the same as uint64_t we use an unused dummy type + * to avoid duplicate specialization + */ + struct uint64_cu_unused_splash; + template<> + struct PICToSplash, bmpl::bool_>::type, + uint64_cu_unused_splash, + uint64_cu>::type> : public PICToSplash + { + }; + + template<> + struct PICToSplash + { + typedef splash::ColTypeDim type; + }; + + } // namespace traits + +} // namespace picongpu #endif // (ENABLE_HDF5==1) diff --git a/include/picongpu/traits/SIBaseUnits.hpp b/include/picongpu/traits/SIBaseUnits.hpp index 126b7d57f7..da21557850 100644 --- a/include/picongpu/traits/SIBaseUnits.hpp +++ b/include/picongpu/traits/SIBaseUnits.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -21,27 +21,27 @@ namespace picongpu { -namespace traits -{ - - /* openPMD uses the powers of the 7 SI base measures to describe - * the unit of a record - * \see http://git.io/vROmP */ - constexpr uint32_t NUnitDimension = 7; - - // pre-C++11 "scoped enumerator" work-around - namespace SIBaseUnits { - enum SIBaseUnits_t + namespace traits { - length = 0, // L - mass = 1, // M - time = 2, // T - electricCurrent = 3, // I - thermodynamicTemperature = 4, // theta - amountOfSubstance = 5, // N - luminousIntensity = 6, // J - }; - } + /* openPMD uses the powers of the 7 SI base measures to describe + * the unit of a record + * \see http://git.io/vROmP */ + constexpr uint32_t NUnitDimension = 7; + + // pre-C++11 "scoped enumerator" work-around + namespace SIBaseUnits + { + enum SIBaseUnits_t + { + length = 0, // L + mass = 1, // M + time = 2, // T + electricCurrent = 3, // I + thermodynamicTemperature = 4, // theta + amountOfSubstance = 5, // N + luminousIntensity = 6, // J + }; + } -} // namespace traits + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/traits/SplashToPIC.hpp b/include/picongpu/traits/SplashToPIC.hpp index f15ac07f07..f0bb259462 100644 --- 
a/include/picongpu/traits/SplashToPIC.hpp +++ b/include/picongpu/traits/SplashToPIC.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -23,19 +23,18 @@ namespace picongpu { + namespace traits + { + /** Convert a Splash CollectionType to a PIConGPU Type + * + * \tparam T_SplashType Splash CollectionType + * \return \p ::type as public typedef + */ + template + struct SplashToPIC; -namespace traits -{ - /** Convert a Splash CollectionType to a PIConGPU Type - * - * \tparam T_SplashType Splash CollectionType - * \return \p ::type as public typedef - */ - template - struct SplashToPIC; - -} //namespace traits + } // namespace traits -}// namespace picongpu +} // namespace picongpu #include "SplashToPIC.tpp" diff --git a/include/picongpu/traits/SplashToPIC.tpp b/include/picongpu/traits/SplashToPIC.tpp index 05e9a3df8f..d9c1d3c520 100644 --- a/include/picongpu/traits/SplashToPIC.tpp +++ b/include/picongpu/traits/SplashToPIC.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl +/* Copyright 2013-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -19,73 +19,72 @@ #pragma once -#if (ENABLE_HDF5==1) -#include +#if(ENABLE_HDF5 == 1) +# include -#include "picongpu/simulation_defines.hpp" +# include "picongpu/simulation_defines.hpp" namespace picongpu { - -namespace traits -{ - template<> - struct SplashToPIC - { - typedef bool type; - }; - - template<> - struct SplashToPIC - { - typedef float_32 type; - }; - - template<> - struct SplashToPIC - { - typedef float_64 type; - }; - - /** Native int */ - template<> - struct SplashToPIC - { - typedef int type; - }; - - template<> - struct SplashToPIC + namespace traits { - typedef int32_t type; - }; - - template<> - struct SplashToPIC - { - typedef uint32_t type; - }; - - template<> - struct SplashToPIC - { - typedef int64_t type; - }; - - template<> - struct SplashToPIC - { - typedef uint64_t type; - }; - - template<> - struct SplashToPIC - { - typedef splash::Dimensions type; - }; - -} //namespace traits - -}// namespace picongpu + template<> + struct SplashToPIC + { + typedef bool type; + }; + + template<> + struct SplashToPIC + { + typedef float_32 type; + }; + + template<> + struct SplashToPIC + { + typedef float_64 type; + }; + + /** Native int */ + template<> + struct SplashToPIC + { + typedef int type; + }; + + template<> + struct SplashToPIC + { + typedef int32_t type; + }; + + template<> + struct SplashToPIC + { + typedef uint32_t type; + }; + + template<> + struct SplashToPIC + { + typedef int64_t type; + }; + + template<> + struct SplashToPIC + { + typedef uint64_t type; + }; + + template<> + struct SplashToPIC + { + typedef splash::Dimensions type; + }; + + } // namespace traits + +} // namespace picongpu #endif // (ENABLE_HDF5==1) diff --git a/include/picongpu/traits/Unit.hpp b/include/picongpu/traits/Unit.hpp index 39f5f7c316..bd96b354e6 100644 --- a/include/picongpu/traits/Unit.hpp +++ b/include/picongpu/traits/Unit.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -23,21 +23,20 @@ namespace picongpu { + namespace traits + { + /** Get unit of a date that is represented by an identifier + * + * \tparam T_Identifier any PIConGPU identifier + * \return \p std::vector ::get() as static public method + * + * Unitless identifies, see \UnitDimension, can still be scaled by a + * factor. If they are not scaled, implement the unit as 1.0; + * \see unitless/speciesAttributes.unitless + */ + template + struct Unit; -namespace traits -{ - /** Get unit of a date that is represented by an identifier - * - * \tparam T_Identifier any PIConGPU identifier - * \return \p std::vector ::get() as static public method - * - * Unitless identifies, see \UnitDimension, can still be scaled by a - * factor. If they are not scaled, implement the unit as 1.0; - * \see unitless/speciesAttributes.unitless - */ - template - struct Unit; - -} //namespace traits + } // namespace traits -}// namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/traits/UnitDimension.hpp b/include/picongpu/traits/UnitDimension.hpp index d48ed14128..b0bbc4d1c5 100644 --- a/include/picongpu/traits/UnitDimension.hpp +++ b/include/picongpu/traits/UnitDimension.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -23,26 +23,25 @@ namespace picongpu { + namespace traits + { + /** Get power of seven SI base units of date that is represented by an identifier + * + * Definition must follow the openPMD `unitDimension` definition: + * length L, mass M, time T, electric current I, thermodynamic temperature + * theta, amount of substance N, luminous intensity J + * \see http://www.openPMD.org + * \see http://dx.doi.org/10.5281/zenodo.33624 + * Must return a vector of size() == 7, for unitless attributes all + * elements are zero. 
+ * + * \tparam T_Identifier any picongpu identifier + * \return \p std::vector ::get() as static public method + * + */ + template + struct UnitDimension; -namespace traits -{ - /** Get power of seven SI base units of date that is represented by an identifier - * - * Definition must follow the openPMD `unitDimension` definition: - * length L, mass M, time T, electric current I, thermodynamic temperature - * theta, amount of substance N, luminous intensity J - * \see http://www.openPMD.org - * \see http://dx.doi.org/10.5281/zenodo.33624 - * Must return a vector of size() == 7, for unitless attributes all - * elements are zero. - * - * \tparam T_Identifier any picongpu identifier - * \return \p std::vector ::get() as static public method - * - */ - template - struct UnitDimension; - -} /* namespace traits */ + } /* namespace traits */ } /* namespace picongpu */ diff --git a/include/picongpu/traits/UsesRNG.hpp b/include/picongpu/traits/UsesRNG.hpp deleted file mode 100644 index 78ef500102..0000000000 --- a/include/picongpu/traits/UsesRNG.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2016-2020 Marco Garten, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include - -namespace picongpu -{ -namespace traits -{ - -/** Checks if an object requires the RNG - * - * @tparam T_Object any object (class or typename) - * - * This struct must inherit from (boost::true_type/false_type) - */ -template -struct UsesRNG : public boost::false_type -{ -}; - -}// namespace traits - -}// namespace picongpu diff --git a/include/picongpu/traits/attribute/GetCharge.hpp b/include/picongpu/traits/attribute/GetCharge.hpp index 05a59c2cc3..3f0d341016 100644 --- a/include/picongpu/traits/attribute/GetCharge.hpp +++ b/include/picongpu/traits/attribute/GetCharge.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Axel Huebl +/* Copyright 2014-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -29,93 +29,82 @@ namespace picongpu { -namespace traits -{ -namespace attribute -{ -namespace detail -{ - -/** Calculate the real charge of a particle - * - * use attribute `boundElectrons` and the proton number from - * flag `atomicNumbers` to calculate the charge - * - * \tparam T_HasBoundElectrons boolean that describes if species allows multiple charge states - * due to bound electrons - */ -template -struct LoadBoundElectrons -{ - /** Functor implementation - * - * \tparam T_Particle particle type - * \param weighting the particle's weighting - * \param particle particle reference - */ - template - HDINLINE float_X operator()(const float_X weighting, const T_Particle& particle) + namespace traits { - using HasAtomicNumbers = typename pmacc::traits::HasFlag< - T_Particle, - atomicNumbers<> - >::type; - PMACC_CASSERT_MSG_TYPE( - Having_boundElectrons_particle_attribute_requires_atomicNumbers_flag, - T_Particle, - HasAtomicNumbers::value - ); - const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + namespace attribute + { + namespace detail + { + /** Calculate the real charge of a particle + * + * use attribute `boundElectrons` and the proton number from + * flag `atomicNumbers` to calculate 
the charge + * + * \tparam T_HasBoundElectrons boolean that describes if species allows multiple charge states + * due to bound electrons + */ + template + struct LoadBoundElectrons + { + /** Functor implementation + * + * \tparam T_Particle particle type + * \param weighting the particle's weighting + * \param particle particle reference + */ + template + HDINLINE float_X operator()(const float_X weighting, const T_Particle& particle) + { + using HasAtomicNumbers = typename pmacc::traits::HasFlag>::type; + PMACC_CASSERT_MSG_TYPE( + Having_boundElectrons_particle_attribute_requires_atomicNumbers_flag, + T_Particle, + HasAtomicNumbers::value); + const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; - /* note: ELECTRON_CHARGE is negative and the second term is also negative - */ - return - ELECTRON_CHARGE * - ( particle[boundElectrons_] - protonNumber ) * - weighting; - } -}; + /* note: ELECTRON_CHARGE is negative and the second term is also negative + */ + return ELECTRON_CHARGE * (particle[boundElectrons_] - protonNumber) * weighting; + } + }; -/** Calculate the real charge of a particle - * - * This is the fallback implementation if no `boundElectrons` are available for a particle - */ -template<> -struct LoadBoundElectrons -{ - /** Functor implementation - * - * \tparam T_Particle particle type - * \param weighting the particle's weighting - * \param particle particle reference - */ - template - HDINLINE float_X operator()(const float_X weighting, const T_Particle&) - { - return frame::getCharge< typename T_Particle::FrameType >() * weighting; - } -}; -} // namespace detail + /** Calculate the real charge of a particle + * + * This is the fallback implementation if no `boundElectrons` are available for a particle + */ + template<> + struct LoadBoundElectrons + { + /** Functor implementation + * + * \tparam T_Particle particle type + * \param weighting the particle's weighting + * \param particle particle reference + */ + template + HDINLINE 
float_X operator()(const float_X weighting, const T_Particle&) + { + return frame::getCharge() * weighting; + } + }; + } // namespace detail -/** get the charge of a macro particle - * - * This function trait considers the `boundElectrons` attribute if it is set - * - * @param weighting weighting of the particle - * @param particle a reference to a particle - * @return charge of the macro particle - */ -template -HDINLINE float_X getCharge(const float_X weighting, const T_Particle& particle) -{ - using ParticleType = T_Particle; - typedef typename pmacc::traits::HasIdentifier::type hasBoundElectrons; - return detail::LoadBoundElectrons()( - weighting, - particle - ); -} + /** get the charge of a macro particle + * + * This function trait considers the `boundElectrons` attribute if it is set + * + * @param weighting weighting of the particle + * @param particle a reference to a particle + * @return charge of the macro particle + */ + template + HDINLINE float_X getCharge(const float_X weighting, const T_Particle& particle) + { + using ParticleType = T_Particle; + typedef typename pmacc::traits::HasIdentifier::type hasBoundElectrons; + return detail::LoadBoundElectrons()(weighting, particle); + } -}// namespace attribute -}// namespace traits -}// namespace picongpu + } // namespace attribute + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/traits/attribute/GetChargeState.hpp b/include/picongpu/traits/attribute/GetChargeState.hpp index bb2340deba..fc99c429cf 100644 --- a/include/picongpu/traits/attribute/GetChargeState.hpp +++ b/include/picongpu/traits/attribute/GetChargeState.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten, Rene Widera +/* Copyright 2014-2021 Marco Garten, Rene Widera * * This file is part of PIConGPU. 
* @@ -29,83 +29,77 @@ namespace picongpu { -namespace traits -{ -namespace attribute -{ -namespace detail -{ - -/** Calculate the charge state of an atom / ion - * - * use attribute `boundElectrons` to calculate the charge state - */ -template -struct LoadChargeState -{ - /** Functor implementation - * - * \return chargeState = number of electrons in neutral atom - number of currently bound electrons - */ - template - HDINLINE float_X operator()(const T_Particle& particle) + namespace traits { - using HasAtomicNumbers = typename pmacc::traits::HasFlag< - T_Particle, - atomicNumbers<> - >::type; - PMACC_CASSERT_MSG_TYPE( - Having_boundElectrons_particle_attribute_requires_atomicNumbers_flag, - T_Particle, - HasAtomicNumbers::value - ); - const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; - return protonNumber - particle[boundElectrons_]; - } -}; - -/** Calculate charge state of an atom / ion - * - * This is the fallback implementation to throw an error if no `boundElectrons` - * are available for a species. 
- */ -template<> -struct LoadChargeState -{ + namespace attribute + { + namespace detail + { + /** Calculate the charge state of an atom / ion + * + * use attribute `boundElectrons` to calculate the charge state + */ + template + struct LoadChargeState + { + /** Functor implementation + * + * \return chargeState = number of electrons in neutral atom - number of currently bound electrons + */ + template + HDINLINE float_X operator()(const T_Particle& particle) + { + using HasAtomicNumbers = typename pmacc::traits::HasFlag>::type; + PMACC_CASSERT_MSG_TYPE( + Having_boundElectrons_particle_attribute_requires_atomicNumbers_flag, + T_Particle, + HasAtomicNumbers::value); + const float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + return protonNumber - particle[boundElectrons_]; + } + }; - template - HDINLINE void operator()(const T_Particle& particle) - { - /* The compiler is allowed to evaluate an expression that does not depend on a template parameter - * even if the class is never instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. - * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html - * - * A workaround is to add a template dependency to the expression. - * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. - */ - PMACC_CASSERT_MSG(This_species_has_only_one_charge_state,1==2 && (sizeof(T_Particle) != 0)); - } -}; -} // namespace detail + /** Calculate charge state of an atom / ion + * + * This is the fallback implementation to throw an error if no `boundElectrons` + * are available for a species. + */ + template<> + struct LoadChargeState + { + template + HDINLINE void operator()(const T_Particle& particle) + { + /* The compiler is allowed to evaluate an expression that does not depend on a template + * parameter even if the class is never instantiated. In that case static assert is always + * evaluated (e.g. 
with clang), this results in an error if the condition is false. + * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + */ + PMACC_CASSERT_MSG(This_species_has_only_one_charge_state, 1 == 2 && (sizeof(T_Particle) != 0)); + } + }; + } // namespace detail -/** get the charge state of a macro particle - * - * This function trait considers the `boundElectrons` attribute if it is set. - * Charge states do not add up and also the various particles in a macro particle - * do NOT have different charge states where one would average over them. - * - * @param particle a reference to a particle - * @return charge of the macro particle - */ -template -HDINLINE float_X getChargeState(const T_Particle& particle) -{ - using ParticleType = T_Particle; - typedef typename pmacc::traits::HasIdentifier::type hasBoundElectrons; - return detail::LoadChargeState()(particle); -} + /** get the charge state of a macro particle + * + * This function trait considers the `boundElectrons` attribute if it is set. + * Charge states do not add up and also the various particles in a macro particle + * do NOT have different charge states where one would average over them. 
+ * + * @param particle a reference to a particle + * @return charge of the macro particle + */ + template + HDINLINE float_X getChargeState(const T_Particle& particle) + { + using ParticleType = T_Particle; + typedef typename pmacc::traits::HasIdentifier::type hasBoundElectrons; + return detail::LoadChargeState()(particle); + } -}// namespace attribute -}// namespace traits -}// namespace picongpu + } // namespace attribute + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/traits/attribute/GetMass.hpp b/include/picongpu/traits/attribute/GetMass.hpp index 0261d04d1b..85875de12b 100644 --- a/include/picongpu/traits/attribute/GetMass.hpp +++ b/include/picongpu/traits/attribute/GetMass.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -25,24 +25,23 @@ namespace picongpu { -namespace traits -{ -namespace attribute -{ - -/** get the mass of a makro particle - * - * @param weighting weighting of the particle - * @param particle a reference to a particle - * @return mass of the makro particle - */ -template -HDINLINE float_X getMass(const float_X weighting, const T_Particle& particle) -{ - using ParticleType = T_Particle; - return frame::getMass () * weighting; -} + namespace traits + { + namespace attribute + { + /** get the mass of a makro particle + * + * @param weighting weighting of the particle + * @param particle a reference to a particle + * @return mass of the makro particle + */ + template + HDINLINE float_X getMass(const float_X weighting, const T_Particle& particle) + { + using ParticleType = T_Particle; + return frame::getMass() * weighting; + } -}// namespace attribute -}// namespace traits -}// namespace picongpu + } // namespace attribute + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/traits/frame/GetCharge.hpp b/include/picongpu/traits/frame/GetCharge.hpp index 2ef07c747e..402b402ac3 100644 --- 
a/include/picongpu/traits/frame/GetCharge.hpp +++ b/include/picongpu/traits/frame/GetCharge.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -24,16 +24,15 @@ namespace picongpu { -namespace traits -{ -namespace frame -{ - -/** get the charge value for a species frame - */ -template -HDINLINE float_X getCharge(); + namespace traits + { + namespace frame + { + /** get the charge value for a species frame + */ + template + HDINLINE float_X getCharge(); -}// namespace frame -}// namespace traits -}// namespace picongpu + } // namespace frame + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/traits/frame/GetMass.hpp b/include/picongpu/traits/frame/GetMass.hpp index 5b412f6b08..a8f6ae82ab 100644 --- a/include/picongpu/traits/frame/GetMass.hpp +++ b/include/picongpu/traits/frame/GetMass.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * @@ -23,14 +23,13 @@ namespace picongpu { -namespace traits -{ -namespace frame -{ - -template -HDINLINE float_X getMass(); + namespace traits + { + namespace frame + { + template + HDINLINE float_X getMass(); -}// namespace frame -}// namespace traits -}// namespace picongpu + } // namespace frame + } // namespace traits +} // namespace picongpu diff --git a/include/picongpu/unitless/bremsstrahlung.unitless b/include/picongpu/unitless/bremsstrahlung.unitless index b7dfdee2bd..a8308e018f 100644 --- a/include/picongpu/unitless/bremsstrahlung.unitless +++ b/include/picongpu/unitless/bremsstrahlung.unitless @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. 
* @@ -21,32 +21,29 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - -namespace electron -{ + namespace particles + { + namespace bremsstrahlung + { + namespace electron + { + constexpr float_64 MIN_ENERGY_SI = MIN_ENERGY_MeV * 1.0e3 * UNITCONV_keV_to_Joule; + constexpr float_X MIN_ENERGY = MIN_ENERGY_SI / UNIT_ENERGY; -constexpr float_64 MIN_ENERGY_SI = MIN_ENERGY_MeV * 1.0e3 * UNITCONV_keV_to_Joule; -constexpr float_X MIN_ENERGY = MIN_ENERGY_SI / UNIT_ENERGY; + constexpr float_64 MAX_ENERGY_SI = MAX_ENERGY_MeV * 1.0e3 * UNITCONV_keV_to_Joule; + constexpr float_X MAX_ENERGY = MAX_ENERGY_SI / UNIT_ENERGY; -constexpr float_64 MAX_ENERGY_SI = MAX_ENERGY_MeV * 1.0e3 * UNITCONV_keV_to_Joule; -constexpr float_X MAX_ENERGY = MAX_ENERGY_SI / UNIT_ENERGY; + constexpr float_X NUM_STEPS_STOPPING_POWER_INTERGRAL = 1.0e3; -constexpr float_X NUM_STEPS_STOPPING_POWER_INTERGRAL = 1.0e3; - -} // namespace electron - -namespace photon -{ + } // namespace electron -constexpr float_64 SOFT_PHOTONS_CUTOFF_SI = SOFT_PHOTONS_CUTOFF_keV * UNITCONV_keV_to_Joule; -constexpr float_X SOFT_PHOTONS_CUTOFF = SOFT_PHOTONS_CUTOFF_SI / UNIT_ENERGY; + namespace photon + { + constexpr float_64 SOFT_PHOTONS_CUTOFF_SI = SOFT_PHOTONS_CUTOFF_keV * UNITCONV_keV_to_Joule; + constexpr float_X SOFT_PHOTONS_CUTOFF = SOFT_PHOTONS_CUTOFF_SI / UNIT_ENERGY; -} // namespace photon + } // namespace photon -} // namespace bremsstrahlung -} // namespace particles + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/include/picongpu/unitless/checkpoints.unitless b/include/picongpu/unitless/checkpoints.unitless index 2cb994db5e..44cc056e6a 100644 --- a/include/picongpu/unitless/checkpoints.unitless +++ b/include/picongpu/unitless/checkpoints.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, Benjamin Worpitz, * Sergei Bastrakov * 
* This file is part of PIConGPU. @@ -28,50 +28,40 @@ namespace picongpu { -namespace detail -{ - + namespace detail + { /** Additional fields for checkpointing * * @tparam T_FieldSolver field solver type */ - template< typename T_FieldSolver > + template struct AdditionalCheckpointFields { - using type = MakeSeq_t< >; + using type = MakeSeq_t<>; }; - //! Only the YeePML solver needs additional fields for checkpointing - template< typename ... T_Args > - struct AdditionalCheckpointFields< - fields::maxwellSolver::YeePML< T_Args ... > - > + /** Only the YeePML solver needs additional fields for checkpointing + * + * Currently LehePML is YeePML so automatically works for it as well. + */ + template + struct AdditionalCheckpointFields> { - using type = MakeSeq_t< - fields::maxwellSolver::yeePML::FieldE, - fields::maxwellSolver::yeePML::FieldB - >; + using type = MakeSeq_t; }; -} // namespace detail + } // namespace detail /** Note: we need at least FieldE and FieldB for restart * capabilities! 
*/ - using NativeFileCheckpointFields = MakeSeq_t< - FieldE, - FieldB - >; + using NativeFileCheckpointFields = MakeSeq_t; - using AdditionalFileCheckpointFields = - typename picongpu::detail::AdditionalCheckpointFields< fields::Solver >::type; + using AdditionalFileCheckpointFields = typename picongpu::detail::AdditionalCheckpointFields::type; /* List of particle species for checkpoint/restart */ using FileCheckpointParticles = VectorAllSpecies; /** List of fields for checkpoint/restart */ - using FileCheckpointFields = MakeSeq_t< - NativeFileCheckpointFields, - AdditionalFileCheckpointFields - >; -} + using FileCheckpointFields = MakeSeq_t; +} // namespace picongpu diff --git a/include/picongpu/unitless/density.unitless b/include/picongpu/unitless/density.unitless index 88ae0e0ea8..9dcc7baf14 100644 --- a/include/picongpu/unitless/density.unitless +++ b/include/picongpu/unitless/density.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -24,8 +24,7 @@ namespace picongpu { - constexpr float_X BASE_DENSITY = - float_X( SI::BASE_DENSITY_SI * UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH ); + constexpr float_X BASE_DENSITY = float_X(SI::BASE_DENSITY_SI * UNIT_LENGTH * UNIT_LENGTH * UNIT_LENGTH); } #include "picongpu/particles/densityProfiles/profiles.hpp" diff --git a/include/picongpu/unitless/fieldBackground.unitless b/include/picongpu/unitless/fieldBackground.unitless index ba701b0503..32d7471c07 100644 --- a/include/picongpu/unitless/fieldBackground.unitless +++ b/include/picongpu/unitless/fieldBackground.unitless @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PIConGPU. 
* @@ -22,3 +22,4 @@ /** Load pre-defined templates (implementation) */ #include "picongpu/fields/background/templates/TWTS/TWTS.tpp" +#include "picongpu/fields/background/templates/twtsfast/twtsfast.tpp" diff --git a/include/picongpu/unitless/fileOutput.unitless b/include/picongpu/unitless/fileOutput.unitless index 8a078c6052..42881b7d72 100644 --- a/include/picongpu/unitless/fileOutput.unitless +++ b/include/picongpu/unitless/fileOutput.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt * * This file is part of PIConGPU. * diff --git a/include/picongpu/unitless/grid.unitless b/include/picongpu/unitless/grid.unitless index 9b5ef60942..2a0d0328ee 100644 --- a/include/picongpu/unitless/grid.unitless +++ b/include/picongpu/unitless/grid.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz * * This file is part of PIConGPU. 
@@ -19,7 +19,6 @@ */ - #pragma once #include @@ -27,28 +26,23 @@ namespace picongpu { // normed grid parameter - constexpr float_X DELTA_T = float_X( SI::DELTA_T_SI / UNIT_TIME ); - constexpr float_X CELL_WIDTH = float_X( SI::CELL_WIDTH_SI / UNIT_LENGTH ); - constexpr float_X CELL_HEIGHT = float_X( SI::CELL_HEIGHT_SI / UNIT_LENGTH ); - constexpr float_X CELL_DEPTH = float_X( SI::CELL_DEPTH_SI / UNIT_LENGTH ); - CONST_VECTOR( float_X, DIM3, cellSize, CELL_WIDTH, CELL_HEIGHT, CELL_DEPTH ); + constexpr float_X DELTA_T = float_X(SI::DELTA_T_SI / UNIT_TIME); + constexpr float_X CELL_WIDTH = float_X(SI::CELL_WIDTH_SI / UNIT_LENGTH); + constexpr float_X CELL_HEIGHT = float_X(SI::CELL_HEIGHT_SI / UNIT_LENGTH); + constexpr float_X CELL_DEPTH = float_X(SI::CELL_DEPTH_SI / UNIT_LENGTH); + CONST_VECTOR(float_X, DIM3, cellSize, CELL_WIDTH, CELL_HEIGHT, CELL_DEPTH); // always a 3D cell, even in 1D3V or 2D3V constexpr float_X CELL_VOLUME = CELL_WIDTH * CELL_HEIGHT * CELL_DEPTH; // only used for CFL checks -#if (SIMDIM==DIM3) - constexpr float_X INV_CELL2_SUM = - 1.0 / ( CELL_WIDTH * CELL_WIDTH ) + - 1.0 / ( CELL_HEIGHT * CELL_HEIGHT ) + - 1.0 / ( CELL_DEPTH * CELL_DEPTH ); -#elif(SIMDIM==DIM2) - constexpr float_X INV_CELL2_SUM = - 1.0 / ( CELL_WIDTH * CELL_WIDTH ) + - 1.0 / ( CELL_HEIGHT * CELL_HEIGHT ); +#if(SIMDIM == DIM3) + constexpr float_X INV_CELL2_SUM + = 1.0 / (CELL_WIDTH * CELL_WIDTH) + 1.0 / (CELL_HEIGHT * CELL_HEIGHT) + 1.0 / (CELL_DEPTH * CELL_DEPTH); +#elif(SIMDIM == DIM2) + constexpr float_X INV_CELL2_SUM = 1.0 / (CELL_WIDTH * CELL_WIDTH) + 1.0 / (CELL_HEIGHT * CELL_HEIGHT); #else - constexpr float_X INV_CELL2_SUM = - 1.0 / ( CELL_WIDTH * CELL_WIDTH ); + constexpr float_X INV_CELL2_SUM = 1.0 / (CELL_WIDTH * CELL_WIDTH); #endif -} +} // namespace picongpu diff --git a/include/picongpu/unitless/ionizer.unitless b/include/picongpu/unitless/ionizer.unitless index a0b5ef0030..be860e1442 100644 --- a/include/picongpu/unitless/ionizer.unitless +++ 
b/include/picongpu/unitless/ionizer.unitless @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Marco Garten +/* Copyright 2014-2021 Marco Garten * * This file is part of PIConGPU. * diff --git a/include/picongpu/unitless/particle.unitless b/include/picongpu/unitless/particle.unitless index 7fc199ef76..6ecb65bd16 100644 --- a/include/picongpu/unitless/particle.unitless +++ b/include/picongpu/unitless/particle.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once diff --git a/include/picongpu/unitless/physicalConstants.unitless b/include/picongpu/unitless/physicalConstants.unitless index d1865092d1..d10abe34d8 100644 --- a/include/picongpu/unitless/physicalConstants.unitless +++ b/include/picongpu/unitless/physicalConstants.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Marco Garten, Heiko Burau +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Marco Garten, Heiko Burau * * This file is part of PIConGPU. * @@ -23,18 +23,18 @@ namespace picongpu { //! reduced Planck constant - constexpr float_X HBAR = (float_X) (SI::HBAR_SI / UNIT_ENERGY / UNIT_TIME); + constexpr float_X HBAR = (float_X)(SI::HBAR_SI / UNIT_ENERGY / UNIT_TIME); //! Charge of electron - constexpr float_X ELECTRON_CHARGE = (float_X) (SI::ELECTRON_CHARGE_SI / UNIT_CHARGE); + constexpr float_X ELECTRON_CHARGE = (float_X)(SI::ELECTRON_CHARGE_SI / UNIT_CHARGE); //! Mass of electron - constexpr float_X ELECTRON_MASS = (float_X) (SI::ELECTRON_MASS_SI / UNIT_MASS); + constexpr float_X ELECTRON_MASS = (float_X)(SI::ELECTRON_MASS_SI / UNIT_MASS); //! magnetic constexprant must be double 3.92907e-39 - constexpr float_X MUE0 = (float_X) (SI::MUE0_SI / UNIT_LENGTH / UNIT_MASS * UNIT_CHARGE * UNIT_CHARGE); + constexpr float_X MUE0 = (float_X)(SI::MUE0_SI / UNIT_LENGTH / UNIT_MASS * UNIT_CHARGE * UNIT_CHARGE); //! 
electric constexprant must be double 2.54513e+38 - constexpr float_X EPS0 = (float_X) (1. / MUE0 / SPEED_OF_LIGHT / SPEED_OF_LIGHT); + constexpr float_X EPS0 = (float_X)(1. / MUE0 / SPEED_OF_LIGHT / SPEED_OF_LIGHT); // = 1/c^2 constexpr float_X MUE0_EPS0 = float_X(1. / SPEED_OF_LIGHT / SPEED_OF_LIGHT); @@ -48,4 +48,4 @@ namespace picongpu /* Atomic unit of time in PIC units */ constexpr float_X ATOMIC_UNIT_TIME = float_X(SI::ATOMIC_UNIT_TIME / UNIT_TIME); -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/unitless/pml.unitless b/include/picongpu/unitless/pml.unitless index 4094eeeb99..82a5c716e4 100644 --- a/include/picongpu/unitless/pml.unitless +++ b/include/picongpu/unitless/pml.unitless @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 Sergei Bastrakov +/* Copyright 2019-2021 Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. * @@ -24,69 +24,82 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - // Assert parameters are in the valid ranges - PMACC_CASSERT_MSG( You_can_not_set_negative_grading_order_for_pml_kappa_and_sigma___change_pml_param, (SIGMA_KAPPA_GRADING_ORDER >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_value_pml_sigma_max_x___change_pml_param, (SIGMA_MAX_SI[ 0 ] >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_value_pml_sigma_max_y___change_pml_param, (SIGMA_MAX_SI[ 1 ] >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_value_pml_sigma_max_z___change_pml_param, (SIGMA_MAX_SI[ 2 ] >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_pml_kappa_max_x_value_less_than_one___change_pml_param, (KAPPA_MAX[ 0 ] >= 1.0) ); - PMACC_CASSERT_MSG( You_can_not_set_pml_kappa_max_y_value_less_than_one___change_pml_param, (KAPPA_MAX[ 1 ] >= 1.0) ); - PMACC_CASSERT_MSG( You_can_not_set_pml_kappa_max_z_value_less_than_one___change_pml_param, (KAPPA_MAX[ 2 ] >= 1.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_grading_order_for_pml_alpha___change_pml_param, 
(ALPHA_GRADING_ORDER >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_pml_alpha_max_x___change_pml_param, (ALPHA_MAX_SI[ 0 ] >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_pml_alpha_max_y___change_pml_param, (ALPHA_MAX_SI[ 1 ] >= 0.0) ); - PMACC_CASSERT_MSG( You_can_not_set_negative_pml_alpha_max_z___change_pml_param, (ALPHA_MAX_SI[ 2 ] >= 0.0) ); + namespace fields + { + namespace maxwellSolver + { + namespace Pml + { + // Assert parameters are in the valid ranges + PMACC_CASSERT_MSG( + You_can_not_set_negative_grading_order_for_pml_kappa_and_sigma___change_pml_param, + (SIGMA_KAPPA_GRADING_ORDER >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_value_pml_sigma_max_x___change_pml_param, + (SIGMA_MAX_SI[0] >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_value_pml_sigma_max_y___change_pml_param, + (SIGMA_MAX_SI[1] >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_value_pml_sigma_max_z___change_pml_param, + (SIGMA_MAX_SI[2] >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_pml_kappa_max_x_value_less_than_one___change_pml_param, + (KAPPA_MAX[0] >= 1.0)); + PMACC_CASSERT_MSG( + You_can_not_set_pml_kappa_max_y_value_less_than_one___change_pml_param, + (KAPPA_MAX[1] >= 1.0)); + PMACC_CASSERT_MSG( + You_can_not_set_pml_kappa_max_z_value_less_than_one___change_pml_param, + (KAPPA_MAX[2] >= 1.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_grading_order_for_pml_alpha___change_pml_param, + (ALPHA_GRADING_ORDER >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_pml_alpha_max_x___change_pml_param, + (ALPHA_MAX_SI[0] >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_pml_alpha_max_y___change_pml_param, + (ALPHA_MAX_SI[1] >= 0.0)); + PMACC_CASSERT_MSG( + You_can_not_set_negative_pml_alpha_max_z___change_pml_param, + (ALPHA_MAX_SI[2] >= 0.0)); - /* Normalize artificial conductivity by eps0, so that the result can be used - * for matching electric conductivity and magnetic permeability - * unit: 1 / s - */ - constexpr 
float_64 NORMALIZED_SIGMA_MAX_SI[ 3 ] = { - SIGMA_MAX_SI[ 0 ] / SI::EPS0_SI, - SIGMA_MAX_SI[ 1 ] / SI::EPS0_SI, - SIGMA_MAX_SI[ 2 ] / SI::EPS0_SI - }; + /* Normalize artificial conductivity by eps0, so that the result can be used + * for matching electric conductivity and magnetic permeability + * unit: 1 / s + */ + constexpr float_64 NORMALIZED_SIGMA_MAX_SI[3] + = {SIGMA_MAX_SI[0] / SI::EPS0_SI, SIGMA_MAX_SI[1] / SI::EPS0_SI, SIGMA_MAX_SI[2] / SI::EPS0_SI}; - /** Max value of normalized conductivity in PIC units - * - * unit: 1 / time - * (that is why we multiply by UNIT_TIME and not divide) - */ - constexpr float_64 NORMALIZED_SIGMA_MAX[ 3 ] = { - NORMALIZED_SIGMA_MAX_SI[ 0 ] * UNIT_TIME, - NORMALIZED_SIGMA_MAX_SI[ 1 ] * UNIT_TIME, - NORMALIZED_SIGMA_MAX_SI[ 2 ] * UNIT_TIME - }; + /** Max value of normalized conductivity in PIC units + * + * unit: 1 / time + * (that is why we multiply by UNIT_TIME and not divide) + */ + constexpr float_64 NORMALIZED_SIGMA_MAX[3] + = {NORMALIZED_SIGMA_MAX_SI[0] * UNIT_TIME, + NORMALIZED_SIGMA_MAX_SI[1] * UNIT_TIME, + NORMALIZED_SIGMA_MAX_SI[2] * UNIT_TIME}; - /* Normalize complex frequency shift by eps0, so that the result can be used - * for matching electric conductivity and magnetic permeability - * unit: 1 / s - */ - constexpr float_64 NORMALIZED_ALPHA_MAX_SI[ 3 ] = { - ALPHA_MAX_SI[ 0 ] / SI::EPS0_SI, - ALPHA_MAX_SI[ 1 ] / SI::EPS0_SI, - ALPHA_MAX_SI[ 2 ] / SI::EPS0_SI - }; + /* Normalize complex frequency shift by eps0, so that the result can be used + * for matching electric conductivity and magnetic permeability + * unit: 1 / s + */ + constexpr float_64 NORMALIZED_ALPHA_MAX_SI[3] + = {ALPHA_MAX_SI[0] / SI::EPS0_SI, ALPHA_MAX_SI[1] / SI::EPS0_SI, ALPHA_MAX_SI[2] / SI::EPS0_SI}; - /** Max value of normalized complex frequency shift in PIC units - * - * unit: 1 / time - * (that is why we multiply by UNIT_TIME and not divide) - */ - constexpr float_64 NORMALIZED_ALPHA_MAX[ 3 ] = { - NORMALIZED_ALPHA_MAX_SI[ 0 ] * UNIT_TIME, - 
NORMALIZED_ALPHA_MAX_SI[ 1 ] * UNIT_TIME, - NORMALIZED_ALPHA_MAX_SI[ 2 ] * UNIT_TIME - }; + /** Max value of normalized complex frequency shift in PIC units + * + * unit: 1 / time + * (that is why we multiply by UNIT_TIME and not divide) + */ + constexpr float_64 NORMALIZED_ALPHA_MAX[3] + = {NORMALIZED_ALPHA_MAX_SI[0] * UNIT_TIME, + NORMALIZED_ALPHA_MAX_SI[1] * UNIT_TIME, + NORMALIZED_ALPHA_MAX_SI[2] * UNIT_TIME}; -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields + } // namespace Pml + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/include/picongpu/unitless/png.unitless b/include/picongpu/unitless/png.unitless index 19e548d6ef..99d479c33e 100644 --- a/include/picongpu/unitless/png.unitless +++ b/include/picongpu/unitless/png.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -25,56 +25,70 @@ namespace picongpu { -namespace traits -{ - template< typename > - struct is_laser_none : std::false_type {}; + namespace traits + { + template + struct is_laser_none : std::false_type + { + }; - template< typename T > - struct is_laser_none< fields::laserProfiles::None< T > > : std::true_type {}; + template + struct is_laser_none> : std::true_type + { + }; - template< typename > - struct is_laser_planewave : std::false_type {}; + template + struct is_laser_planewave : std::false_type + { + }; - template< typename T > - struct is_laser_planewave< fields::laserProfiles::PlaneWave< T > > : std::true_type {}; -} - // asserts for wrong user configurations - // - // setting 1: Laser -#if( EM_FIELD_SCALE_CHANNEL1 == 1 || EM_FIELD_SCALE_CHANNEL2 == 1 || EM_FIELD_SCALE_CHANNEL3 == 1 ) + template + struct is_laser_planewave> : std::true_type + { + }; + } // namespace traits + // asserts for wrong user configurations + // + // setting 1: Laser +#if(EM_FIELD_SCALE_CHANNEL1 == 1 || EM_FIELD_SCALE_CHANNEL2 == 1 || 
EM_FIELD_SCALE_CHANNEL3 == 1) PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_laser_without_using_a_laser___change_png_param, - !traits::is_laser_none< fields::laserProfiles::Selected >::value - ); + !traits::is_laser_none::value); #endif // setting 2: Drifting Plasma -#if( EM_FIELD_SCALE_CHANNEL1 == 2 || EM_FIELD_SCALE_CHANNEL2 == 2 || EM_FIELD_SCALE_CHANNEL3 == 2 ) - PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_drift_without_a_initially_drifting_plasma___change_png_param, ((PARTICLE_INIT_DRIFT_GAMMA)>1.0) ); +#if(EM_FIELD_SCALE_CHANNEL1 == 2 || EM_FIELD_SCALE_CHANNEL2 == 2 || EM_FIELD_SCALE_CHANNEL3 == 2) + PMACC_CASSERT_MSG( + You_can_not_scale_your_preview_to_drift_without_a_initially_drifting_plasma___change_png_param, + ((PARTICLE_INIT_DRIFT_GAMMA) > 1.0)); #endif // setting 3: Plasma Wave -#if( EM_FIELD_SCALE_CHANNEL1 == 3 || EM_FIELD_SCALE_CHANNEL2 == 3 || EM_FIELD_SCALE_CHANNEL3 == 3 ) - PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_a_zero_plasma_density___change_png_param, (BASE_DENSITY>0.0) ); +#if(EM_FIELD_SCALE_CHANNEL1 == 3 || EM_FIELD_SCALE_CHANNEL2 == 3 || EM_FIELD_SCALE_CHANNEL3 == 3) + PMACC_CASSERT_MSG( + You_can_not_scale_your_preview_to_a_zero_plasma_density___change_png_param, + (BASE_DENSITY > 0.0)); #endif // setting 4: Thermal Warm Plasma -#if( EM_FIELD_SCALE_CHANNEL1 == 4 || EM_FIELD_SCALE_CHANNEL2 == 4 || EM_FIELD_SCALE_CHANNEL3 == 4 ) - PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_a_zero_plasma_density___change_png_param, (BASE_DENSITY>0.0) ); - PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_a_zero_electron_temperature___change_png_param, ((ELECTRON_TEMPERATURE)>0.0) ); +#if(EM_FIELD_SCALE_CHANNEL1 == 4 || EM_FIELD_SCALE_CHANNEL2 == 4 || EM_FIELD_SCALE_CHANNEL3 == 4) + PMACC_CASSERT_MSG( + You_can_not_scale_your_preview_to_a_zero_plasma_density___change_png_param, + (BASE_DENSITY > 0.0)); + PMACC_CASSERT_MSG( + You_can_not_scale_your_preview_to_a_zero_electron_temperature___change_png_param, + 
((ELECTRON_TEMPERATURE) > 0.0)); #endif // setting 5: Blow Out -#if( EM_FIELD_SCALE_CHANNEL1 == 5 || EM_FIELD_SCALE_CHANNEL2 == 5 || EM_FIELD_SCALE_CHANNEL3 == 5 ) - //PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_a_zero_plasma_density___change_png_param, (BASE_DENSITY>0.0) ); +#if(EM_FIELD_SCALE_CHANNEL1 == 5 || EM_FIELD_SCALE_CHANNEL2 == 5 || EM_FIELD_SCALE_CHANNEL3 == 5) + // PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_a_zero_plasma_density___change_png_param, + // (BASE_DENSITY>0.0) ); PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_blowout_without_a_laser___change_png_param, - !traits::is_laser_none< fields::laserProfiles::Selected >::value - ); + !traits::is_laser_none::value); PMACC_CASSERT_MSG( You_can_not_scale_your_preview_to_blowout_with_a_laser_without_beam_waist___change_png_param, - !traits::is_laser_planewave< fields::laserProfiles::Selected >::value - ); + !traits::is_laser_planewave::value); #endif -} +} // namespace picongpu diff --git a/include/picongpu/unitless/precision.unitless b/include/picongpu/unitless/precision.unitless index e45aca41e1..bfcf0a4131 100644 --- a/include/picongpu/unitless/precision.unitless +++ b/include/picongpu/unitless/precision.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -24,37 +24,36 @@ namespace picongpu { - using float_X = precisionPIConGPU::precisionType; namespace precision32Bit { using float_X = precisionType; /* 32 Bit defines */ - using float1_X = ::pmacc::math::Vector< float_X, 1 >; - using float2_X = ::pmacc::math::Vector< float_X, 2 >; - using float3_X = ::pmacc::math::Vector< float_X, 3 >; - using floatD_X = ::pmacc::math::Vector< float_X, simDim >; - } + using float1_X = ::pmacc::math::Vector; + using float2_X = ::pmacc::math::Vector; + using float3_X = ::pmacc::math::Vector; + using floatD_X = ::pmacc::math::Vector; + } // namespace precision32Bit namespace precision64Bit { using float_X = precisionType; /* 64 Bit defines */ - using float1_X = ::pmacc::math::Vector< float_X, 1 >; - using float2_X = ::pmacc::math::Vector< float_X, 2 >; - using float3_X = ::pmacc::math::Vector< float_X, 3 >; - using floatD_X = ::pmacc::math::Vector< float_X, simDim >; - } + using float1_X = ::pmacc::math::Vector; + using float2_X = ::pmacc::math::Vector; + using float3_X = ::pmacc::math::Vector; + using floatD_X = ::pmacc::math::Vector; + } // namespace precision64Bit using float_32 = precision32Bit::float_X; using float_64 = precision64Bit::float_X; /* variable precision defines */ - using float1_X = ::pmacc::math::Vector< float_X, 1 >; - using float2_X = ::pmacc::math::Vector< float_X, 2 >; - using float3_X = ::pmacc::math::Vector< float_X, 3 >; - using floatD_X = ::pmacc::math::Vector< float_X, simDim >; + using float1_X = ::pmacc::math::Vector; + using float2_X = ::pmacc::math::Vector; + using float3_X = ::pmacc::math::Vector; + using floatD_X = ::pmacc::math::Vector; /* 32 Bit defines */ using float1_32 = precision32Bit::float1_X; using float2_32 = precision32Bit::float2_X; @@ -67,10 +66,9 @@ namespace picongpu using floatD_64 = precision64Bit::floatD_X; // literals for short-hand notations - constexpr float_X - operator""_X( long double x ) + constexpr float_X operator""_X(long double x) { - return float_X( x ); + return 
float_X(x); } // special functions diff --git a/include/picongpu/unitless/pusher.unitless b/include/picongpu/unitless/pusher.unitless index c43aa7d2e4..707ac191cf 100644 --- a/include/picongpu/unitless/pusher.unitless +++ b/include/picongpu/unitless/pusher.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch, Annegret Roeszler * * This file is part of PIConGPU. * @@ -24,13 +24,15 @@ #include "picongpu/particles/pusher/particlePusherAcceleration.hpp" #include "picongpu/particles/pusher/particlePusherBoris.hpp" +#include "picongpu/particles/pusher/particlePusherComposite.hpp" #include "picongpu/particles/pusher/particlePusherVay.hpp" +#include "picongpu/particles/pusher/particlePusherHigueraCary.hpp" #include "picongpu/particles/pusher/particlePusherFree.hpp" #include "picongpu/particles/pusher/particlePusherPhoton.hpp" #include "picongpu/particles/pusher/particlePusherProbe.hpp" #include "picongpu/particles/pusher/particlePusherReducedLandauLifshitz.hpp" -#if(SIMDIM==DIM3) -#include "picongpu/particles/pusher/particlePusherAxel.hpp" +#if(SIMDIM == DIM3) +# include "picongpu/particles/pusher/particlePusherAxel.hpp" #endif #include @@ -39,58 +41,61 @@ namespace picongpu { - -namespace particles -{ -namespace pusher -{ - -struct Acceleration : -public particlePusherAcceleration::Push > -{ -}; - -#if(SIMDIM==DIM3) - -struct Axel : -public particlePusherAxel::Push > -{ -}; + namespace particles + { + namespace pusher + { + struct Acceleration : public particlePusherAcceleration::Push> + { + }; + +#if(SIMDIM == DIM3) + + struct Axel : public particlePusherAxel::Push> + { + }; #endif -struct Boris : -public particlePusherBoris::Push > -{ -}; - -struct Vay : -public particlePusherVay::Push > -{ -}; - -struct Free : -public particlePusherFree::Push > -{ -}; - -struct Photon : -public particlePusherPhoton::Push > -{ -}; - -struct ReducedLandauLifshitz : -public 
particlePusherReducedLandauLifshitz::Push > -{ -}; - -struct Probe : -public particlePusherProbe::Push< - pmacc::nvidia::functors::Assign, - particlePusherProbe::ActualPusher -> -{ -}; - -} //namespace pusher -} //namespace particles -} //namespace picongpu + struct Boris : public particlePusherBoris::Push> + { + }; + + struct Vay : public particlePusherVay::Push> + { + }; + + struct HigueraCary : public particlePusherHigueraCary::Push> + { + }; + + struct Free : public particlePusherFree::Push> + { + }; + + struct Photon : public particlePusherPhoton::Push> + { + }; + + struct ReducedLandauLifshitz : public particlePusherReducedLandauLifshitz::Push> + { + }; + + struct Probe + : public particlePusherProbe::Push + { + }; + + template + struct Composite : public particlePusherComposite::Push + { + }; + + template + struct CompositeBinarySwitchActivationFunctor + : public particlePusherComposite::BinarySwitchActivationFunctor + { + }; + + } // namespace pusher + } // namespace particles +} // namespace picongpu diff --git a/include/picongpu/unitless/radiation.unitless b/include/picongpu/unitless/radiation.unitless index 58e3dd0b81..0ffbc55585 100644 --- a/include/picongpu/unitless/radiation.unitless +++ b/include/picongpu/unitless/radiation.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -22,46 +22,54 @@ #include -PMACC_CASSERT_MSG( The_Nyquist_limit_needs_to_be_below_one, ( picongpu::plugins::radiation::radiationNyquist::NyquistFactor < 1.0 ) ); -PMACC_CASSERT_MSG( The_Nyquist_limit_needs_to_be_larger_than_zero, ( picongpu::plugins::radiation::radiationNyquist::NyquistFactor > 0.0 ) ); +PMACC_CASSERT_MSG( + The_Nyquist_limit_needs_to_be_below_one, + (picongpu::plugins::radiation::radiationNyquist::NyquistFactor < 1.0)); +PMACC_CASSERT_MSG( + The_Nyquist_limit_needs_to_be_larger_than_zero, + (picongpu::plugins::radiation::radiationNyquist::NyquistFactor > 0.0)); namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace linear_frequencies -{ - constexpr float_X omega_min = SI::omega_min*UNIT_TIME; - constexpr float_X omega_max = SI::omega_max*UNIT_TIME; - constexpr float_X delta_omega = (float_X) ((omega_max - omega_min) / (float_X) (N_omega - 1)); // difference beween two omega + namespace plugins + { + namespace radiation + { + namespace linear_frequencies + { + constexpr float_X omega_min = SI::omega_min * UNIT_TIME; + constexpr float_X omega_max = SI::omega_max * UNIT_TIME; + constexpr float_X delta_omega + = (float_X)((omega_max - omega_min) / (float_X)(N_omega - 1)); // difference beween two omega - constexpr unsigned int blocksize_omega = pmacc::math::CT::volume::type::value; - constexpr unsigned int gridsize_omega = N_omega / blocksize_omega; // size of grid (dim: x); radiation -} // namespace linear_frequencies + constexpr unsigned int blocksize_omega + = pmacc::math::CT::volume::type::value; + constexpr unsigned int gridsize_omega = N_omega / blocksize_omega; // size of grid (dim: x); radiation + } // namespace linear_frequencies -namespace log_frequencies -{ - constexpr float_X omega_min = (SI::omega_min*UNIT_TIME); - constexpr float_X omega_max = (SI::omega_max*UNIT_TIME); + namespace log_frequencies + { + constexpr float_X omega_min = (SI::omega_min * UNIT_TIME); + constexpr float_X omega_max = (SI::omega_max * 
UNIT_TIME); - constexpr unsigned int blocksize_omega = pmacc::math::CT::volume::type::value; - constexpr unsigned int gridsize_omega = N_omega / blocksize_omega; // size of grid (dim: x); radiation -} // namespace log_frequencies + constexpr unsigned int blocksize_omega + = pmacc::math::CT::volume::type::value; + constexpr unsigned int gridsize_omega = N_omega / blocksize_omega; // size of grid (dim: x); radiation + } // namespace log_frequencies -namespace frequencies_from_list -{ - constexpr unsigned int blocksize_omega = pmacc::math::CT::volume::type::value; - constexpr unsigned int gridsize_omega = N_omega / blocksize_omega; // size of grid (dim: x); radiation -} // namespace frequencies_from_list + namespace frequencies_from_list + { + constexpr unsigned int blocksize_omega + = pmacc::math::CT::volume::type::value; + constexpr unsigned int gridsize_omega = N_omega / blocksize_omega; // size of grid (dim: x); radiation + } // namespace frequencies_from_list -namespace parameters -{ - constexpr unsigned int gridsize_theta = N_observer; // size of grid /dim: y); radiation -} // namespace parameters + namespace parameters + { + constexpr unsigned int gridsize_theta = N_observer; // size of grid /dim: y); radiation + } // namespace parameters -} // namespace radiation -} // namespace plugins + } // namespace radiation + } // namespace plugins } // namespace picongpu #include "picongpu/plugins/radiation/frequencies/radiation_lin_freq.hpp" diff --git a/include/picongpu/unitless/speciesAttributes.unitless b/include/picongpu/unitless/speciesAttributes.unitless index 613103dcca..1877b81b7d 100644 --- a/include/picongpu/unitless/speciesAttributes.unitless +++ b/include/picongpu/unitless/speciesAttributes.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl, +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl, * Alexander Grund, Finn-Ole Carstens * * This file is part of PIConGPU. 
@@ -53,583 +53,582 @@ */ namespace picongpu { -namespace traits -{ - -template -struct Unit > -{ - static std::vector get() - { - std::vector unit(simDim); - /* in-cell position needs two transformations to get to SI: - in-cell [0;1) -> dimensionless scaling to grid -> SI - */ - for(uint32_t i=0;i -struct UnitDimension > -{ - static std::vector get() - { - /* L, M, T, I, theta, N, J - * - * position is in meter: m - * -> L - */ - std::vector unitDimension( NUnitDimension, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 1.0; - - return unitDimension; - } -}; -template -struct MacroWeighted > -{ - // the position is identical and can not be scaled by weightings - static bool get() - { - return false; - } -}; -template -struct WeightingPower > -{ - // x * weighting^0 == x: same for real and macro particle - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - // unitless and not scaled by a factor: by convention 1.0 - static std::vector get() - { - std::vector unit( 1, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // radiationMask is unitless - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // identical and can not be scaled by weightings - static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - // flag * weighting^0 == flag: same for real and macro particle - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - // unitless and not scaled by a factor: by convention 1.0 - static std::vector get() - { - std::vector unit( 1, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // transitionRadiationMask is unitless - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // identical and can not be scaled by weightings - static bool get() - { - 
return false; - } -}; -template<> -struct WeightingPower -{ - // flag * weighting^0 == flag: same for real and macro particle - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - static std::vector get() - { - const uint32_t components = GetNComponents::value; - - std::vector unit(components); - for(uint32_t i=0;i -struct UnitDimension -{ - static std::vector get() - { - /* L, M, T, I, theta, N, J - * - * momentum is in mass times speed: kg * m / s - * -> L * M * T^-1 - */ - std::vector unitDimension( NUnitDimension, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 1.0; - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -1.0; - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // we currently push macro particle momentums - static bool get() - { - return true; - } -}; -template<> -struct WeightingPower -{ - /* px * weighting^1 == px * weighting: momentum is contributed linearly - * in the macro-particle ensemble - */ - static float_64 get() - { - return 1.0; - } -}; - -template<> -struct Unit -{ - static std::vector get() - { - const uint32_t components = GetNComponents::value; - - std::vector unit(components); - for(uint32_t i=0;i -struct UnitDimension -{ - static std::vector get() - { - /* L, M, T, I, theta, N, J - * - * momentum is in mass times speed: kg * m / s - * -> L * M * T^-1 - */ - std::vector unitDimension( NUnitDimension, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 1.0; - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -1.0; - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // we currently push macro particle momentums - static bool get() - { - return true; - } -}; -template<> -struct WeightingPower -{ - /* px_real * weighting^1 == px_macro * weighting: momentum is contributed - * linearly in the macro-particle ensemble - */ - static float_64 get() - { - return 1.0; - } -}; - -template<> -struct Unit -{ - // 
unitless and not scaled by a factor: 1.0 - static std::vector get() - { - std::vector unit( 1, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // weighting is unitless - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // the weighting attribute is an attribute of the macro particle - static bool get() - { - return true; - } -}; -template<> -struct WeightingPower -{ - /* 1 * weighting^1 == weighting: real particles contibute linearily - * to the macro particle weighting - */ - static float_64 get() - { - return 1.0; - } -}; - - -template<> -struct Unit -{ - // unitless and not scaled by a factor: by convention 1.0 - static std::vector get() - { - std::vector unit( 1, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // voronoiCellId is unitless - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // the voronoiCellId attribute is not a physical parameter - static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - // the voronoiCellId attribute is not a physical parameter - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - static std::vector get() - { - uint32_t const components = 3u; - - std::vector< double > const unit( components, UNIT_EFIELD); - - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - /* L, M, T, I, theta, N, J - * - * E is in volts per meters: V / m = kg * m / (A * s^3) - * -> L * M * T^-3 * I^-1 - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::length) = 1.0; - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -3.0; - unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - static 
bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - // local electric fields do not scale with weighting - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - static std::vector get() - { - uint32_t const components = 3u; - - std::vector< double > const unit( components, UNIT_BFIELD); - - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - /* L, M, T, I, theta, N, J - * - * B is in Tesla : kg / (A * s^2) - * -> M * T^-2 * I^-1 - */ - std::vector unitDimension( 7, 0.0 ); - unitDimension.at(SIBaseUnits::mass) = 1.0; - unitDimension.at(SIBaseUnits::time) = -2.0; - unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - // local magnetic fields do not scale with weighting - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - // unitless and not scaled by a factor: by convention 1.0 - static std::vector get() - { - std::vector unit( 1, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // unitless - return std::vector( NUnitDimension, 0.0 ); - } -}; -template<> -struct MacroWeighted -{ - // we can only follow maro particles via ids - static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - // particle ids do not scale with weighting - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - /* unitless index and not scaled by a factor: by convention 1.0 */ - static std::vector get() - { - std::vector unit( simDim, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - /* totalCellIdx is a cell index and therefore unitless - */ - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // 
the cell idx is identical and can not be scaled by weightings - static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - // idx * weighting^0 == idx: same for real and macro particle - static float_64 get() - { - return 0.0; - } -}; - -template<> -struct Unit -{ - // unitless and not scaled by a factor: 1.0 - static std::vector get() - { - std::vector unit( 1, 1.0 ); - return unit; - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // boundElectrons is unitless - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // bound electrons are counted for a single real ion - static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - /* #e-_real * weighting^1 == #e-_macro: bound electrons are contributed - * linearly from the underlying real particles - */ - static float_64 get() - { - return 1.0; - } -}; - -template<> -struct Unit -{ - // unitless and not scaled by a factor: 1.0 - static std::vector get() - { - return std::vector( picongpu::flylite::populations, 1.0 ); - } -}; -template<> -struct UnitDimension -{ - static std::vector get() - { - // superconfig is unitless - std::vector unitDimension( NUnitDimension, 0.0 ); - - return unitDimension; - } -}; -template<> -struct MacroWeighted -{ - // represented by (1) or (weighted) ions??? 
- static bool get() - { - return false; - } -}; -template<> -struct WeightingPower -{ - static float_64 get() - { - return 1.0; - } -}; - -} // namespace traits + namespace traits + { + template + struct Unit> + { + static std::vector get() + { + std::vector unit(simDim); + /* in-cell position needs two transformations to get to SI: + in-cell [0;1) -> dimensionless scaling to grid -> SI + */ + for(uint32_t i = 0; i < simDim; ++i) + unit[i] = cellSize[i] * UNIT_LENGTH; + + return unit; + } + }; + template + struct UnitDimension> + { + static std::vector get() + { + /* L, M, T, I, theta, N, J + * + * position is in meter: m + * -> L + */ + std::vector unitDimension(NUnitDimension, 0.0); + unitDimension.at(SIBaseUnits::length) = 1.0; + + return unitDimension; + } + }; + template + struct MacroWeighted> + { + // the position is identical and can not be scaled by weightings + static bool get() + { + return false; + } + }; + template + struct WeightingPower> + { + // x * weighting^0 == x: same for real and macro particle + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + // unitless and not scaled by a factor: by convention 1.0 + static std::vector get() + { + std::vector unit(1, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // radiationMask is unitless + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // identical and can not be scaled by weightings + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // flag * weighting^0 == flag: same for real and macro particle + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + // unitless and not scaled by a factor: by convention 1.0 + static std::vector get() + { + std::vector unit(1, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // 
transitionRadiationMask is unitless + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // identical and can not be scaled by weightings + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // flag * weighting^0 == flag: same for real and macro particle + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + static std::vector get() + { + const uint32_t components = GetNComponents::value; + + std::vector unit(components); + for(uint32_t i = 0; i < components; ++i) + unit[i] = UNIT_MASS * UNIT_SPEED; + + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + /* L, M, T, I, theta, N, J + * + * momentum is in mass times speed: kg * m / s + * -> L * M * T^-1 + */ + std::vector unitDimension(NUnitDimension, 0.0); + unitDimension.at(SIBaseUnits::length) = 1.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -1.0; + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // we currently push macro particle momentums + static bool get() + { + return true; + } + }; + template<> + struct WeightingPower + { + /* px * weighting^1 == px * weighting: momentum is contributed linearly + * in the macro-particle ensemble + */ + static float_64 get() + { + return 1.0; + } + }; + + template<> + struct Unit + { + static std::vector get() + { + const uint32_t components = GetNComponents::value; + + std::vector unit(components); + for(uint32_t i = 0; i < components; ++i) + unit[i] = UNIT_MASS * UNIT_SPEED; + + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + /* L, M, T, I, theta, N, J + * + * momentum is in mass times speed: kg * m / s + * -> L * M * T^-1 + */ + std::vector unitDimension(NUnitDimension, 0.0); + unitDimension.at(SIBaseUnits::length) = 1.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + 
unitDimension.at(SIBaseUnits::time) = -1.0; + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // we currently push macro particle momentums + static bool get() + { + return true; + } + }; + template<> + struct WeightingPower + { + /* px_real * weighting^1 == px_macro * weighting: momentum is contributed + * linearly in the macro-particle ensemble + */ + static float_64 get() + { + return 1.0; + } + }; + + template<> + struct Unit + { + // unitless and not scaled by a factor: 1.0 + static std::vector get() + { + std::vector unit(1, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // weighting is unitless + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // the weighting attribute is an attribute of the macro particle + static bool get() + { + return true; + } + }; + template<> + struct WeightingPower + { + /* 1 * weighting^1 == weighting: real particles contibute linearily + * to the macro particle weighting + */ + static float_64 get() + { + return 1.0; + } + }; + + + template<> + struct Unit + { + // unitless and not scaled by a factor: by convention 1.0 + static std::vector get() + { + std::vector unit(1, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // voronoiCellId is unitless + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // the voronoiCellId attribute is not a physical parameter + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // the voronoiCellId attribute is not a physical parameter + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + static std::vector get() + { + uint32_t const components = 3u; + + std::vector const unit(components, UNIT_EFIELD); + + return unit; + } + }; + template<> + 
struct UnitDimension + { + static std::vector get() + { + /* L, M, T, I, theta, N, J + * + * E is in volts per meters: V / m = kg * m / (A * s^3) + * -> L * M * T^-3 * I^-1 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::length) = 1.0; + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -3.0; + unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // local electric fields do not scale with weighting + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + static std::vector get() + { + uint32_t const components = 3u; + + std::vector const unit(components, UNIT_BFIELD); + + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + /* L, M, T, I, theta, N, J + * + * B is in Tesla : kg / (A * s^2) + * -> M * T^-2 * I^-1 + */ + std::vector unitDimension(7, 0.0); + unitDimension.at(SIBaseUnits::mass) = 1.0; + unitDimension.at(SIBaseUnits::time) = -2.0; + unitDimension.at(SIBaseUnits::electricCurrent) = -1.0; + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // local magnetic fields do not scale with weighting + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + // unitless and not scaled by a factor: by convention 1.0 + static std::vector get() + { + std::vector unit(1, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // unitless + return std::vector(NUnitDimension, 0.0); + } + }; + template<> + struct MacroWeighted + { + // we can only follow maro particles via ids + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // particle ids do not 
scale with weighting + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + /* unitless index and not scaled by a factor: by convention 1.0 */ + static std::vector get() + { + std::vector unit(simDim, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + /* totalCellIdx is a cell index and therefore unitless + */ + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // the cell idx is identical and can not be scaled by weightings + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + // idx * weighting^0 == idx: same for real and macro particle + static float_64 get() + { + return 0.0; + } + }; + + template<> + struct Unit + { + // unitless and not scaled by a factor: 1.0 + static std::vector get() + { + std::vector unit(1, 1.0); + return unit; + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // boundElectrons is unitless + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // bound electrons are counted for a single real ion + static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + /* #e-_real * weighting^1 == #e-_macro: bound electrons are contributed + * linearly from the underlying real particles + */ + static float_64 get() + { + return 1.0; + } + }; + + template<> + struct Unit + { + // unitless and not scaled by a factor: 1.0 + static std::vector get() + { + return std::vector(picongpu::flylite::populations, 1.0); + } + }; + template<> + struct UnitDimension + { + static std::vector get() + { + // superconfig is unitless + std::vector unitDimension(NUnitDimension, 0.0); + + return unitDimension; + } + }; + template<> + struct MacroWeighted + { + // represented by (1) or (weighted) ions??? 
+ static bool get() + { + return false; + } + }; + template<> + struct WeightingPower + { + static float_64 get() + { + return 1.0; + } + }; + + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/unitless/speciesConstants.unitless b/include/picongpu/unitless/speciesConstants.unitless index c7c8c631f4..659da93719 100644 --- a/include/picongpu/unitless/speciesConstants.unitless +++ b/include/picongpu/unitless/speciesConstants.unitless @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PIConGPU. * @@ -22,10 +22,9 @@ namespace picongpu { - //! Charge of base particle - constexpr float_X BASE_CHARGE = (float_X) (SI::BASE_CHARGE_SI / UNIT_CHARGE); + constexpr float_X BASE_CHARGE = (float_X)(SI::BASE_CHARGE_SI / UNIT_CHARGE); //! Mass of base particle - constexpr float_X BASE_MASS = (float_X) (SI::BASE_MASS_SI / UNIT_MASS); + constexpr float_X BASE_MASS = (float_X)(SI::BASE_MASS_SI / UNIT_MASS); -} //namespace picongpu +} // namespace picongpu diff --git a/include/picongpu/unitless/speciesDefinition.unitless b/include/picongpu/unitless/speciesDefinition.unitless index a385dcb466..da3ae56677 100644 --- a/include/picongpu/unitless/speciesDefinition.unitless +++ b/include/picongpu/unitless/speciesDefinition.unitless @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. 
* @@ -32,48 +32,39 @@ namespace picongpu { -namespace traits -{ -namespace frame -{ + namespace traits + { + namespace frame + { + /** default `getMass()` specialization + * + * - the default mass is `BASE_MASS * massRatio<>` + * - massRatio<> is the user defined ratio which is pinned as flag to a species + */ + template + HDINLINE float_X getMass() + { + using MassRatioValue = + typename pmacc::traits::Resolve>::type>::type; -/** default `getMass()` specialization - * - * - the default mass is `BASE_MASS * massRatio<>` - * - massRatio<> is the user defined ratio which is pinned as flag to a species - */ -template -HDINLINE float_X getMass() -{ - using MassRatioValue = typename pmacc::traits::Resolve< - typename GetFlagType< - T_Frame, - massRatio<> - >::type - >::type; + return BASE_MASS * MassRatioValue::getValue(); + } - return BASE_MASS * MassRatioValue::getValue(); -}; + /** default `getCharge()` specialization + * + * - the default charge is `BASE_CHARGE * chargeRatio<>` + * - chargeRatio<> is the user defined ratio which is pinned as flag to a species + */ + template + HDINLINE float_X getCharge() + { + using ChargeRatioValue = + typename pmacc::traits::Resolve>::type>::type; -/** default `getCharge()` specialization - * - * - the default charge is `BASE_CHARGE * chargeRatio<>` - * - chargeRatio<> is the user defined ratio which is pinned as flag to a species - */ -template -HDINLINE float_X getCharge() -{ - using ChargeRatioValue = typename pmacc::traits::Resolve< - typename GetFlagType< - T_Frame, - chargeRatio<> - >::type - >::type; - - return BASE_CHARGE * ChargeRatioValue::getValue(); -}; + return BASE_CHARGE * ChargeRatioValue::getValue(); + } -} // namespace frame -} // namespace traits + } // namespace frame + } // namespace traits } // namespace picongpu diff --git a/include/picongpu/unitless/speciesInitialization.unitless b/include/picongpu/unitless/speciesInitialization.unitless index 948ca2a390..3077b32226 100644 --- 
a/include/picongpu/unitless/speciesInitialization.unitless +++ b/include/picongpu/unitless/speciesInitialization.unitless @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PIConGPU. * diff --git a/include/picongpu/unitless/starter.unitless b/include/picongpu/unitless/starter.unitless index b4cffd2a07..d8214462c4 100644 --- a/include/picongpu/unitless/starter.unitless +++ b/include/picongpu/unitless/starter.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -21,25 +21,19 @@ #include "picongpu/initialization/InitialiserController.hpp" #include "picongpu/plugins/PluginController.hpp" -#include "picongpu/simulation/control/MySimulation.hpp" +#include "picongpu/simulation/control/Simulation.hpp" #include "picongpu/simulation/control/SimulationStarter.hpp" namespace picongpu { - namespace defaultPIConGPU { /* Define a starter for the simulation with the name "SimStarter" * * etc.: using SimStarter = MyOwnStarterClass; */ - using SimStarter = ::picongpu::SimulationStarter< - ::picongpu::InitialiserController, - ::picongpu::PluginController, - ::picongpu::MySimulation - >; - } -} - - + using SimStarter = ::picongpu:: + SimulationStarter<::picongpu::InitialiserController, ::picongpu::PluginController, ::picongpu::Simulation>; + } // namespace defaultPIConGPU +} // namespace picongpu diff --git a/include/picongpu/unitless/synchrotronPhotons.unitless b/include/picongpu/unitless/synchrotronPhotons.unitless index 7be2a760c4..beabc3277a 100644 --- a/include/picongpu/unitless/synchrotronPhotons.unitless +++ b/include/picongpu/unitless/synchrotronPhotons.unitless @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PIConGPU. 
* @@ -24,24 +24,23 @@ namespace picongpu { -namespace particles -{ -namespace synchrotronPhotons -{ - -/** Sample point stepping */ -constexpr float_64 SYNC_FUNCS_STEP_WIDTH = - SYNC_FUNCS_CUTOFF / static_cast(SYNC_FUNCS_NUM_SAMPLES - 1u); - -/** In the definition of the first synchrotron function the bessel function is integrated - * up to infinity but in fact it is sufficient to integrate up to this constant. */ -constexpr float_64 SYNC_FUNCS_F1_INTEGRAL_BOUND = 50.0; - -constexpr float_X SOFT_PHOTONS_CUTOFF_MOM = static_cast( - HBAR * pmacc::algorithms::math::Pi::doubleValue / SOFT_PHOTONS_CUTOFF_RATIO / DELTA_T / SPEED_OF_LIGHT); - -} // namespace synchrotronPhotons -} // namespace particles + namespace particles + { + namespace synchrotronPhotons + { + /** Sample point stepping */ + constexpr float_64 SYNC_FUNCS_STEP_WIDTH + = SYNC_FUNCS_CUTOFF / static_cast(SYNC_FUNCS_NUM_SAMPLES - 1u); + + /** In the definition of the first synchrotron function the bessel function is integrated + * up to infinity but in fact it is sufficient to integrate up to this constant. */ + constexpr float_64 SYNC_FUNCS_F1_INTEGRAL_BOUND = 50.0; + + constexpr float_X SOFT_PHOTONS_CUTOFF_MOM = static_cast( + HBAR * pmacc::math::Pi::doubleValue / SOFT_PHOTONS_CUTOFF_RATIO / DELTA_T / SPEED_OF_LIGHT); + + } // namespace synchrotronPhotons + } // namespace particles } // namespace picongpu #include "picongpu/particles/synchrotronPhotons/PhotonCreator.hpp" diff --git a/include/picongpu/unitless/transitionRadiation.unitless b/include/picongpu/unitless/transitionRadiation.unitless index b834b241f9..d6612489b4 100644 --- a/include/picongpu/unitless/transitionRadiation.unitless +++ b/include/picongpu/unitless/transitionRadiation.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -23,46 +23,50 @@ namespace picongpu { -namespace plugins -{ -namespace transitionRadiation -{ -//! units for linear frequencies distribution for transition radiation plugin -namespace linearFrequencies -{ - constexpr float_X omegaMin = SI::omegaMin*UNIT_TIME; - constexpr float_X omegaMax = SI::omegaMax*UNIT_TIME; - constexpr float_X deltaOmega = (float_X) ((omegaMax - omegaMin) / (float_X) (nOmega - 1)); // difference beween two omega + namespace plugins + { + namespace transitionRadiation + { + //! units for linear frequencies distribution for transition radiation plugin + namespace linearFrequencies + { + constexpr float_X omegaMin = SI::omegaMin * UNIT_TIME; + constexpr float_X omegaMax = SI::omegaMax * UNIT_TIME; + constexpr float_X deltaOmega + = (float_X)((omegaMax - omegaMin) / (float_X)(nOmega - 1)); // difference beween two omega - constexpr unsigned int blocksizeOmega = pmacc::math::CT::volume::type::value; - constexpr unsigned int gridsizeOmega = nOmega / blocksizeOmega; // size of grid (dim: x); radiation -} + constexpr unsigned int blocksizeOmega + = pmacc::math::CT::volume::type::value; + constexpr unsigned int gridsizeOmega = nOmega / blocksizeOmega; // size of grid (dim: x); radiation + } // namespace linearFrequencies -//! units for logarithmic frequencies distribution for transition radiation plugin -namespace logFrequencies -{ - constexpr float_X omegaMin = (SI::omegaMin*UNIT_TIME); - constexpr float_X omegaMax = (SI::omegaMax*UNIT_TIME); + //! 
units for logarithmic frequencies distribution for transition radiation plugin + namespace logFrequencies + { + constexpr float_X omegaMin = (SI::omegaMin * UNIT_TIME); + constexpr float_X omegaMax = (SI::omegaMax * UNIT_TIME); - constexpr unsigned int blocksizeOmega = pmacc::math::CT::volume::type::value; - constexpr unsigned int gridsizeOmega = nOmega / blocksizeOmega; // size of grid (dim: x); radiation -} + constexpr unsigned int blocksizeOmega + = pmacc::math::CT::volume::type::value; + constexpr unsigned int gridsizeOmega = nOmega / blocksizeOmega; // size of grid (dim: x); radiation + } // namespace logFrequencies -//! units for frequencies from list for transition radiation calculation -namespace listFrequencies -{ - constexpr unsigned int blocksizeOmega = pmacc::math::CT::volume::type::value; - constexpr unsigned int gridsizeOmega = nOmega / blocksizeOmega; // size of grid (dim: x); radiation -} + //! units for frequencies from list for transition radiation calculation + namespace listFrequencies + { + constexpr unsigned int blocksizeOmega + = pmacc::math::CT::volume::type::value; + constexpr unsigned int gridsizeOmega = nOmega / blocksizeOmega; // size of grid (dim: x); radiation + } // namespace listFrequencies -//! unit for foil position -namespace parameters -{ - constexpr float_X foilPosition = SI::foilPosition / UNIT_LENGTH; -} + //! 
unit for foil position + namespace parameters + { + constexpr float_X foilPosition = SI::foilPosition / UNIT_LENGTH; + } -} // namespace transitionRadiation -} // namespace plugins + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu #include "picongpu/plugins/transitionRadiation/frequencies/LinearFrequencies.hpp" diff --git a/include/picongpu/version.hpp b/include/picongpu/version.hpp index dfa6a3d6b5..4543da54e1 100644 --- a/include/picongpu/version.hpp +++ b/include/picongpu/version.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PIConGPU. * @@ -20,6 +20,6 @@ #pragma once #define PICONGPU_VERSION_MAJOR 0 -#define PICONGPU_VERSION_MINOR 5 +#define PICONGPU_VERSION_MINOR 6 #define PICONGPU_VERSION_PATCH 0 -#define PICONGPU_VERSION_LABEL "" +#define PICONGPU_VERSION_LABEL "dev" diff --git a/include/picongpu/versionFormat.cpp b/include/picongpu/versionFormat.cpp index 235f569a06..ea11b755a7 100644 --- a/include/picongpu/versionFormat.cpp +++ b/include/picongpu/versionFormat.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl, Franz Poeschel * * This file is part of PIConGPU. * @@ -17,6 +17,7 @@ * If not, see . 
*/ +#include #include "picongpu/versionFormat.hpp" #include @@ -25,18 +26,21 @@ #include #ifdef __CUDACC_VER_MAJOR__ -#include -#include +# include +# include #endif #include -#if( ENABLE_HDF5 == 1 ) -# include +#if(ENABLE_HDF5 == 1) +# include #endif -#if( ENABLE_ADIOS == 1 ) -# include +#if(ENABLE_ADIOS == 1) +# include #endif -#if( PIC_ENABLE_PNG == 1 ) -# include +#if(PIC_ENABLE_PNG == 1) +# include +#endif +#if(ENABLE_OPENPMD == 1) +# include #endif #include @@ -44,16 +48,13 @@ namespace picongpu { - std::list< std::string > - getSoftwareVersions( std::ostream & cliText ) + std::list getSoftwareVersions(std::ostream& cliText) { - std::string const versionNotFound( "NOTFOUND" ); + std::string const versionNotFound("NOTFOUND"); std::stringstream picongpu; - picongpu << PICONGPU_VERSION_MAJOR << "." - << PICONGPU_VERSION_MINOR << "." - << PICONGPU_VERSION_PATCH; - if( std::string( PICONGPU_VERSION_LABEL ).size() > 0 ) + picongpu << PICONGPU_VERSION_MAJOR << "." << PICONGPU_VERSION_MINOR << "." << PICONGPU_VERSION_PATCH; + if(std::string(PICONGPU_VERSION_LABEL).size() > 0) picongpu << "-" << PICONGPU_VERSION_LABEL; std::stringstream buildType; @@ -74,32 +75,25 @@ namespace picongpu #ifdef __CUDACC_VER_MAJOR__ std::stringstream cuda; - cuda << __CUDACC_VER_MAJOR__ << "." - << __CUDACC_VER_MINOR__ << "." - << __CUDACC_VER_BUILD__; + cuda << __CUDACC_VER_MAJOR__ << "." << __CUDACC_VER_MINOR__ << "." << __CUDACC_VER_BUILD__; std::stringstream mallocMC; - mallocMC << MALLOCMC_VERSION_MAJOR << "." - << MALLOCMC_VERSION_MINOR << "." - << MALLOCMC_VERSION_PATCH; + mallocMC << MALLOCMC_VERSION_MAJOR << "." << MALLOCMC_VERSION_MINOR << "." << MALLOCMC_VERSION_PATCH; #endif std::stringstream boost; - boost << int(BOOST_VERSION / 100000) << "." - << int(BOOST_VERSION / 100 % 1000) << "." + boost << int(BOOST_VERSION / 100000) << "." << int(BOOST_VERSION / 100 % 1000) << "." 
<< int(BOOST_VERSION % 100); std::stringstream mpiStandard; std::stringstream mpiFlavor; std::stringstream mpiFlavorVersion; mpiStandard << MPI_VERSION << "." << MPI_SUBVERSION; -#if defined( OMPI_MAJOR_VERSION ) +#if defined(OMPI_MAJOR_VERSION) // includes derivates such as Bullx MPI, Sun, ... mpiFlavor << "OpenMPI"; - mpiFlavorVersion << OMPI_MAJOR_VERSION << "." - << OMPI_MINOR_VERSION << "." - << OMPI_RELEASE_VERSION; -#elif defined( MPICH_VERSION ) + mpiFlavorVersion << OMPI_MAJOR_VERSION << "." << OMPI_MINOR_VERSION << "." << OMPI_RELEASE_VERSION; +#elif defined(MPICH_VERSION) /* includes MPICH2 and MPICH3 and * derivates such as IBM, Cray, MS, Intel, MVAPICH(2), ... */ mpiFlavor << "MPICH"; @@ -110,43 +104,42 @@ namespace picongpu #endif std::stringstream pngwriter; -#if( PIC_ENABLE_PNG == 1 ) - pngwriter << PNGWRITER_VERSION_MAJOR << "." - << PNGWRITER_VERSION_MINOR << "." - << PNGWRITER_VERSION_PATCH; +#if(PIC_ENABLE_PNG == 1) + pngwriter << PNGWRITER_VERSION_MAJOR << "." << PNGWRITER_VERSION_MINOR << "." << PNGWRITER_VERSION_PATCH; #else pngwriter << versionNotFound; #endif std::stringstream splash; std::stringstream splashFormat; -#if( ENABLE_HDF5 == 1 ) - splash << SPLASH_VERSION_MAJOR << "." - << SPLASH_VERSION_MINOR << "." - << SPLASH_VERSION_PATCH; - splashFormat << SPLASH_FILE_FORMAT_MAJOR << "." - << SPLASH_FILE_FORMAT_MINOR; +#if(ENABLE_HDF5 == 1) + splash << SPLASH_VERSION_MAJOR << "." << SPLASH_VERSION_MINOR << "." << SPLASH_VERSION_PATCH; + splashFormat << SPLASH_FILE_FORMAT_MAJOR << "." 
<< SPLASH_FILE_FORMAT_MINOR; #else splash << versionNotFound; splashFormat << versionNotFound; #endif std::stringstream adios; -#if( ENABLE_ADIOS == 1 ) +#if(ENABLE_ADIOS == 1) adios << ADIOS_VERSION; #else adios << versionNotFound; #endif +#if(ENABLE_OPENPMD == 1) + std::string openPMD = openPMD::getVersion(); +#else + std::string openPMD = versionNotFound; +#endif + // CLI Formatting cliText << "PIConGPU: " << picongpu.str() << std::endl; - cliText << " Build-Type: " << buildType.str() << std::endl - << std::endl; + cliText << " Build-Type: " << buildType.str() << std::endl << std::endl; cliText << "Third party:" << std::endl; cliText << " OS: " << os.str() << std::endl; cliText << " arch: " << arch.str() << std::endl; - cliText << " CXX: " << cxx.str() - << " (" << cxxVersion.str() << ")" << std::endl; + cliText << " CXX: " << cxx.str() << " (" << cxxVersion.str() << ")" << std::endl; cliText << " CMake: " << cmake.str() << std::endl; #ifdef __CUDACC_VER_MAJOR__ cliText << " CUDA: " << cuda.str() << std::endl; @@ -155,32 +148,33 @@ namespace picongpu cliText << " Boost: " << boost.str() << std::endl; cliText << " MPI: " << std::endl << " standard: " << mpiStandard.str() << std::endl - << " flavor: " << mpiFlavor.str() - << " (" << mpiFlavorVersion.str() << ")" << std::endl; + << " flavor: " << mpiFlavor.str() << " (" << mpiFlavorVersion.str() << ")" << std::endl; cliText << " PNGwriter: " << pngwriter.str() << std::endl; - cliText << " libSplash: " << splash.str() - << " (Format " << splashFormat.str() << ")" << std::endl; + cliText << " libSplash: " << splash.str() << " (Format " << splashFormat.str() << ")" << std::endl; cliText << " ADIOS: " << adios.str() << std::endl; + cliText << " openPMD: " << openPMD << std::endl; // Module-like formatting of software only - std::list< std::string > software; - software.push_back( std::string( "PIConGPU/" ) + picongpu.str() ); - software.push_back( cxx.str() + std::string( "/" ) + cxxVersion.str() ); - 
software.push_back( std::string( "CMake/" ) + cmake.str() ); + std::list software; + software.push_back(std::string("PIConGPU/") + picongpu.str()); + software.push_back(cxx.str() + std::string("/") + cxxVersion.str()); + software.push_back(std::string("CMake/") + cmake.str()); #ifdef __CUDACC_VER_MAJOR__ - software.push_back( std::string( "CUDA/" ) + cuda.str() ); + software.push_back(std::string("CUDA/") + cuda.str()); #endif - software.push_back( std::string( "Boost/" ) + boost.str() ); - software.push_back( mpiFlavor.str() + std::string( "/" ) + mpiFlavorVersion.str() ); + software.push_back(std::string("Boost/") + boost.str()); + software.push_back(mpiFlavor.str() + std::string("/") + mpiFlavorVersion.str()); #ifdef __CUDACC_VER_MAJOR__ - software.push_back( std::string( "mallocMC/" ) + mallocMC.str() ); + software.push_back(std::string("mallocMC/") + mallocMC.str()); #endif - if( pngwriter.str().compare( versionNotFound ) != 0 ) - software.push_back( std::string( "PNGwriter/" ) + pngwriter.str() ); - if( splash.str().compare( versionNotFound ) != 0 ) - software.push_back( std::string( "libSplash/" ) + splash.str() ); - if( adios.str().compare( versionNotFound ) != 0 ) - software.push_back( std::string( "ADIOS/" ) + adios.str() ); + if(pngwriter.str().compare(versionNotFound) != 0) + software.push_back(std::string("PNGwriter/") + pngwriter.str()); + if(splash.str().compare(versionNotFound) != 0) + software.push_back(std::string("libSplash/") + splash.str()); + if(adios.str().compare(versionNotFound) != 0) + software.push_back(std::string("ADIOS/") + adios.str()); + if(openPMD.compare(versionNotFound) != 0) + software.push_back(std::string("openPMD/") + openPMD); return software; } diff --git a/include/picongpu/versionFormat.hpp b/include/picongpu/versionFormat.hpp index b33a9c1be0..51f5ee42c4 100644 --- a/include/picongpu/versionFormat.hpp +++ b/include/picongpu/versionFormat.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel 
Huebl * * This file is part of PIConGPU. * @@ -35,6 +35,5 @@ namespace picongpu * @param[out] cliText formatted table for output to a command line * @return a list of strings in the form software/version */ - std::list< std::string > - getSoftwareVersions( std::ostream & cliText ); + std::list getSoftwareVersions(std::ostream& cliText); } // namespace picongpu diff --git a/include/pmacc/CMakeLists.txt b/include/pmacc/CMakeLists.txt index 5862a8d927..10fa730351 100644 --- a/include/pmacc/CMakeLists.txt +++ b/include/pmacc/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2015-2020 Erik Zenker, Alexander Grund +# Copyright 2015-2021 Erik Zenker, Alexander Grund # # This file is part of PMacc. # @@ -24,7 +24,7 @@ # PMacc tests ################################################################################ -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) project(PMaccTest) # set helper pathes to find libraries and packages @@ -53,10 +53,19 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) + + +################################################################################ +# Directory of this file. +################################################################################ +set(PMACC_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR}) + +# Normalize the path (e.g. 
remove ../) +get_filename_component(PMACC_ROOT_DIR "${PMACC_ROOT_DIR}" ABSOLUTE) ################################################################################ # PMacc @@ -69,16 +78,10 @@ add_definitions(${PMacc_DEFINITIONS}) ############################################################################### -# Boost.Test +# Catch2 ############################################################################### -find_package(Boost 1.65.1 COMPONENTS unit_test_framework REQUIRED) -if(TARGET Boost::unit_test_framework) - set(LIBS ${LIBS} Boost::boost Boost::unit_test_framework) -else() - include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) - set(LIBS ${LIBS} ${Boost_LIBRARIES}) -endif() +add_subdirectory(${PMACC_ROOT_DIR}/../../thirdParty/catch2/catch_main ${CMAKE_BINARY_DIR}/catch2) ################################################################################ @@ -89,13 +92,17 @@ find_package(MPI REQUIRED) include_directories(SYSTEM ${MPI_C_INCLUDE_PATH}) set(LIBS ${LIBS} ${MPI_C_LIBRARIES}) +option(USE_MPI_AS_ROOT_USER "add --allow-run-as-root mpiexec used by ctest" OFF) + +if(USE_MPI_AS_ROOT_USER) + set(MPI_RUNTIME_FLAGS "--allow-run-as-root") +endif() ############################################################################### # Targets ############################################################################### include_directories(${CMAKE_CURRENT_SOURCE_DIR}/test) -add_definitions(-DBOOST_TEST_DYN_LINK) # CTest enable_testing() @@ -108,10 +115,11 @@ foreach(dim 2 3) get_filename_component(testCaseFilename ${testCaseFilepath} NAME) string(REPLACE "UT.cpp" "" testCase ${testCaseFilename}) set(testExe "${PROJECT_NAME}-${testCase}-${dim}D") - cupla_add_executable(${testExe} ${testCaseFilepath} ${CMAKE_CURRENT_SOURCE_DIR}/test/main.cpp) + cupla_add_executable(${testExe} ${testCaseFilepath}) target_compile_definitions(${testExe} PRIVATE TEST_DIM=${dim}) target_link_libraries(${testExe} PUBLIC ${LIBS}) - add_test(NAME "${testCase}-${dim}D" COMMAND mpiexec -n 1 
./${testExe}) + target_link_libraries(${testExe} PUBLIC CatchMain) + add_test(NAME "${testCase}-${dim}D" COMMAND mpiexec ${MPI_RUNTIME_FLAGS} -n 1 ./${testExe}) endforeach() string(REPLACE "-DTEST_DIM=${dim}" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endforeach() diff --git a/include/pmacc/Environment.def b/include/pmacc/Environment.def index 5670c7a0f4..87b5fc969b 100644 --- a/include/pmacc/Environment.def +++ b/include/pmacc/Environment.def @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -25,13 +25,12 @@ namespace pmacc { - - template< uint32_t T_dim = DIM1 > + template class Environment; -namespace detail -{ - struct Environment; + namespace detail + { + struct Environment; -} // namespace detail + } // namespace detail } // namespace pmacc diff --git a/include/pmacc/Environment.hpp b/include/pmacc/Environment.hpp index 5bf9f88d47..8695306538 100644 --- a/include/pmacc/Environment.hpp +++ b/include/pmacc/Environment.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt, Conrad Schumann, +/* Copyright 2014-2021 Felix Schmitt, Conrad Schumann, * Alexander Grund, Axel Huebl * * This file is part of PMacc. @@ -43,507 +43,513 @@ namespace pmacc { - -namespace detail -{ - /** collect state variables of the environment context - * - * This class handle the initialization and finalize of the - * MPI context and the selection of the GPU. - */ - class EnvironmentContext + namespace detail { + /** collect state variables of the environment context + * + * This class handle the initialization and finalize of the + * MPI context and the selection of the GPU. 
+ */ + class EnvironmentContext + { + friend Environment; - friend Environment; + friend pmacc::Environment; + friend pmacc::Environment; + friend pmacc::Environment; - friend pmacc::Environment; - friend pmacc::Environment; - friend pmacc::Environment; + EnvironmentContext() + : m_isMpiInitialized(false) + , m_isDeviceSelected(false) + , m_isSubGridDefined(false) + , m_isMpiDirectEnabled(false) + { + } - EnvironmentContext( ) : - m_isMpiInitialized( false ), - m_isDeviceSelected( false ), - m_isSubGridDefined( false ) - { - } + /** initialization state of MPI */ + bool m_isMpiInitialized; - /** initialization state of MPI */ - bool m_isMpiInitialized; + /** state if a computing device is selected */ + bool m_isDeviceSelected; - /** state if a computing device is selected */ - bool m_isDeviceSelected; + /** state if the SubGrid is defined */ + bool m_isSubGridDefined; - /** state if the SubGrid is defined */ - bool m_isSubGridDefined; + /** state shows if MPI direct is activated */ + bool m_isMpiDirectEnabled; - /** get the singleton EnvironmentContext - * - * @return instance of EnvironmentContext - */ - static EnvironmentContext& getInstance() - { - static EnvironmentContext instance; - return instance; - } + /** get the singleton EnvironmentContext + * + * @return instance of EnvironmentContext + */ + static EnvironmentContext& getInstance() + { + static EnvironmentContext instance; + return instance; + } - /** state of the MPI context - * - * @return true if MPI is initialized else false - */ - bool isMpiInitialized() - { - return m_isMpiInitialized; - } + /** state of the MPI context + * + * @return true if MPI is initialized else false + */ + bool isMpiInitialized() + { + return m_isMpiInitialized; + } - /** is a computing device selected - * - * @return true if device is selected else false - */ - bool isDeviceSelected() - { - return m_isDeviceSelected; - } + /** is a computing device selected + * + * @return true if device is selected else false + */ + bool 
isDeviceSelected() + { + return m_isDeviceSelected; + } + + /** is the SubGrid defined + * + * @return true if SubGrid is defined, else false + */ + bool isSubGridDefined() + { + return m_isSubGridDefined; + } + + /** initialize the environment + * + * After this call it is allowed to use MPI. + */ + HINLINE void init(); + + /** cleanup the environment */ + HINLINE void finalize(); - /** is the SubGrid defined + /** select a computing device + * + * After this call it is allowed to use the computing device. + * + * @param deviceNumber number of the device + */ + HINLINE void setDevice(int deviceNumber); + + //! activate MPI direct usage + void enableMpiDirect() + { + m_isMpiDirectEnabled = true; + } + + //! query if MPI direct support is activated + bool isMpiDirectEnabled() const + { + return m_isMpiDirectEnabled; + } + }; + + /** PMacc environment * - * @return true if SubGrid is defined, else false + * Get access to all PMacc singleton classes those not depend on a dimension. */ - bool isSubGridDefined() + struct Environment { - return m_isSubGridDefined; - } + Environment() + { + } - /** initialize the environment - * - * After this call it is allowed to use MPI. - */ - HINLINE void init(); + /** cleanup the environment */ + void finalize() + { + EnvironmentContext::getInstance().finalize(); + } - /** cleanup the environment */ - HINLINE void finalize(); + /** get the singleton StreamController + * + * @return instance of StreamController + */ + pmacc::StreamController& StreamController() + { + PMACC_ASSERT_MSG( + EnvironmentContext::getInstance().isDeviceSelected(), + "Environment< DIM >::initDevices() must be called before this method!"); + return StreamController::getInstance(); + } - /** select a computing device - * - * After this call it is allowed to use the computing device. 
- * - * @param deviceNumber number of the device - */ - HINLINE void setDevice(int deviceNumber); + /** get the singleton Manager + * + * @return instance of Manager + */ + pmacc::Manager& Manager() + { + return Manager::getInstance(); + } - }; + /** get the singleton TransactionManager + * + * @return instance of TransactionManager + */ + pmacc::TransactionManager& TransactionManager() const + { + PMACC_ASSERT_MSG( + EnvironmentContext::getInstance().isDeviceSelected(), + "Environment< DIM >::initDevices() must be called before this method!"); + return TransactionManager::getInstance(); + } - /** PMacc environment - * - * Get access to all PMacc singleton classes those not depend on a dimension. - */ - struct Environment - { - Environment() - { - } + /** get the singleton EnvironmentController + * + * @return instance of EnvironmentController + */ + pmacc::EnvironmentController& EnvironmentController() + { + PMACC_ASSERT_MSG( + EnvironmentContext::getInstance().isMpiInitialized(), + "Environment< DIM >::initDevices() must be called before this method!"); + return EnvironmentController::getInstance(); + } - /** cleanup the environment */ - void finalize() - { - EnvironmentContext::getInstance().finalize(); - } + /** get the singleton Factory + * + * @return instance of Factory + */ + pmacc::Factory& Factory() + { + PMACC_ASSERT_MSG( + EnvironmentContext::getInstance().isMpiInitialized() + && EnvironmentContext::getInstance().isDeviceSelected(), + "Environment< DIM >::initDevices() must be called before this method!"); + return Factory::getInstance(); + } - /** get the singleton StreamController - * - * @return instance of StreamController - */ - pmacc::StreamController& StreamController() - { - PMACC_ASSERT_MSG( - EnvironmentContext::getInstance().isDeviceSelected(), - "Environment< DIM >::initDevices() must be called before this method!" 
- ); - return StreamController::getInstance(); - } + /** get the singleton EventPool + * + * @return instance of EventPool + */ + pmacc::EventPool& EventPool() + { + PMACC_ASSERT_MSG( + EnvironmentContext::getInstance().isDeviceSelected(), + "Environment< DIM >::initDevices() must be called before this method!"); + return EventPool::getInstance(); + } - /** get the singleton Manager - * - * @return instance of Manager - */ - pmacc::Manager& Manager() - { - return Manager::getInstance(); - } + /** get the singleton ParticleFactory + * + * @return instance of ParticleFactory + */ + pmacc::ParticleFactory& ParticleFactory() + { + return ParticleFactory::getInstance(); + } - /** get the singleton TransactionManager - * - * @return instance of TransactionManager - */ - pmacc::TransactionManager& TransactionManager() const - { - PMACC_ASSERT_MSG( - EnvironmentContext::getInstance().isDeviceSelected(), - "Environment< DIM >::initDevices() must be called before this method!" - ); - return TransactionManager::getInstance(); - } + /** get the singleton DataConnector + * + * @return instance of DataConnector + */ + pmacc::DataConnector& DataConnector() + { + return DataConnector::getInstance(); + } - /** get the singleton EnvironmentController - * - * @return instance of EnvironmentController - */ - pmacc::EnvironmentController& EnvironmentController() + /** get the singleton PluginConnector + * + * @return instance of PluginConnector + */ + pmacc::PluginConnector& PluginConnector() + { + return PluginConnector::getInstance(); + } + + /** get the singleton MemoryInfo + * + * @return instance of MemoryInfo + */ + nvidia::memory::MemoryInfo& MemoryInfo() + { + PMACC_ASSERT_MSG( + EnvironmentContext::getInstance().isDeviceSelected(), + "Environment< DIM >::initDevices() must be called before this method!"); + return nvidia::memory::MemoryInfo::getInstance(); + } + + /** get the singleton SimulationDescription + * + * @return instance of SimulationDescription + */ + 
simulationControl::SimulationDescription& SimulationDescription() + { + return simulationControl::SimulationDescription::getInstance(); + } + }; + } // namespace detail + + /** Global Environment singleton for PMacc + */ + template + class Environment : public detail::Environment + { + public: + void enableMpiDirect() { - PMACC_ASSERT_MSG( - EnvironmentContext::getInstance().isMpiInitialized(), - "Environment< DIM >::initDevices() must be called before this method!" - ); - return EnvironmentController::getInstance(); + detail::EnvironmentContext::getInstance().enableMpiDirect(); } - /** get the singleton Factory - * - * @return instance of Factory - */ - pmacc::Factory& Factory() + bool isMpiDirectEnabled() const { - PMACC_ASSERT_MSG( - EnvironmentContext::getInstance().isMpiInitialized() && - EnvironmentContext::getInstance().isDeviceSelected(), - "Environment< DIM >::initDevices() must be called before this method!" - ); - return Factory::getInstance(); + return detail::EnvironmentContext::getInstance().isMpiDirectEnabled(); } - /** get the singleton EventPool + /** get the singleton GridController * - * @return instance of EventPool + * @return instance of GridController */ - pmacc::EventPool& EventPool() + pmacc::GridController& GridController() { PMACC_ASSERT_MSG( - EnvironmentContext::getInstance().isDeviceSelected(), - "Environment< DIM >::initDevices() must be called before this method!" 
- ); - return EventPool::getInstance(); + detail::EnvironmentContext::getInstance().isMpiInitialized(), + "Environment< DIM >::initDevices() must be called before this method!"); + return pmacc::GridController::getInstance(); } - /** get the singleton ParticleFactory + /** get the singleton SubGrid * - * @return instance of ParticleFactory + * @return instance of SubGrid */ - pmacc::ParticleFactory& ParticleFactory() + pmacc::SubGrid& SubGrid() { - return ParticleFactory::getInstance(); + PMACC_ASSERT_MSG( + detail::EnvironmentContext::getInstance().isSubGridDefined(), + "Environment< DIM >::initGrids() must be called before this method!"); + return pmacc::SubGrid::getInstance(); } - /** get the singleton DataConnector + /** get the singleton Filesystem * - * @return instance of DataConnector + * @return instance of Filesystem */ - pmacc::DataConnector& DataConnector() + pmacc::Filesystem& Filesystem() { - return DataConnector::getInstance(); + return pmacc::Filesystem::getInstance(); } - /** get the singleton PluginConnector + /** get the singleton Environment< DIM > * - * @return instance of PluginConnector + * @return instance of Environment */ - pmacc::PluginConnector& PluginConnector() + static Environment& get() { - return PluginConnector::getInstance(); + static Environment instance; + return instance; } - /** get the singleton MemoryInfo + /** create and initialize the environment of PMacc * - * @return instance of MemoryInfo - */ - nvidia::memory::MemoryInfo& MemoryInfo() - { - PMACC_ASSERT_MSG( - EnvironmentContext::getInstance().isDeviceSelected(), - "Environment< DIM >::initDevices() must be called before this method!" - ); - return nvidia::memory::MemoryInfo::getInstance(); - } - - /** get the singleton SimulationDescription + * Usage of MPI or device(accelerator) function calls before this method + * are not allowed. 
* - * @return instance of SimulationDescription + * @param devices number of devices per simulation dimension + * @param periodic periodicity each simulation dimension + * (0 == not periodic, 1 == periodic) */ - simulationControl::SimulationDescription& SimulationDescription() + void initDevices(DataSpace devices, DataSpace periodic) { - return simulationControl::SimulationDescription::getInstance(); - } - }; -} // namespace detail + // initialize the MPI context + detail::EnvironmentContext::getInstance().init(); -/** Global Environment singleton for PMacc - */ -template< uint32_t T_dim > -class Environment : public detail::Environment -{ -public: + // create singleton instances + GridController().init(devices, periodic); - /** get the singleton GridController - * - * @return instance of GridController - */ - pmacc::GridController< T_dim >& GridController() - { - PMACC_ASSERT_MSG( - detail::EnvironmentContext::getInstance().isMpiInitialized(), - "Environment< DIM >::initDevices() must be called before this method!" - ); - return pmacc::GridController< T_dim >::getInstance(); - } - - /** get the singleton SubGrid - * - * @return instance of SubGrid - */ - pmacc::SubGrid< T_dim >& SubGrid() - { - PMACC_ASSERT_MSG( - detail::EnvironmentContext::getInstance().isSubGridDefined(), - "Environment< DIM >::initGrids() must be called before this method!" - ); - return pmacc::SubGrid< T_dim >::getInstance(); - } - - /** get the singleton Filesystem - * - * @return instance of Filesystem - */ - pmacc::Filesystem< T_dim >& Filesystem() - { - return pmacc::Filesystem< T_dim >::getInstance(); - } + EnvironmentController(); - /** get the singleton Environment< DIM > - * - * @return instance of Environment - */ - static Environment< T_dim >& get() - { - static Environment< T_dim > instance; - return instance; - } - - /** create and initialize the environment of PMacc - * - * Usage of MPI or device(accelerator) function calls before this method - * are not allowed. 
- * - * @param devices number of devices per simulation dimension - * @param periodic periodicity each simulation dimension - * (0 == not periodic, 1 == periodic) - */ - void initDevices( - DataSpace< T_dim > devices, - DataSpace< T_dim > periodic - ) - { - // initialize the MPI context - detail::EnvironmentContext::getInstance().init(); - - // create singleton instances - GridController().init( devices, periodic ); - - EnvironmentController(); + Filesystem(); - Filesystem(); + detail::EnvironmentContext::getInstance().setDevice(static_cast(GridController().getHostRank())); - detail::EnvironmentContext::getInstance().setDevice( - static_cast( GridController().getHostRank() ) - ); + StreamController().activate(); - StreamController().activate(); + MemoryInfo(); - MemoryInfo(); - - TransactionManager(); - - SimulationDescription(); - - } - - /** initialize the computing domain information of PMacc - * - * @param globalDomainSize size of the global simulation domain [cells] - * @param localDomainSize size of the local simulation domain [cells] - * @param localDomainOffset local domain offset [cells] - */ - void initGrids( - DataSpace< T_dim > globalDomainSize, - DataSpace< T_dim > localDomainSize, - DataSpace< T_dim > localDomainOffset - ) - { - PMACC_ASSERT_MSG( - detail::EnvironmentContext::getInstance().isMpiInitialized(), - "Environment< DIM >::initDevices() must be called before this method!" 
- ); + TransactionManager(); - detail::EnvironmentContext::getInstance().m_isSubGridDefined = true; - - // create singleton instances - SubGrid().init( - localDomainSize, - globalDomainSize, - localDomainOffset - ); - - DataConnector(); - - PluginConnector(); - } - - Environment(const Environment&) = delete; + SimulationDescription(); + } - Environment& operator=(const Environment&) = delete; + /** initialize the computing domain information of PMacc + * + * @param globalDomainSize size of the global simulation domain [cells] + * @param localDomainSize size of the local simulation domain [cells] + * @param localDomainOffset local domain offset [cells] + */ + void initGrids( + DataSpace globalDomainSize, + DataSpace localDomainSize, + DataSpace localDomainOffset) + { + PMACC_ASSERT_MSG( + detail::EnvironmentContext::getInstance().isMpiInitialized(), + "Environment< DIM >::initDevices() must be called before this method!"); -private: + detail::EnvironmentContext::getInstance().m_isSubGridDefined = true; - Environment() - { - } + // create singleton instances + SubGrid().init(localDomainSize, globalDomainSize, localDomainOffset); - ~Environment() - { + DataConnector(); - } + PluginConnector(); + } -}; + Environment(const Environment&) = delete; -namespace detail -{ + Environment& operator=(const Environment&) = delete; - void EnvironmentContext::init() - { - m_isMpiInitialized = true; - - // MPI_Init with NULL is allowed since MPI 2.0 - MPI_CHECK(MPI_Init(NULL,NULL)); - } + private: + Environment() + { + } - void EnvironmentContext::finalize() - { - if( m_isMpiInitialized ) + ~Environment() { - pmacc::Environment<>::get().Manager().waitForAllTasks(); - // Required by scorep for flushing the buffers - cudaDeviceSynchronize(); - m_isMpiInitialized = false; - /* Free the MPI context. - * The gpu context is freed by the `StreamController`, because - * MPI and CUDA are independent. 
- */ - MPI_CHECK(MPI_Finalize()); } - } + }; - void EnvironmentContext::setDevice(int deviceNumber) + namespace detail { - int num_gpus = 0; //number of gpus - cudaGetDeviceCount(&num_gpus); -#if (PMACC_CUDA_ENABLED == 1) - //##ERROR handling - if (num_gpus < 1) //check if cuda device is found + void EnvironmentContext::init() { - throw std::runtime_error("no CUDA capable devices detected"); - } -#endif + m_isMpiInitialized = true; - int maxTries = num_gpus; - bool deviceSelectionSuccessful = false; + // MPI_Init with NULL is allowed since MPI 2.0 + MPI_CHECK(MPI_Init(NULL, NULL)); + } - cudaError rc; + void EnvironmentContext::finalize() + { + if(m_isMpiInitialized) + { + pmacc::Environment<>::get().Manager().waitForAllTasks(); + // Required by scorep for flushing the buffers + cuplaDeviceSynchronize(); + m_isMpiInitialized = false; + /* Free the MPI context. + * The gpu context is freed by the `StreamController`, because + * MPI and CUDA are independent. + */ + MPI_CHECK(MPI_Finalize()); + } + } - // search the first selectable device in the compute node - for (int deviceOffset = 0; deviceOffset < maxTries; ++deviceOffset) + void EnvironmentContext::setDevice(int deviceNumber) { - /* Modulo 'num_gpus' avoids invalid device indices for systems where the environment variable - * `CUDA_VISIBLE_DEVICES` is used to pre-select a device. - */ - const int tryDeviceId = (deviceOffset + deviceNumber) % num_gpus; - - log("Trying to allocate device %1%.") % tryDeviceId; -#if (PMACC_CUDA_ENABLED == 1) - cudaDeviceProp devProp; - CUDA_CHECK((cuplaError_t)cudaGetDeviceProperties(&devProp, tryDeviceId)); - - /* If the cuda gpu compute mode is 'default' - * (https://docs.nvidia.com/cuda/cuda-c-programming-guide/#compute-modes) - * then we try to get a device only once. - * The index used to select a device is based on the local MPI rank so - * that each rank tries a different device. 
- */ - if (devProp.computeMode == cudaComputeModeDefault) + int num_gpus = 0; // number of gpus + cuplaGetDeviceCount(&num_gpus); +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + //##ERROR handling + if(num_gpus < 1) // check if cupla device is found { - maxTries = 1; - log("Device %1% is running in default mode.") % tryDeviceId; + throw std::runtime_error("no CUDA capable devices detected"); } #endif - rc = cudaSetDevice(tryDeviceId); + int maxTries = num_gpus; + bool deviceSelectionSuccessful = false; - if(rc == cudaSuccess) - { - cudaStream_t stream; - /* \todo: Check if this workaround is needed - * - * - since NVIDIA change something in driver cudaSetDevice never - * return an error if another process already use the selected - * device if gpu compute mode is set "process exclusive" - * - create a dummy stream to check if the device is already used by - * an other process. - * - cudaStreamCreate fails if gpu is already in use - */ - rc = cudaStreamCreate(&stream); - } + cuplaError rc; - if (rc == cudaSuccess) + // search the first selectable device in the compute node + for(int deviceOffset = 0; deviceOffset < maxTries; ++deviceOffset) { -#if (PMACC_CUDA_ENABLED == 1) - cudaDeviceProp dprop; - CUDA_CHECK((cuplaError_t)cudaGetDeviceProperties(&dprop, tryDeviceId)); - log ("Set device to %1%: %2%") % tryDeviceId % dprop.name; - if(cudaErrorSetOnActiveProcess == cudaSetDeviceFlags(cudaDeviceScheduleSpin)) + /* Modulo 'num_gpus' avoids invalid device indices for systems where the environment variable + * `CUDA_VISIBLE_DEVICES` is used to pre-select a device. 
+ */ + const int tryDeviceId = (deviceOffset + deviceNumber) % num_gpus; + + log("Trying to allocate device %1%.") % tryDeviceId; + +#if(BOOST_LANG_CUDA || BOOST_LANG_HIP) +# if(BOOST_LANG_CUDA) + cudaDeviceProp devProp; +# elif(BOOST_LANG_HIP) + hipDeviceProp_t devProp; +# endif + + CUDA_CHECK((cuplaError_t) ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, tryDeviceId)); + + /* If the cuda gpu compute mode is 'default' + * (https://docs.nvidia.com/cuda/cuda-c-programming-guide/#compute-modes) + * then we try to get a device only once. + * The index used to select a device is based on the local MPI rank so + * that each rank tries a different device. + */ + if(devProp.computeMode == ALPAKA_API_PREFIX(ComputeModeDefault)) { - cudaGetLastError(); //reset all errors - /* - because of cudaStreamCreate was called cudaSetDeviceFlags crashed - * - to set the flags reset the device and set flags again + maxTries = 1; + log("Device %1% is running in default mode.") % tryDeviceId; + } +#endif + + rc = cuplaSetDevice(tryDeviceId); + + if(rc == cuplaSuccess) + { + cuplaStream_t stream; + /* \todo: Check if this workaround is needed + * + * - since NVIDIA change something in driver cuplaSetDevice never + * return an error if another process already use the selected + * device if gpu compute mode is set "process exclusive" + * - create a dummy stream to check if the device is already used by + * an other process. 
+ * - cuplaStreamCreate fails if gpu is already in use */ - CUDA_CHECK(cudaDeviceReset()); - CUDA_CHECK((cuplaError_t)cudaSetDeviceFlags(cudaDeviceScheduleSpin)); + rc = cuplaStreamCreate(&stream); } + + if(rc == cuplaSuccess) + { +#if(BOOST_LANG_CUDA || BOOST_LANG_HIP) + CUDA_CHECK((cuplaError_t) ALPAKA_API_PREFIX(GetDeviceProperties)(&devProp, tryDeviceId)); + log("Set device to %1%: %2%") % tryDeviceId % devProp.name; + if(ALPAKA_API_PREFIX(ErrorSetOnActiveProcess) + == ALPAKA_API_PREFIX(SetDeviceFlags)(ALPAKA_API_PREFIX(DeviceScheduleSpin))) + { + cuplaGetLastError(); // reset all errors + /* - because of cuplaStreamCreate was called cuplaSetDeviceFlags crashed + * - to set the flags reset the device and set flags again + */ + CUDA_CHECK(cuplaDeviceReset()); + CUDA_CHECK( + (cuplaError_t) ALPAKA_API_PREFIX(SetDeviceFlags)(ALPAKA_API_PREFIX(DeviceScheduleSpin))); + } #endif - CUDA_CHECK(cudaGetLastError()); - deviceSelectionSuccessful = true; - break; - } - else if (rc == cudaErrorDeviceAlreadyInUse -#if (PMACC_CUDA_ENABLED == 1) - || rc==(cudaError)cudaErrorDevicesUnavailable + CUDA_CHECK(cuplaGetLastError()); + deviceSelectionSuccessful = true; + break; + } + else if( + rc == cuplaErrorDeviceAlreadyInUse +#if(PMACC_CUDA_ENABLED == 1) + || rc == (cuplaError) cudaErrorDevicesUnavailable #endif - ) - { - cudaGetLastError(); //reset all errors - log ("Device %1% already in use, try next.") % tryDeviceId; - continue; + ) + { + cuplaGetLastError(); // reset all errors + log("Device %1% already in use, try next.") % tryDeviceId; + continue; + } + else + { + CUDA_CHECK(rc); /*error message*/ + } } - else + if(!deviceSelectionSuccessful) { - CUDA_CHECK(rc); /*error message*/ + std::cerr << "Failed to select one of the " << num_gpus << " devices." << std::endl; + throw std::runtime_error("Compute device selection failed."); } - } - if(!deviceSelectionSuccessful) - { - std::cerr << "Failed to select one of the " << num_gpus << " devices." 
<< std::endl; - throw std::runtime_error("Compute device selection failed."); - } - m_isDeviceSelected = true; - } + m_isDeviceSelected = true; + } -} // namespace detail + } // namespace detail } // namespace pmacc /* No namespace for macro defines */ @@ -559,7 +565,7 @@ namespace detail * depended on the opType this method is blocking * * @param opType place were the operation is running - * possible places are: `ITask::TASK_CUDA`, `ITask::TASK_MPI`, `ITask::TASK_HOST` + * possible places are: `ITask::TASK_DEVICE`, `ITask::TASK_MPI`, `ITask::TASK_HOST` */ #define __startOperation(opType) (pmacc::Environment<>::get().TransactionManager().startOperation(opType)) @@ -568,7 +574,7 @@ namespace detail * depended on the opType this method is blocking * * @param opType place were the operation is running - * possible places are: `ITask::TASK_CUDA`, `ITask::TASK_MPI`, `ITask::TASK_HOST` + * possible places are: `ITask::TASK_DEVICE`, `ITask::TASK_MPI`, `ITask::TASK_HOST` */ #define __getEventStream(opType) (pmacc::Environment<>::get().TransactionManager().getEventStream(opType)) diff --git a/include/pmacc/HandleGuardRegion.hpp b/include/pmacc/HandleGuardRegion.hpp index d3387a7dae..a40eada221 100644 --- a/include/pmacc/HandleGuardRegion.hpp +++ b/include/pmacc/HandleGuardRegion.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -21,8 +21,8 @@ #pragma once -namespace pmacc{ - +namespace pmacc +{ /** * Property struct that exposes policies for handling data in the guard region * Each police must handle both sides of the (possible) exchange: @@ -38,14 +38,11 @@ namespace pmacc{ * to/from any other rank, which is the case for the boundary of the total * volume when non-periodic conditions are used */ - template< - class T_HandleExchanged, - class T_HandleNotExchanged - > + template struct HandleGuardRegion { - typedef T_HandleExchanged HandleExchanged; + typedef T_HandleExchanged HandleExchanged; typedef T_HandleNotExchanged HandleNotExchanged; }; -} // namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/PMaccConfig.cmake b/include/pmacc/PMaccConfig.cmake index 9d69a854bf..6924256fad 100644 --- a/include/pmacc/PMaccConfig.cmake +++ b/include/pmacc/PMaccConfig.cmake @@ -1,4 +1,4 @@ -# Copyright 2015-2020 Erik Zenker, Rene Widera, Axel Huebl +# Copyright 2015-2021 Erik Zenker, Rene Widera, Axel Huebl # # This file is part of PMacc. 
# @@ -29,7 +29,7 @@ ############################################################################### # PMacc ############################################################################### -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) # set helper pathes to find libraries and packages # Add specific hints @@ -72,10 +72,10 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) ############################################################################### @@ -108,7 +108,7 @@ set_property(CACHE PMACC_ALPAKA_PROVIDER PROPERTY STRINGS "intern;extern") mark_as_advanced(PMACC_ALPAKA_PROVIDER) if(${PMACC_ALPAKA_PROVIDER} STREQUAL "intern") - list(INSERT CMAKE_MODULE_PATH 0 "${PMacc_DIR}/../../thirdParty/alpaka") + list(INSERT CMAKE_MODULE_PATH 0 "${PMacc_DIR}/../../thirdParty/cupla/alpaka") endif() @@ -139,12 +139,6 @@ if( ON) endif() -if(NOT cupla_ALPAKA_PROVIDER) - # force cupla to use third party alpaka version - set(cupla_ALPAKA_PROVIDER "extern" CACHE STRING "Select which alpaka is used") - set(alpaka_DIR "${PMacc_DIR}/../../thirdParty/alpaka" CACHE PATH "path to alpaka") -endif() - if(${PMACC_CUPLA_PROVIDER} STREQUAL "intern") find_package(cupla REQUIRED @@ -349,8 +343,8 @@ if(ALPAKA_ACC_GPU_CUDA_ENABLE) "(Found ${CUDA_VERSION})") endif() # Newer CUDA releases: probably troublesome, warn at least - if(CUDA_VERSION VERSION_GREATER 10.2) - message(WARNING "Untested CUDA release >10.2 (Found ${CUDA_VERSION})! " + if(CUDA_VERSION VERSION_GREATER 11.2) + message(WARNING "Untested CUDA release >11.2 (Found ${CUDA_VERSION})! 
" "Maybe use a newer PIConGPU?") endif() endif() @@ -360,9 +354,13 @@ endif() # Find OpenMP ################################################################################ -find_package(OpenMP) -if(OPENMP_FOUND) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND ALPAKA_ACC_GPU_CUDA_ENABLE AND ALPAKA_CUDA_COMPILER MATCHES "clang") + message(WARNING "OpenMP host side acceleration is disabled: CUDA compilation with clang is not supporting OpenMP.") +else() + find_package(OpenMP) + if(OPENMP_FOUND) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + endif() endif() @@ -370,13 +368,14 @@ endif() # Find mallocMC ################################################################################ -if(ALPAKA_ACC_GPU_CUDA_ENABLE) - find_package(mallocMC 2.3.0 QUIET) +if(ALPAKA_ACC_GPU_CUDA_ENABLE OR ALPAKA_ACC_GPU_HIP_ENABLE) + set(mallocMC_ALPAKA_PROVIDER "extern" CACHE STRING "Select which alpaka is used for mallocMC") + find_package(mallocMC 2.5.0 QUIET) if(NOT mallocMC_FOUND) message(STATUS "Using mallocMC from thirdParty/ directory") set(MALLOCMC_ROOT "${PMacc_DIR}/../../thirdParty/mallocMC") - find_package(mallocMC 2.3.0 REQUIRED) + find_package(mallocMC 2.5.0 REQUIRED) endif(NOT mallocMC_FOUND) set(PMacc_INCLUDE_DIRS ${PMacc_INCLUDE_DIRS} ${mallocMC_INCLUDE_DIRS}) diff --git a/include/pmacc/algorithms/GlobalReduce.hpp b/include/pmacc/algorithms/GlobalReduce.hpp index a551cda9a6..3775b9b820 100644 --- a/include/pmacc/algorithms/GlobalReduce.hpp +++ b/include/pmacc/algorithms/GlobalReduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PMacc. 
* @@ -29,57 +29,52 @@ namespace pmacc { -namespace algorithms -{ - -/* Reduce values in GPU memory over all MPI instances - */ -class GlobalReduce -{ -public: - - GlobalReduce(const uint32_t byte, const uint32_t sharedMemByte = 4 * 1024) : reduce(byte, sharedMemByte) - { - } - - /* Activate participation for reduce algorithm. - * Must called from any mpi process. This function use global blocking mpi calls. - * Don't create a instance befor you have set you cuda device! - * @param isActive true if mpi rank should be part of reduce operation, else false - */ - void participate(bool isActive) - { - mpi_reduce.participate(isActive); - } - - /* Reduce elements in global gpu memeory - * - * @param func functor for reduce which takes two arguments, first argument is the source and get the new reduced value. - * Functor must specialize the function getMPI_Op. - * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one dimension access) - * @param n number of elements to reduce - * - * @return reduced value (same on every mpi instance) - */ - template - typename traits::GetValueType::ValueType operator()(Functor func, - Src src, - uint32_t n) + namespace algorithms { - typedef typename traits::GetValueType::ValueType Type; + /* Reduce values in GPU memory over all MPI instances + */ + class GlobalReduce + { + public: + GlobalReduce(const uint32_t byte, const uint32_t sharedMemByte = 4 * 1024) : reduce(byte, sharedMemByte) + { + } - Type localResult = reduce(func, src, n); - Type globalResult; + /* Activate participation for reduce algorithm. + * Must called from any mpi process. This function use global blocking mpi calls. + * Don't create a instance befor you have set you cuda device! 
+ * @param isActive true if mpi rank should be part of reduce operation, else false + */ + void participate(bool isActive) + { + mpi_reduce.participate(isActive); + } - mpi_reduce(func, &globalResult, &localResult, 1); - return globalResult; - } -private: - ::pmacc::nvidia::reduce::Reduce reduce; - ::pmacc::mpi::MPIReduce mpi_reduce; -}; -} -} + /* Reduce elements in global gpu memeory + * + * @param func functor for reduce which takes two arguments, first argument is the source and get the new + * reduced value. Functor must specialize the function getMPI_Op. + * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one + * dimension access) + * @param n number of elements to reduce + * + * @return reduced value (same on every mpi instance) + */ + template + typename traits::GetValueType::ValueType operator()(Functor func, Src src, uint32_t n) + { + typedef typename traits::GetValueType::ValueType Type; + Type localResult = reduce(func, src, n); + Type globalResult; + mpi_reduce(func, &globalResult, &localResult, 1); + return globalResult; + } + private: + ::pmacc::nvidia::reduce::Reduce reduce; + ::pmacc::mpi::MPIReduce mpi_reduce; + }; + } // namespace algorithms +} // namespace pmacc diff --git a/include/pmacc/algorithms/PromoteType.hpp b/include/pmacc/algorithms/PromoteType.hpp index a54f61920d..ddd7284b6f 100644 --- a/include/pmacc/algorithms/PromoteType.hpp +++ b/include/pmacc/algorithms/PromoteType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PMacc. 
* @@ -23,24 +23,25 @@ namespace pmacc { -namespace algorithms -{ -namespace promoteType -{ - - // general: use first type - template - struct promoteType { - typedef T1 type; - }; + namespace algorithms + { + namespace promoteType + { + // general: use first type + template + struct promoteType + { + typedef T1 type; + }; - // special: promote float to double - template< > - struct promoteType { - typedef double type; - }; + // special: promote float to double + template<> + struct promoteType + { + typedef double type; + }; -} //namespace promoteType -} //namespace algorithms -} //namespace pmacc + } // namespace promoteType + } // namespace algorithms +} // namespace pmacc diff --git a/include/pmacc/algorithms/TypeCast.hpp b/include/pmacc/algorithms/TypeCast.hpp index 5d7975b72c..6b6ebea9fd 100644 --- a/include/pmacc/algorithms/TypeCast.hpp +++ b/include/pmacc/algorithms/TypeCast.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -23,29 +23,28 @@ namespace pmacc { -namespace algorithms -{ -namespace precisionCast -{ - -template -struct TypeCast -{ - typedef CastToType result; - - HDINLINE result operator()(const Type& value) const + namespace algorithms { - return static_cast(value); - } -}; + namespace precisionCast + { + template + struct TypeCast + { + typedef CastToType result; + HDINLINE result operator()(const Type& value) const + { + return static_cast(value); + } + }; -template -HDINLINE typename TypeCast::result precisionCast(const Type& value) -{ - return TypeCast ()(value); -} -} //namespace precisionCast -} //namespace algorithms -}//namespace pmacc + template + HDINLINE typename TypeCast::result precisionCast(const Type& value) + { + return TypeCast()(value); + } + + } // namespace precisionCast + } // namespace algorithms +} // namespace pmacc diff --git a/include/pmacc/algorithms/math.hpp b/include/pmacc/algorithms/math.hpp index 938dad66d3..c2c9304283 100644 --- a/include/pmacc/algorithms/math.hpp +++ b/include/pmacc/algorithms/math.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Debus +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Debus * * This file is part of PMacc. 
* @@ -24,40 +24,28 @@ #include "pmacc/types.hpp" #include "pmacc/algorithms/math/defines/abs.hpp" -#include "pmacc/algorithms/math/defines/sqrt.hpp" #include "pmacc/algorithms/math/defines/exp.hpp" -#include "pmacc/algorithms/math/defines/erf.hpp" #include "pmacc/algorithms/math/defines/trigo.hpp" #include "pmacc/algorithms/math/defines/cross.hpp" #include "pmacc/algorithms/math/defines/dot.hpp" #include "pmacc/algorithms/math/defines/comparison.hpp" #include "pmacc/algorithms/math/defines/floatingPoint.hpp" -#include "pmacc/algorithms/math/defines/pow.hpp" #include "pmacc/algorithms/math/defines/modf.hpp" -#include "pmacc/algorithms/math/defines/fmod.hpp" #include "pmacc/algorithms/math/defines/bessel.hpp" #include "pmacc/algorithms/math/defines/pi.hpp" #include "pmacc/algorithms/math/floatMath/abs.tpp" -#include "pmacc/algorithms/math/floatMath/sqrt.tpp" #include "pmacc/algorithms/math/floatMath/exp.tpp" -#include "pmacc/algorithms/math/floatMath/erf.tpp" #include "pmacc/algorithms/math/floatMath/trigo.tpp" #include "pmacc/algorithms/math/floatMath/comparison.tpp" #include "pmacc/algorithms/math/floatMath/floatingPoint.tpp" -#include "pmacc/algorithms/math/floatMath/pow.tpp" #include "pmacc/algorithms/math/floatMath/modf.tpp" -#include "pmacc/algorithms/math/floatMath/fmod.tpp" #include "pmacc/algorithms/math/floatMath/bessel.tpp" #include "pmacc/algorithms/math/doubleMath/abs.tpp" -#include "pmacc/algorithms/math/doubleMath/sqrt.tpp" #include "pmacc/algorithms/math/doubleMath/exp.tpp" -#include "pmacc/algorithms/math/doubleMath/erf.tpp" #include "pmacc/algorithms/math/doubleMath/trigo.tpp" #include "pmacc/algorithms/math/doubleMath/comparison.tpp" #include "pmacc/algorithms/math/doubleMath/floatingPoint.tpp" -#include "pmacc/algorithms/math/doubleMath/pow.tpp" #include "pmacc/algorithms/math/doubleMath/modf.tpp" -#include "pmacc/algorithms/math/doubleMath/fmod.tpp" #include "pmacc/algorithms/math/doubleMath/bessel.tpp" diff --git 
a/include/pmacc/algorithms/math/defines/abs.hpp b/include/pmacc/algorithms/math/defines/abs.hpp index 264d506156..98c8440480 100644 --- a/include/pmacc/algorithms/math/defines/abs.hpp +++ b/include/pmacc/algorithms/math/defines/abs.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -23,31 +23,16 @@ namespace pmacc { -namespace algorithms -{ - -namespace math -{ - -template -struct Abs; - -template -struct Abs2; - - -template -HDINLINE typename Abs< T1>::result abs(T1 value) -{ - return Abs< T1 > ()(value); -} - -template -HDINLINE typename Abs2< T1 >::result abs2(const T1& value) -{ - return Abs2< T1 > ()(value); -} - -} //namespace math -} //namespace algorithms -}//namespace pmacc + namespace math + { + template + struct Abs2; + + template + HDINLINE typename Abs2::result abs2(const T1& value) + { + return Abs2()(value); + } + + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/bessel.hpp b/include/pmacc/algorithms/math/defines/bessel.hpp index 4a11f258ad..31f8d05ec0 100644 --- a/include/pmacc/algorithms/math/defines/bessel.hpp +++ b/include/pmacc/algorithms/math/defines/bessel.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Debus +/* Copyright 2016-2021 Alexander Debus * * This file is part of PMacc. * @@ -23,191 +23,147 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ -namespace bessel -{ - - /** Modified cylindrical Bessel function of first kind of order 1 - */ - template< typename T_Type > - struct I1; - - /** Modified cylindrical Bessel function of order 1 - * - * @param x input value - * @return float value - */ - template - HDINLINE typename I1::result i1( T_Type const & x ) - { - return I1< T_Type >( )( x ); - } - - /** Modified cylindrical Bessel function of first kind of order 0. 
- */ - template< typename T_Type > - struct I0; - - /** Modified cylindrical Bessel function of first kind of order 0. - * - * @param x input argument - * @return float value - */ - template< typename T_Type > - HDINLINE typename I0< T_Type >::result i0( T_Type const & x ) - { - return I0< T_Type >( )( x ); - } - - /** Bessel function of first kind of order 0 - */ - template< typename T_Type > - struct J0; - - /** Bessel function of first kind of order 0 - * - * @param x input argument - * @return float value - */ - template< typename T_Type > - HDINLINE typename J0< T_Type >::result j0( T_Type const & x ) - { - return J0< T_Type >( )( x ); - } - - /** Bessel function of first kind of order 1 - */ - template< typename T_Type > - struct J1; - - /** Bessel function of first kind of order 1 - * - * @param x input value - * @return float value - */ - template< typename T_Type > - HDINLINE typename J1< T_Type >::result j1( T_Type const & x ) - { - return J1< T_Type >( )( x ); - } - - /** Bessel function of first kind of order n - */ - template< - typename T_IntType, - typename T_FloatType - > - struct Jn; - - /** Bessel function of first kind of order n - * - * Calculate the value of the Bessel function - * of first kind of order n for the input argument. 
- * - * @param n nth order - * @param x input argument - * @return float value - */ - template< - typename T_IntType, - typename T_FloatType - > - HDINLINE - typename Jn< - T_IntType, - T_FloatType - >::result - jn( - T_IntType const & n, - T_FloatType const & x - ) - { - return Jn< - T_IntType, - T_FloatType - >( )( - n, - x - ); - } - - /** Bessel function of second kind of order 0 - * - */ - template< typename T_Type > - struct Y0; - - /**Bessel function of second kind of order 0 - * - * @param x input argument - * @return float value - */ - template< typename T_Type > - HDINLINE typename Y0< T_Type >::result y0( T_Type const & x ) - { - return Y0< T_Type >( )( x ); - } - - /* Bessel function of second kind of order 1. - */ - template< typename T_Type > - struct Y1; - - /** Bessel function of second kind of order 1 - * - * @param x input argument - * @return float value - */ - template< typename T_Type > - HDINLINE typename Y1< T_Type >::result y1( T_Type const & x ) - { - return Y1< T_Type >( )( x ); - } - - /** Bessel function of second kind of order n. - */ - template< - typename T_IntType, - typename T_FloatType - > - struct Yn; - - /** Bessel function of second kind of order n - * - * Calculate the value of the Bessel function - * of second kind of order n for the input argument. 
- * - * @param n nth order - * @param x input argument - * @return float value - */ - template< - typename T_IntType, - typename T_FloatType - > - HDINLINE - typename Yn< - T_IntType, - T_FloatType - >::result - yn( - T_IntType const & n, - T_FloatType const & x - ) + namespace math { - return Yn< - T_IntType, - T_FloatType - >( )( - n, - x - ); - } - -} //namespace bessel -} //namespace math -} //namespace algorithms -} //namespace pmacc + namespace bessel + { + /** Modified cylindrical Bessel function of first kind of order 1 + */ + template + struct I1; + + /** Modified cylindrical Bessel function of order 1 + * + * @param x input value + * @return float value + */ + template + HDINLINE typename I1::result i1(T_Type const& x) + { + return I1()(x); + } + + /** Modified cylindrical Bessel function of first kind of order 0. + */ + template + struct I0; + + /** Modified cylindrical Bessel function of first kind of order 0. + * + * @param x input argument + * @return float value + */ + template + HDINLINE typename I0::result i0(T_Type const& x) + { + return I0()(x); + } + + /** Bessel function of first kind of order 0 + */ + template + struct J0; + + /** Bessel function of first kind of order 0 + * + * @param x input argument + * @return float value + */ + template + HDINLINE typename J0::result j0(T_Type const& x) + { + return J0()(x); + } + + /** Bessel function of first kind of order 1 + */ + template + struct J1; + + /** Bessel function of first kind of order 1 + * + * @param x input value + * @return float value + */ + template + HDINLINE typename J1::result j1(T_Type const& x) + { + return J1()(x); + } + + /** Bessel function of first kind of order n + */ + template + struct Jn; + + /** Bessel function of first kind of order n + * + * Calculate the value of the Bessel function + * of first kind of order n for the input argument. 
+ * + * @param n nth order + * @param x input argument + * @return float value + */ + template + HDINLINE typename Jn::result jn(T_IntType const& n, T_FloatType const& x) + { + return Jn()(n, x); + } + + /** Bessel function of second kind of order 0 + * + */ + template + struct Y0; + + /**Bessel function of second kind of order 0 + * + * @param x input argument + * @return float value + */ + template + HDINLINE typename Y0::result y0(T_Type const& x) + { + return Y0()(x); + } + + /* Bessel function of second kind of order 1. + */ + template + struct Y1; + + /** Bessel function of second kind of order 1 + * + * @param x input argument + * @return float value + */ + template + HDINLINE typename Y1::result y1(T_Type const& x) + { + return Y1()(x); + } + + /** Bessel function of second kind of order n. + */ + template + struct Yn; + + /** Bessel function of second kind of order n + * + * Calculate the value of the Bessel function + * of second kind of order n for the input argument. + * + * @param n nth order + * @param x input argument + * @return float value + */ + template + HDINLINE typename Yn::result yn(T_IntType const& n, T_FloatType const& x) + { + return Yn()(n, x); + } + + } // namespace bessel + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/comparison.hpp b/include/pmacc/algorithms/math/defines/comparison.hpp index 2516a83cac..0f3986cf36 100644 --- a/include/pmacc/algorithms/math/defines/comparison.hpp +++ b/include/pmacc/algorithms/math/defines/comparison.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -23,52 +23,47 @@ namespace pmacc { -namespace algorithms -{ + namespace math + { + template + struct Max; -namespace math -{ + template + struct Min; -template -struct Max; + template + struct Max + { + typedef T result; -template -struct Min; + HDINLINE T operator()(T value1, T value2) + { + return value1 > value2 ? value1 : value2; + } + }; -template -struct Max -{ - typedef T result; + template + struct Min + { + typedef T result; - HDINLINE T operator()(T value1, T value2) - { - return value1 > value2 ? value1 : value2; - } -}; + HDINLINE T operator()(T value1, T value2) + { + return value1 < value2 ? value1 : value2; + } + }; -template -struct Min -{ - typedef T result; + template + HDINLINE typename Min::result min(const T1& value1, const T2& value2) + { + return Min()(value1, value2); + } - HDINLINE T operator()(T value1, T value2) - { - return value1 < value2 ? value1 : value2; - } -}; - -template -HDINLINE typename Min< T1,T2>::result min(const T1& value1,const T2& value2) -{ - return Min< T1,T2 > ()(value1,value2); -} - -template -HDINLINE typename Max< T1,T2 >::result max(const T1& value1,const T2& value2) -{ - return Max< T1,T2 > ()(value1,value2); -} + template + HDINLINE typename Max::result max(const T1& value1, const T2& value2) + { + return Max()(value1, value2); + } -} //namespace math -} //namespace algorithms -}//namespace pmacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/cross.hpp b/include/pmacc/algorithms/math/defines/cross.hpp index 9b0d8e313c..06545acbd6 100644 --- a/include/pmacc/algorithms/math/defines/cross.hpp +++ b/include/pmacc/algorithms/math/defines/cross.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -24,21 +24,16 @@ namespace pmacc { -namespace algorithms -{ - -namespace math -{ + namespace math + { + template + struct Cross; -template -struct Cross; - -template -HDINLINE typename Cross< T1, T2 >::result cross(const T1& value, const T2& value2) -{ - return Cross< T1, T2 > ()(value, value2); -} -} //namespace math -} //namespace algorithms -}//namespace pmacc + template + HDINLINE typename Cross::result cross(const T1& value, const T2& value2) + { + return Cross()(value, value2); + } + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/dot.hpp b/include/pmacc/algorithms/math/defines/dot.hpp index e0c7a37b96..b03c66205d 100644 --- a/include/pmacc/algorithms/math/defines/dot.hpp +++ b/include/pmacc/algorithms/math/defines/dot.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -23,20 +23,16 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ + namespace math + { + template + struct Dot; -template -struct Dot; - -template -HDINLINE typename Dot< T1, T2 >::result dot(const T1& value, const T2& value2) -{ - return Dot< T1, T2 > ()(value, value2); -} -} //namespace math -} //namespace algorithms -}//namespace pmacc + template + HDINLINE typename Dot::result dot(const T1& value, const T2& value2) + { + return Dot()(value, value2); + } + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/erf.hpp b/include/pmacc/algorithms/math/defines/erf.hpp deleted file mode 100644 index 09709a642d..0000000000 --- a/include/pmacc/algorithms/math/defines/erf.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* Copyright 2014-2020 Axel Huebl - * - * This file is part of PMacc. 
- * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template -struct Erf; - -template -HDINLINE static typename Erf< T1 >::result erf(const T1& value) -{ - return Erf< T1 > ()(value); -} - -} /* namespace math */ -} /* namespace algorithms */ -} /* namespace pmacc */ diff --git a/include/pmacc/algorithms/math/defines/exp.hpp b/include/pmacc/algorithms/math/defines/exp.hpp index 39db92761a..b793a6ff35 100644 --- a/include/pmacc/algorithms/math/defines/exp.hpp +++ b/include/pmacc/algorithms/math/defines/exp.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PMacc. 
* @@ -24,39 +24,16 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - - -template -struct Exp; - -template -struct Log; - -template -struct Log10; - -template -HDINLINE typename Exp< T1 >::result exp(const T1& value) -{ - return Exp< T1 > ()(value); -} - -template -HDINLINE typename Log< T1 >::result log(const T1& value) -{ - return Log< T1 > ()(value); -} - -template -HDINLINE typename Log10< T1 >::result log10(const T1& value) -{ - return Log10< T1 > ()(value); -} - -} //namespace math -} //namespace algorithms -}//namespace pmacc + namespace math + { + template + struct Log10; + + template + HDINLINE typename Log10::result log10(const T1& value) + { + return Log10()(value); + } + + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/floatingPoint.hpp b/include/pmacc/algorithms/math/defines/floatingPoint.hpp index b78e674fa9..2d1e462b59 100644 --- a/include/pmacc/algorithms/math/defines/floatingPoint.hpp +++ b/include/pmacc/algorithms/math/defines/floatingPoint.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. 
* @@ -23,79 +23,50 @@ namespace pmacc { -namespace algorithms -{ - -namespace math -{ - -template -struct Floor; - -template -struct Ceil; + namespace math + { + template + struct Float2int_ru; -template -struct Float2int_ru; - -template -struct Float2int_rd; - -template -struct Float2int_rn; - -/** - * Rounds down (towards -inf) - */ -template -HDINLINE typename Floor< T1>::result floor(T1 value) -{ - return Floor< T1 > ()(value); -} + template + struct Float2int_rd; -/** - * Rounds up (towards +inf) - */ -template -HDINLINE typename Ceil< T1>::result ceil(T1 value) -{ - return Ceil< T1 > ()(value); -} + template + struct Float2int_rn; -/** - * Returns the smallest int value that is at least as big as value - * Note: Using values outside the range of an int is undefined - * @return integer value - */ -template -HDINLINE typename Float2int_ru< T1>::result float2int_ru(T1 value) -{ - return Float2int_ru< T1 > ()(value); -} + /** + * Returns the smallest int value that is at least as big as value + * Note: Using values outside the range of an int is undefined + * @return integer value + */ + template + HDINLINE typename Float2int_ru::result float2int_ru(T1 value) + { + return Float2int_ru()(value); + } -/** - * Returns the largest int value that is not greater than value - * Note: Using values outside the range of an int is undefined - * @return integer value - */ -template -HDINLINE typename Float2int_rd< T1>::result float2int_rd(T1 value) -{ - return Float2int_rd< T1 > ()(value); -} + /** + * Returns the largest int value that is not greater than value + * Note: Using values outside the range of an int is undefined + * @return integer value + */ + template + HDINLINE typename Float2int_rd::result float2int_rd(T1 value) + { + return Float2int_rd()(value); + } -/** - * Rounds towards the nearest value returning an int - * For the case of x.5 the even value is chosen from the 2 possible values - * Note: Using values outside the range of an int is undefined - * @return 
integer value - */ -template -HDINLINE typename Float2int_rn< T1>::result float2int_rn(T1 value) -{ - return Float2int_rn< T1 > ()(value); -} + /** + * Rounds towards the nearest value returning an int + * For the case of x.5 the even value is chosen from the 2 possible values + * Note: Using values outside the range of an int is undefined + * @return integer value + */ + template + HDINLINE typename Float2int_rn::result float2int_rn(T1 value) + { + return Float2int_rn()(value); + } -} //namespace math -} //namespace algorithms -}//namespace pmacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/fmod.hpp b/include/pmacc/algorithms/math/defines/fmod.hpp deleted file mode 100644 index 604824d9a1..0000000000 --- a/include/pmacc/algorithms/math/defines/fmod.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2016-2020 Alexander Debus - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template -struct Fmod; - -/** - * Equivalent to the modulus-operator for float types - * returns the floating-point remainder of x / y. - * The functionality corresponds to the C++ - * math function fmod(). 
- * For details, see http://www.cplusplus.com/reference/cmath/fmod/ . - * @return float value - */ -template -HDINLINE typename Fmod< T1>::result fmod(T1 x, T1 y) -{ - return Fmod< T1 > ()(x, y); -} - -} //namespace math -} //namespace algorithms -} //namespace pmacc - diff --git a/include/pmacc/algorithms/math/defines/modf.hpp b/include/pmacc/algorithms/math/defines/modf.hpp index 9eac4f19be..f1a8a1c1df 100644 --- a/include/pmacc/algorithms/math/defines/modf.hpp +++ b/include/pmacc/algorithms/math/defines/modf.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -23,21 +23,16 @@ namespace pmacc { -namespace algorithms -{ - -namespace math -{ + namespace math + { + template + struct Modf; -template -struct Modf; - -template -HDINLINE typename Modf::result modf(T value, T* intpart) -{ - return Modf()(value, intpart); -} + template + HDINLINE typename Modf::result modf(T value, T* intpart) + { + return Modf()(value, intpart); + } -} //namespace math -} //namespace algorithms -}//namespace pmacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/pi.hpp b/include/pmacc/algorithms/math/defines/pi.hpp index 72f4636698..20479eaa39 100644 --- a/include/pmacc/algorithms/math/defines/pi.hpp +++ b/include/pmacc/algorithms/math/defines/pi.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Sergei Bastrakov +/* Copyright 2018-2021 Sergei Bastrakov * * This file is part of PMacc. 
* @@ -24,25 +24,19 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - - /** Values of pi and related constants as T_Type - */ - template< typename T_Type > - struct Pi + namespace math { - static constexpr T_Type value = static_cast< T_Type >( - 3.141592653589793238462643383279502884197169399 - ); - static constexpr T_Type doubleValue = static_cast< T_Type >( 2.0 ) * value; - static constexpr T_Type halfValue = value / static_cast< T_Type >( 2.0 ); - static constexpr T_Type quarterValue = value / static_cast< T_Type >( 4.0 ); - static constexpr T_Type doubleReciprocalValue = static_cast< T_Type >( 2.0 ) / value; - }; + /** Values of pi and related constants as T_Type + */ + template + struct Pi + { + static constexpr T_Type value = static_cast(3.141592653589793238462643383279502884197169399); + static constexpr T_Type doubleValue = static_cast(2.0) * value; + static constexpr T_Type halfValue = value / static_cast(2.0); + static constexpr T_Type quarterValue = value / static_cast(4.0); + static constexpr T_Type doubleReciprocalValue = static_cast(2.0) / value; + }; -} // namespace math -} // namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/pow.hpp b/include/pmacc/algorithms/math/defines/pow.hpp deleted file mode 100644 index 0a26c66f72..0000000000 --- a/include/pmacc/algorithms/math/defines/pow.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -namespace pmacc -{ -namespace algorithms -{ - -namespace math -{ - -template -struct Pow; - - -/** Raised the base to the power exponent - * - * @param base base value - * @param exponent power exponent - * @return base rased to the power exponent - */ -template -HDINLINE typename Pow< T1, T2 >::result pow(const T1& base,const T2& exponent) -{ - return Pow< T1, T2 > ()(base, exponent); -} - -} //namespace math -} //namespace algorithms -}//namespace pmacc diff --git a/include/pmacc/algorithms/math/defines/sqrt.hpp b/include/pmacc/algorithms/math/defines/sqrt.hpp deleted file mode 100644 index b380760d4d..0000000000 --- a/include/pmacc/algorithms/math/defines/sqrt.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. 
- * If not, see . - */ - -#pragma once - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template -struct Sqrt; - -template -struct RSqrt; - - -template -HDINLINE typename Sqrt< T1 >::result sqrt(const T1& value) -{ - return Sqrt< T1 > ()(value); -} - -template -HDINLINE typename RSqrt< T1 >::result rsqrt(const T1& value) -{ - return RSqrt< T1 > ()(value); -} - -} //namespace math -} //namespace algorithms -}//namespace pmacc - diff --git a/include/pmacc/algorithms/math/defines/trigo.hpp b/include/pmacc/algorithms/math/defines/trigo.hpp index 41aadd105f..afb2ad4ed5 100644 --- a/include/pmacc/algorithms/math/defines/trigo.hpp +++ b/include/pmacc/algorithms/math/defines/trigo.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl, Alexander Debus +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Axel Huebl, Alexander Debus * * This file is part of PMacc. * @@ -23,112 +23,29 @@ namespace pmacc { -namespace algorithms -{ - -namespace math -{ - -template -struct Sin; - -template -struct ASin; - -template -struct Cos; - -template -struct ACos; + namespace math + { + template + struct SinCos; -template -struct Tan; + template + struct Sinc; -template -struct ATan; -template -struct Atan2; - -template -struct SinCos; - -template -struct Sinc; - - -template -HDINLINE -typename Sin< T1 >::result -sin(const T1& value) -{ - return Sin< T1 > ()(value); -} - -template -HDINLINE -typename ASin< T1 >::result -asin(const T1& value) -{ - return ASin< T1 > ()(value); -} + template + HDINLINE typename SinCos::result sincos( + ArgType arg, + SinType& sinValue, + CosType& cosValue) + { + return SinCos()(arg, sinValue, cosValue); + } -template -HDINLINE -typename Cos::result -cos(const T1& value) -{ - return Cos< T1 > ()(value); -} - -template -HDINLINE -typename ACos::result -acos(const T1& value) -{ - return ACos< T1 > ()(value); -} - -template -HDINLINE -typename Tan::result -tan(const T1& value) -{ - return 
Tan< T1 > ()(value); -} - -template -HDINLINE -typename ATan::result -atan(const T1& value) -{ - return ATan< T1 > ()(value); -} - -template -HDINLINE -typename SinCos< ArgType, SinType, CosType >::result -sincos(ArgType arg, SinType& sinValue, CosType& cosValue) -{ - return SinCos< ArgType, SinType, CosType > ()(arg, sinValue, cosValue); -} - -template -HDINLINE -typename Sinc::result -sinc(const T1& value) -{ - return Sinc< T1 > ()(value); -} - -template -HDINLINE -typename Atan2::result -atan2(const T1& val1, const T1& val2) -{ - return Atan2< T1 > ()(val1, val2); -} + template + HDINLINE typename Sinc::result sinc(const T1& value) + { + return Sinc()(value); + } -} /* namespace math */ -} /* namespace algorithms */ + } /* namespace math */ } /* namespace pmacc */ diff --git a/include/pmacc/algorithms/math/doubleMath/abs.tpp b/include/pmacc/algorithms/math/doubleMath/abs.tpp index ea9c2d72b3..429ae0e70e 100644 --- a/include/pmacc/algorithms/math/doubleMath/abs.tpp +++ b/include/pmacc/algorithms/math/doubleMath/abs.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PMacc. 
* @@ -28,40 +28,18 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Abs -{ - typedef double result; - - HDINLINE double operator( )(double value) + namespace math { -#ifdef __CUDA_ARCH__ - return ::fabs( value ); -#else - /* \bug on cpu `::abs(double)` always return zero -> maybe this is the - * integer version of `abs()` - */ - return std::abs( value ); -#endif - } -}; - -template<> -struct Abs2 -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return value*value; - } -}; - -} //namespace math -} //namespace algorithms + template<> + struct Abs2 + { + typedef double result; + + HDINLINE double operator()(const double& value) + { + return value * value; + } + }; + + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/bessel.tpp b/include/pmacc/algorithms/math/doubleMath/bessel.tpp index 66ba789060..2e96ec556a 100644 --- a/include/pmacc/algorithms/math/doubleMath/bessel.tpp +++ b/include/pmacc/algorithms/math/doubleMath/bessel.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Debus +/* Copyright 2016-2021 Alexander Debus * * This file is part of PMacc. 
* @@ -27,176 +27,130 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ -namespace bessel -{ - - template< > - struct I0< double > + namespace math { - using result = double; - - HDINLINE result operator( )( result const & x ) + namespace bessel { -#if __CUDA_ARCH__ - return ::cyl_bessel_i0( x ); + template<> + struct I0 + { + using result = double; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::cyl_bessel_i0(x); #else - return boost::math::cyl_bessel_i( - 0, - x - ); + return boost::math::cyl_bessel_i(0, x); #endif - } - }; - - template< > - struct I1< double > - { - using result = double; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::cyl_bessel_i1( x ); + } + }; + + template<> + struct I1 + { + using result = double; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::cyl_bessel_i1(x); #else - return boost::math::cyl_bessel_i( - 1, - x - ); + return boost::math::cyl_bessel_i(1, x); #endif - } - }; - - template< > - struct J0< double > - { - using result = double; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::j0( x ); + } + }; + + template<> + struct J0 + { + using result = double; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::j0(x); #else - return boost::math::cyl_bessel_j( - 0, - x - ); + return boost::math::cyl_bessel_j(0, x); #endif - } - }; - - template< > - struct J1< double > - { - using result = double; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::j1( x ); + } + }; + + template<> + struct J1 + { + using result = double; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::j1(x); #else - return boost::math::cyl_bessel_j( - 1, - x - ); + return boost::math::cyl_bessel_j(1, x); 
#endif - } - }; - - template< > - struct Jn< - int, - double - > - { - using result = double; - - HDINLINE result operator( )( - int const & n, - result const & x - ) - { -#if __CUDA_ARCH__ - return ::jn( - n, - x - ); + } + }; + + template<> + struct Jn + { + using result = double; + + HDINLINE result operator()(int const& n, result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::jn(n, x); #else - return boost::math::cyl_bessel_j( - n, - x - ); + return boost::math::cyl_bessel_j(n, x); #endif - } - }; - - template< > - struct Y0< double > - { - using result = double; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::y0( x ); + } + }; + + template<> + struct Y0 + { + using result = double; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::y0(x); #else - return boost::math::cyl_neumann( - 0, - x - ); + return boost::math::cyl_neumann(0, x); #endif - } - }; - - template< > - struct Y1< double > - { - using result = double; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::y1( x ); + } + }; + + template<> + struct Y1 + { + using result = double; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::y1(x); #else - return boost::math::cyl_neumann( - 1, - x - ); + return boost::math::cyl_neumann(1, x); #endif - } - }; - - template< > - struct Yn< - int, - double - > - { - using result = double; - - HDINLINE result operator( )( - int const & n, - result const & x - ) - { -#if __CUDA_ARCH__ - return ::yn( - n, - x - ); + } + }; + + template<> + struct Yn + { + using result = double; + + HDINLINE result operator()(int const& n, result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::yn(n, x); #else - return boost::math::cyl_neumann( - n, - x - ); + return boost::math::cyl_neumann(n, x); #endif - } - }; + } + }; -} //namespace bessel -} 
//namespace math -} //namespace algorithms -} //namespace pmacc + } // namespace bessel + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/comparison.tpp b/include/pmacc/algorithms/math/doubleMath/comparison.tpp index 65529abcee..2771f1db22 100644 --- a/include/pmacc/algorithms/math/doubleMath/comparison.tpp +++ b/include/pmacc/algorithms/math/doubleMath/comparison.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Benjamin Worpitz, Richard Pausch +/* Copyright 2015-2021 Benjamin Worpitz, Richard Pausch * * This file is part of PMacc. * @@ -27,33 +27,29 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Min -{ - typedef double result; - - HDINLINE double operator()(double value1, double value2) + namespace math { - return ::fmin(value1, value2); - } -}; - -template<> -struct Max -{ - typedef double result; - - HDINLINE double operator()(double value1, double value2) - { - return ::fmax(value1, value2); - } -}; - -} //namespace math -} //namespace algorithms -} //namespace pmacc + template<> + struct Min + { + typedef double result; + + HDINLINE double operator()(double value1, double value2) + { + return ::fmin(value1, value2); + } + }; + + template<> + struct Max + { + typedef double result; + + HDINLINE double operator()(double value1, double value2) + { + return ::fmax(value1, value2); + } + }; + + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/erf.tpp b/include/pmacc/algorithms/math/doubleMath/erf.tpp deleted file mode 100644 index 3e449de147..0000000000 --- a/include/pmacc/algorithms/math/doubleMath/erf.tpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2014-2020 Axel Huebl, Richard Pausch - * - * This file is part of PMacc. 
- * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -#include "pmacc/types.hpp" -#include - - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template<> -struct Erf -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return ::erf( value ); - } -}; - -} /* namespace math */ -} /* namespace algorithms */ -} /* namespace pmacc */ diff --git a/include/pmacc/algorithms/math/doubleMath/exp.tpp b/include/pmacc/algorithms/math/doubleMath/exp.tpp index db659d473f..4987ed38c4 100644 --- a/include/pmacc/algorithms/math/doubleMath/exp.tpp +++ b/include/pmacc/algorithms/math/doubleMath/exp.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PMacc. 
* @@ -28,45 +28,19 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Exp -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return ::exp( value ); - } -}; - -template<> -struct Log -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) + namespace math { - return ::log( value ); - } -}; + template<> + struct Log10 + { + typedef double result; -template<> -struct Log10 -{ - typedef double result; - - HDINLINE double operator( )(const double& value) - { - return ::log10( value ); - } -}; + HDINLINE double operator()(const double& value) + { + return ::log10(value); + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/floatingPoint.tpp b/include/pmacc/algorithms/math/doubleMath/floatingPoint.tpp index 33cc6bb6a5..a1e3519a37 100644 --- a/include/pmacc/algorithms/math/doubleMath/floatingPoint.tpp +++ b/include/pmacc/algorithms/math/doubleMath/floatingPoint.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, * Alexander Grund * * This file is part of PMacc. 
@@ -29,102 +29,76 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Floor -{ - typedef double result; - - HDINLINE result operator( )(result value) + namespace math { - return ::floor( value ); - } -}; - -template<> -struct Ceil -{ - typedef double result; - - HDINLINE result operator( )(result value) - { - return ::ceil( value ); - } -}; - -template<> -struct Float2int_ru -{ - typedef int result; + template<> + struct Float2int_ru + { + typedef int result; - HDINLINE result operator( )(double value) - { -#if __CUDA_ARCH__ - return ::__double2int_ru( value ); + HDINLINE result operator()(double value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::__double2int_ru(value); #else - return static_cast(ceil(value)); + return static_cast(ceil(value)); #endif - } -}; + } + }; -template<> -struct Float2int_rd -{ - typedef int result; + template<> + struct Float2int_rd + { + typedef int result; - HDINLINE result operator( )(double value) - { -#if __CUDA_ARCH__ - return ::__double2int_rd( value ); + HDINLINE result operator()(double value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::__double2int_rd(value); #else - return static_cast(floor(value)); + return static_cast(floor(value)); #endif - } -}; + } + }; -template<> -struct Float2int_rn -{ - typedef int result; + template<> + struct Float2int_rn + { + typedef int result; - HDINLINE result operator( )(double value) - { -#if __CUDA_ARCH__ - return ::__double2int_rn( value ); + HDINLINE result operator()(double value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::__double2int_rn(value); #else - if(value < 0.0) - return -(*this)(-value); - double intPart; - double fracPart = std::modf(value, &intPart); - result res = static_cast(intPart); - /* epsilon in the following code is used to consider values - * "very close" to x.5 also as x.5 - */ - if(fracPart > 0.5 + std::numeric_limits::epsilon()) - { - /* >x.5 --> Round up */ - res = res + 
1; - } - else if(!(fracPart < 0.5 - std::numeric_limits::epsilon())) - { - /* We are NOT >x.5 AND NOT ==x.5 --> use x if x is even, else x+1 - * The "&~1" cancels the last bit which results in an even value - * res is even -> res+1 is odd -> (res+1)&~1 = res - * res is odd -> res+1 is even -> (res+1)&~1 = res+1 - */ - res = (res + 1) & ~1; - } - /* else res = res (round down) */ - return res; + if(value < 0.0) + return -(*this)(-value); + double intPart; + double fracPart = std::modf(value, &intPart); + result res = static_cast(intPart); + /* epsilon in the following code is used to consider values + * "very close" to x.5 also as x.5 + */ + if(fracPart > 0.5 + std::numeric_limits::epsilon()) + { + /* >x.5 --> Round up */ + res = res + 1; + } + else if(!(fracPart < 0.5 - std::numeric_limits::epsilon())) + { + /* We are NOT >x.5 AND NOT ==x.5 --> use x if x is even, else x+1 + * The "&~1" cancels the last bit which results in an even value + * res is even -> res+1 is odd -> (res+1)&~1 = res + * res is odd -> res+1 is even -> (res+1)&~1 = res+1 + */ + res = (res + 1) & ~1; + } + /* else res = res (round down) */ + return res; #endif - } -}; + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/fmod.tpp b/include/pmacc/algorithms/math/doubleMath/fmod.tpp deleted file mode 100644 index aa02abc8e3..0000000000 --- a/include/pmacc/algorithms/math/doubleMath/fmod.tpp +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2016-2020 Alexander Debus - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -#include "pmacc/types.hpp" -#include - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template<> -struct Fmod -{ - typedef double result; - - HDINLINE result operator( )(result x, result y) - { -#if __CUDA_ARCH__ - return ::fmod(x, y); -#else - return std::fmod(x, y); -#endif - } -}; - -} //namespace math -} //namespace algorithms -} //namespace pmacc - diff --git a/include/pmacc/algorithms/math/doubleMath/modf.tpp b/include/pmacc/algorithms/math/doubleMath/modf.tpp index 10ec3c632d..89774db6f4 100644 --- a/include/pmacc/algorithms/math/doubleMath/modf.tpp +++ b/include/pmacc/algorithms/math/doubleMath/modf.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. 
* @@ -26,26 +26,22 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Modf -{ - typedef double result; - - HDINLINE double operator()(double value, double* intpart) + namespace math { -#if __CUDA_ARCH__ - return ::modf(value, intpart); + template<> + struct Modf + { + typedef double result; + + HDINLINE double operator()(double value, double* intpart) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::modf(value, intpart); #else - return std::modf(value, intpart); + return std::modf(value, intpart); #endif - } -}; + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/pow.tpp b/include/pmacc/algorithms/math/doubleMath/pow.tpp deleted file mode 100644 index 73e169a2fe..0000000000 --- a/include/pmacc/algorithms/math/doubleMath/pow.tpp +++ /dev/null @@ -1,69 +0,0 @@ -/* Copyright 2013-2020 Rene Widera, Alexander Grund - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - - -#pragma once - -#include "pmacc/types.hpp" -#include - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ -/*C++98 standard define a separate version for int and double exponent*/ - -template<> -struct Pow -{ - typedef double result; - - HDINLINE result operator()(const double& base, const double& exponent) - { - return ::pow(base, exponent); - } -}; - -template<> -struct Pow -{ - typedef double result; - - HDINLINE result operator()(const double& base, const int& exponent) - { -#ifdef __CUDA_ARCH__ /*device version*/ - /* @todo: There is an incompatibility with C++11 + CUDA + GCC where no device function - * pow(double, int) is defined: http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-tools-title-known - * Use the pow(double, double) instead which reduces performance or implement an own (faster) version - */ - return ::pow(base, static_cast(exponent)); -#else - return ::pow(base, exponent); -#endif - } -}; - - -} //namespace math -} //namespace algorithms -} // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/sqrt.tpp b/include/pmacc/algorithms/math/doubleMath/sqrt.tpp deleted file mode 100644 index f5a25da0fb..0000000000 --- a/include/pmacc/algorithms/math/doubleMath/sqrt.tpp +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, - * Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - - -#pragma once - -#include "pmacc/types.hpp" -#include - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template<> -struct Sqrt -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return ::sqrt( value ); - } -}; - -template<> -struct RSqrt -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { -#if !defined(__CUDACC__) - return 1.0/::sqrt(value); -#else - return ::rsqrt(value); -#endif - } -}; - -} //namespace math -} //namespace algorithms -} // namespace pmacc diff --git a/include/pmacc/algorithms/math/doubleMath/trigo.tpp b/include/pmacc/algorithms/math/doubleMath/trigo.tpp index 79e8e51826..acee91c2fc 100644 --- a/include/pmacc/algorithms/math/doubleMath/trigo.tpp +++ b/include/pmacc/algorithms/math/doubleMath/trigo.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, * Axel Huebl, Alexander Debus * * This file is part of PMacc. 
@@ -28,119 +28,38 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Sin -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return ::sin( value ); - } -}; - -template<> -struct ASin -{ - typedef double result; - - HDINLINE double operator( )(const double& value) - { - return ::asin( value ); - } -}; - -template<> -struct Cos -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return ::cos( value ); - } -}; - -template<> -struct ACos -{ - typedef double result; - - HDINLINE double operator( )(const double& value) + namespace math { - return ::acos( value ); - } -}; + template<> + struct SinCos + { + typedef void result; -template<> -struct Tan -{ - typedef double result; - - HDINLINE double operator( )(const double& value ) - { - return ::tan( value ); - } -}; - -template<> -struct ATan -{ - typedef double result; - - HDINLINE double operator( )(const double& value) - { - return ::atan( value ); - } -}; - -template<> -struct SinCos -{ - typedef void result; - - HDINLINE void operator( )(double arg, double& sinValue, double& cosValue ) - { + HDINLINE void operator()(double arg, double& sinValue, double& cosValue) + { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) - sinValue = ::sin(arg); - cosValue = ::cos(arg); + sinValue = cupla::math::sin(arg); + cosValue = cupla::math::cos(arg); #else - ::sincos(arg, &sinValue, &cosValue); + ::sincos(arg, &sinValue, &cosValue); #endif - } -}; - - -template<> -struct Sinc -{ - typedef double result; + } + }; - HDINLINE double operator( )(const double& value ) - { - if(pmacc::algorithms::math::abs(value) < DBL_EPSILON) - return 1.0; - else - return pmacc::algorithms::math::sin( value )/value; - } -}; -template<> -struct Atan2 -{ - typedef double result; + template<> + struct Sinc + { + typedef double result; - HDINLINE double operator( )(const double& val1, const double& val2 ) - { - return ::atan2( val1, val2 ); - } -}; + 
HDINLINE double operator()(const double& value) + { + if(cupla::math::abs(value) < DBL_EPSILON) + return 1.0; + else + return cupla::math::sin(value) / value; + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/abs.tpp b/include/pmacc/algorithms/math/floatMath/abs.tpp index befec3d6db..82b92abc01 100644 --- a/include/pmacc/algorithms/math/floatMath/abs.tpp +++ b/include/pmacc/algorithms/math/floatMath/abs.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PMacc. * @@ -27,34 +27,18 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - - -template<> -struct Abs -{ - typedef float result; - - HDINLINE float operator( )(float value) + namespace math { - return ::fabsf( value ); - } -}; - -template<> -struct Abs2 -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return value*value; - } -}; - -} //namespace math -} //namespace algorithms + template<> + struct Abs2 + { + typedef float result; + + HDINLINE float operator()(const float& value) + { + return value * value; + } + }; + + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/bessel.tpp b/include/pmacc/algorithms/math/floatMath/bessel.tpp index 2fede97e33..b0597e7d50 100644 --- a/include/pmacc/algorithms/math/floatMath/bessel.tpp +++ b/include/pmacc/algorithms/math/floatMath/bessel.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Debus +/* Copyright 2016-2021 Alexander Debus * * This file is part of PMacc. 
* @@ -27,176 +27,130 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ -namespace bessel -{ - - template< > - struct I0< float > + namespace math { - using result = float; - - HDINLINE result operator( )( result const & x ) + namespace bessel { -#if __CUDA_ARCH__ - return ::cyl_bessel_i0f( x ); + template<> + struct I0 + { + using result = float; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::cyl_bessel_i0f(x); #else - return boost::math::cyl_bessel_i( - 0, - x - ); + return boost::math::cyl_bessel_i(0, x); #endif - } - }; - - template< > - struct I1< float > - { - using result = float; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::cyl_bessel_i1f( x ); + } + }; + + template<> + struct I1 + { + using result = float; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::cyl_bessel_i1f(x); #else - return boost::math::cyl_bessel_i( - 1, - x - ); + return boost::math::cyl_bessel_i(1, x); #endif - } - }; - - template< > - struct J0< float > - { - using result = float; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::j0f( x ); + } + }; + + template<> + struct J0 + { + using result = float; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu_ + return ::j0f(x); #else - return boost::math::cyl_bessel_j( - 0, - x - ); + return boost::math::cyl_bessel_j(0, x); #endif - } - }; - - template< > - struct J1< float > - { - using result = float; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::j1f( x ); + } + }; + + template<> + struct J1 + { + using result = float; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::j1f(x); #else - return boost::math::cyl_bessel_j( - 1, - x - ); + return boost::math::cyl_bessel_j(1, x); #endif 
- } - }; - - template< > - struct Jn< - int, - float - > - { - using result = float; - - HDINLINE result operator( )( - int const & n, - result const & x - ) - { -#if __CUDA_ARCH__ - return ::jnf( - n, - x - ); + } + }; + + template<> + struct Jn + { + using result = float; + + HDINLINE result operator()(int const& n, result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::jnf(n, x); #else - return boost::math::cyl_bessel_j( - n, - x - ); + return boost::math::cyl_bessel_j(n, x); #endif - } - }; - - template< > - struct Y0< float > - { - using result = float; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::y0f( x ); + } + }; + + template<> + struct Y0 + { + using result = float; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::y0f(x); #else - return boost::math::cyl_neumann( - 0, - x - ); + return boost::math::cyl_neumann(0, x); #endif - } - }; - - template< > - struct Y1< float > - { - using result = float; - - HDINLINE result operator( )( result const & x ) - { -#if __CUDA_ARCH__ - return ::y1f( x ); + } + }; + + template<> + struct Y1 + { + using result = float; + + HDINLINE result operator()(result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::y1f(x); #else - return boost::math::cyl_neumann( - 1, - x - ); + return boost::math::cyl_neumann(1, x); #endif - } - }; - - template< > - struct Yn< - int, - float - > - { - using result = float; - - HDINLINE result operator( )( - int const & n, - result const & x - ) - { -#if __CUDA_ARCH__ - return ::ynf( - n, - x - ); + } + }; + + template<> + struct Yn + { + using result = float; + + HDINLINE result operator()(int const& n, result const& x) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::ynf(n, x); #else - return boost::math::cyl_neumann( - n, - x - ); + return boost::math::cyl_neumann(n, x); #endif - } - }; + } + }; -} //namespace bessel -} 
//namespace math -} //namespace algorithms -} //namespace pmacc + } // namespace bessel + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/comparison.tpp b/include/pmacc/algorithms/math/floatMath/comparison.tpp index 0897fbe393..ca27ada2ea 100644 --- a/include/pmacc/algorithms/math/floatMath/comparison.tpp +++ b/include/pmacc/algorithms/math/floatMath/comparison.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Benjamin Worpitz, Richard Pausch +/* Copyright 2015-2021 Benjamin Worpitz, Richard Pausch * * This file is part of PMacc. * @@ -27,33 +27,29 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Min -{ - typedef float result; - - HDINLINE float operator()(float value1, float value2) + namespace math { - return ::fminf(value1, value2); - } -}; - -template<> -struct Max -{ - typedef float result; - - HDINLINE float operator()(float value1, float value2) - { - return ::fmaxf(value1, value2); - } -}; - -} //namespace math -} //namespace algorithms -} //namespace pmacc + template<> + struct Min + { + typedef float result; + + HDINLINE float operator()(float value1, float value2) + { + return ::fminf(value1, value2); + } + }; + + template<> + struct Max + { + typedef float result; + + HDINLINE float operator()(float value1, float value2) + { + return ::fmaxf(value1, value2); + } + }; + + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/erf.tpp b/include/pmacc/algorithms/math/floatMath/erf.tpp deleted file mode 100644 index 10535fdf4d..0000000000 --- a/include/pmacc/algorithms/math/floatMath/erf.tpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2014-2020 Axel Huebl, Richard Pausch - * - * This file is part of PMacc. 
- * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -#include "pmacc/types.hpp" -#include - - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template<> -struct Erf -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return ::erff( value ); - } -}; - -} /* namespace math */ -} /* namespace algorithms */ -} /* namespace pmacc */ diff --git a/include/pmacc/algorithms/math/floatMath/exp.tpp b/include/pmacc/algorithms/math/floatMath/exp.tpp index 19878a6202..d648fc8894 100644 --- a/include/pmacc/algorithms/math/floatMath/exp.tpp +++ b/include/pmacc/algorithms/math/floatMath/exp.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PMacc. 
* @@ -28,48 +28,22 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Exp -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return ::expf( value ); - } -}; - -template<> -struct Log -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return ::logf( value ); - } -}; - -template<> -struct Log10 -{ - typedef float result; - - HDINLINE float operator( )(const float& value) + namespace math { -#if __CUDA_ARCH__ - return ::log10f( value ); + template<> + struct Log10 + { + typedef float result; + + HDINLINE float operator()(const float& value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::log10f(value); #else - return ::log10( value ); + return ::log10(value); #endif - } -}; + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/floatingPoint.tpp b/include/pmacc/algorithms/math/floatMath/floatingPoint.tpp index a6d41ec318..11e88907d9 100644 --- a/include/pmacc/algorithms/math/floatMath/floatingPoint.tpp +++ b/include/pmacc/algorithms/math/floatMath/floatingPoint.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, * Alexander Grund * * This file is part of PMacc. 
@@ -29,101 +29,75 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Floor -{ - typedef float result; - - HDINLINE result operator( )(result value) + namespace math { - return ::floorf( value ); - } -}; - -template<> -struct Ceil -{ - typedef float result; - - HDINLINE result operator( )(result value) - { - return ::ceil( value ); - } -}; - -template<> -struct Float2int_ru -{ - typedef int result; + template<> + struct Float2int_ru + { + typedef int result; - HDINLINE result operator( )(float value) - { -#if __CUDA_ARCH__ - return ::__float2int_ru( value ); + HDINLINE result operator()(float value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::__float2int_ru(value); #else - return static_cast(ceil(value)); + return static_cast(ceil(value)); #endif - } -}; + } + }; -template<> -struct Float2int_rd -{ - typedef int result; + template<> + struct Float2int_rd + { + typedef int result; - HDINLINE result operator( )(float value) - { -#if __CUDA_ARCH__ - return ::__float2int_rd( value ); + HDINLINE result operator()(float value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::__float2int_rd(value); #else - return static_cast(floor(value)); + return static_cast(floor(value)); #endif - } -}; + } + }; -template<> -struct Float2int_rn -{ - typedef int result; + template<> + struct Float2int_rn + { + typedef int result; - HDINLINE result operator( )(float value) - { -#if __CUDA_ARCH__ - return ::__float2int_rn( value ); + HDINLINE result operator()(float value) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::__float2int_rn(value); #else - if(value < 0.0f) - return -(*this)(-value); - float intPart; - float fracPart = std::modf(value, &intPart); - result res = static_cast(intPart); - /* epsilon in the following code is used to consider values - * "very close" to x.5 also as x.5 - */ - if(fracPart > 0.5f + std::numeric_limits::epsilon()) - { - /* >x.5 --> Round up */ - res = res + 1; - } - else 
if(!(fracPart < 0.5f - std::numeric_limits::epsilon())) - { - /* We are NOT >x.5 AND NOT ==x.5 --> use x if x is even, else x+1 - * The "&~1" cancels the last bit which results in an even value - * res is even -> res+1 is odd -> (res+1)&~1 = res - * res is odd -> res+1 is even -> (res+1)&~1 = res+1 - */ - res = (res + 1) & ~1; - } - /* else res = res (round down) */ - return res; + if(value < 0.0f) + return -(*this)(-value); + float intPart; + float fracPart = std::modf(value, &intPart); + result res = static_cast(intPart); + /* epsilon in the following code is used to consider values + * "very close" to x.5 also as x.5 + */ + if(fracPart > 0.5f + std::numeric_limits::epsilon()) + { + /* >x.5 --> Round up */ + res = res + 1; + } + else if(!(fracPart < 0.5f - std::numeric_limits::epsilon())) + { + /* We are NOT >x.5 AND NOT ==x.5 --> use x if x is even, else x+1 + * The "&~1" cancels the last bit which results in an even value + * res is even -> res+1 is odd -> (res+1)&~1 = res + * res is odd -> res+1 is even -> (res+1)&~1 = res+1 + */ + res = (res + 1) & ~1; + } + /* else res = res (round down) */ + return res; #endif - } -}; + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/fmod.tpp b/include/pmacc/algorithms/math/floatMath/fmod.tpp deleted file mode 100644 index 47ff15fc99..0000000000 --- a/include/pmacc/algorithms/math/floatMath/fmod.tpp +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2016-2020 Alexander Debus - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -#include "pmacc/types.hpp" -#include - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template<> -struct Fmod -{ - typedef float result; - - HDINLINE result operator( )(result x, result y) - { -#if __CUDA_ARCH__ - return ::fmodf(x, y); -#else - return std::fmod(x, y); -#endif - } -}; - -} //namespace math -} //namespace algorithms -} //namespace pmacc - diff --git a/include/pmacc/algorithms/math/floatMath/modf.tpp b/include/pmacc/algorithms/math/floatMath/modf.tpp index afaa41412f..818298e028 100644 --- a/include/pmacc/algorithms/math/floatMath/modf.tpp +++ b/include/pmacc/algorithms/math/floatMath/modf.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. 
* @@ -26,26 +26,22 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Modf -{ - typedef float result; - - HDINLINE float operator()(float value, float* intpart) + namespace math { -#if __CUDA_ARCH__ - return ::modff(value, intpart); + template<> + struct Modf + { + typedef float result; + + HDINLINE float operator()(float value, float* intpart) + { +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu + return ::modff(value, intpart); #else - return std::modf(value, intpart); + return std::modf(value, intpart); #endif - } -}; + } + }; -} //namespace math -} //namespace algorithms + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/pow.tpp b/include/pmacc/algorithms/math/floatMath/pow.tpp deleted file mode 100644 index 43b1433ea6..0000000000 --- a/include/pmacc/algorithms/math/floatMath/pow.tpp +++ /dev/null @@ -1,76 +0,0 @@ -/* Copyright 2013-2020 Rene Widera, Alexander Grund - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - - -#pragma once - -#include "pmacc/types.hpp" -#include - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -/*C++98 standard define a separate version for int and float exponent*/ - -template<> -struct Pow -{ - typedef float result; - - HDINLINE result operator()(const float& base, const float& exponent) - { -#ifdef __CUDA_ARCH__ /*device version*/ - /* CUDA seems to have an optimized version for powf which is faster and (maybe) less accurate. */ - return ::powf(base, exponent); -#else - return ::pow(base, exponent); -#endif - - } -}; - -template<> -struct Pow -{ - typedef float result; - - HDINLINE result operator()(const float& base, const int& exponent) - { -#ifdef __CUDA_ARCH__ /*device version*/ - /* @todo: There is an incompatibility with C++11 + CUDA + GCC where no device function - * pow(float, int) is defined: http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-tools-title-known - * Use the powf(float, float) instead which reduces performance or implement an own (faster) version - */ - return ::powf(base, exponent); -#else - return ::pow(base, exponent); -#endif - - } -}; - -} //namespace math -} //namespace algorithms -} // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/sqrt.tpp b/include/pmacc/algorithms/math/floatMath/sqrt.tpp deleted file mode 100644 index 4ababcb778..0000000000 --- a/include/pmacc/algorithms/math/floatMath/sqrt.tpp +++ /dev/null @@ -1,65 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, - * Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - - -#pragma once - -#include "pmacc/types.hpp" -#include - - -namespace pmacc -{ -namespace algorithms -{ -namespace math -{ - -template<> -struct Sqrt -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return ::sqrtf( value ); - } -}; - -template<> -struct RSqrt -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { -#if !defined(__CUDACC__) - return 1.0f/::sqrtf(value); -#else - return ::rsqrtf(value); -#endif - } -}; - -} //namespace math -} //namespace algorithms -} // namespace pmacc diff --git a/include/pmacc/algorithms/math/floatMath/trigo.tpp b/include/pmacc/algorithms/math/floatMath/trigo.tpp index 543eb2c3e5..7b4151c1cb 100644 --- a/include/pmacc/algorithms/math/floatMath/trigo.tpp +++ b/include/pmacc/algorithms/math/floatMath/trigo.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, * Axel Huebl, Alexander Debus * * This file is part of PMacc. 
@@ -29,128 +29,37 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -template<> -struct Sin -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return ::sinf( value ); - } -}; - -template<> -struct ASin -{ - typedef float result; - - HDINLINE float operator( )(const float& value) - { -#if __CUDA_ARCH__ - return ::asinf( value ); -#else - return ::asin( value ); -#endif - } -}; - -template<> -struct Cos -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) + namespace math { - return ::cosf( value ); - } -}; - -template<> -struct ACos -{ - typedef float result; + template<> + struct SinCos + { + typedef void result; - HDINLINE float operator( )(const float& value) - { -#if __CUDA_ARCH__ - return ::acosf( value ); -#else - return ::acos( value ); -#endif - } -}; - -template<> -struct Tan -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - return ::tanf( value ); - } -}; - -template<> -struct ATan -{ - typedef float result; - - HDINLINE float operator( )(const float& value) - { - return ::atanf( value ); - } -}; - -template<> -struct SinCos -{ - typedef void result; - - HDINLINE void operator( )(float arg, float& sinValue, float& cosValue ) - { + HDINLINE void operator()(float arg, float& sinValue, float& cosValue) + { #if defined(_MSC_VER) && !defined(__CUDA_ARCH__) - sinValue = ::sinf(arg); - cosValue = ::cosf(arg); + sinValue = cupla::math::sin(arg); + cosValue = cupla::math::cos(arg); #else - ::sincosf( arg, &sinValue, &cosValue ); + ::sincosf(arg, &sinValue, &cosValue); #endif - } -}; - - - -template<> -struct Sinc -{ - typedef float result; - - HDINLINE float operator( )(const float& value ) - { - if(pmacc::algorithms::math::abs(value) < FLT_EPSILON) - return 1.0; - else - return pmacc::algorithms::math::sin( value )/value; - } -}; - -template<> -struct Atan2 -{ - typedef float result; - - HDINLINE float operator( )(const float& val1, const float& val2 ) - 
{ - return ::atan2f( val1, val2 ); - } -}; - -} //namespace math -} //namespace algorithms + } + }; + + template<> + struct Sinc + { + typedef float result; + + HDINLINE float operator()(const float& value) + { + if(cupla::math::abs(value) < FLT_EPSILON) + return 1.0f; + else + return cupla::math::sin(value) / value; + } + }; + + } // namespace math } // namespace pmacc diff --git a/include/pmacc/algorithms/reverseBits.hpp b/include/pmacc/algorithms/reverseBits.hpp index 2724284eb6..ea4234545e 100644 --- a/include/pmacc/algorithms/reverseBits.hpp +++ b/include/pmacc/algorithms/reverseBits.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -26,37 +26,37 @@ #include #include -namespace pmacc{ - -/** - * Reverses the bit in an unsigned integral value - * - * Based on "Bit Twiddling Hacks" by Sean Eron Anderson - * published in public domain. Retrieved on 13th of August, 2015 from - * http://www.graphics.stanford.edu/~seander/bithacks.html - * - * @param value Value which should be reversed - * @return Reversed value - */ -template -T -reverseBits(T value) +namespace pmacc { - PMACC_STATIC_ASSERT_MSG( boost::is_integral::value && boost::is_unsigned::value, - Only_allowed_for_unsigned_integral_types ); - /* init with value (to get LSB) */ - T result = value; - /* extra shift needed at end */ - int s = sizeof(T) * CHAR_BIT - 1; - for (value >>= 1; value; value >>= 1) + /** + * Reverses the bit in an unsigned integral value + * + * Based on "Bit Twiddling Hacks" by Sean Eron Anderson + * published in public domain. 
Retrieved on 13th of August, 2015 from + * http://www.graphics.stanford.edu/~seander/bithacks.html + * + * @param value Value which should be reversed + * @return Reversed value + */ + template + T reverseBits(T value) { - result <<= 1; - result |= value & 1; - s--; + PMACC_STATIC_ASSERT_MSG( + boost::is_integral::value && boost::is_unsigned::value, + Only_allowed_for_unsigned_integral_types, ); + /* init with value (to get LSB) */ + T result = value; + /* extra shift needed at end */ + int s = sizeof(T) * CHAR_BIT - 1; + for(value >>= 1; value; value >>= 1) + { + result <<= 1; + result |= value & 1; + s--; + } + /* shift when values highest bits are zero */ + result <<= s; + return result; } - /* shift when values highest bits are zero */ - result <<= s; - return result; -} -} // namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/assert.hpp b/include/pmacc/assert.hpp index 245701de0a..5b985f78eb 100644 --- a/include/pmacc/assert.hpp +++ b/include/pmacc/assert.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera, Pawel Ordyna * * This file is part of PMacc. * @@ -24,37 +24,70 @@ #include "pmacc/debug/abortWithError.hpp" -#ifdef NDEBUG - // debug mode is disabled +#include - /* `(void)0` force a semicolon after the macro function */ -# define PMACC_ASSERT( expr ) ( (void) 0 ) +// disabled for no-debug mode or for the device compile path +#if defined(NDEBUG) || (CUPLA_DEVICE_COMPILE == 1) - /* `(void)0` force a semicolon after the macro function */ -# define PMACC_ASSERT_MSG( expr, msg ) ( (void) 0 ) +/* `(void)0` force a semicolon after the macro function */ +# define PMACC_ASSERT(expr) ((void) 0) + +/* `(void)0` force a semicolon after the macro function */ +# define PMACC_ASSERT_MSG(expr, msg) ((void) 0) + +#else + +/** assert check (host side only) + * + * if `NDEBUG` is defined: macro expands to (void)0 + * + * @param expr expression to be evaluated + */ +# define PMACC_ASSERT(expr) (!!(expr)) ? 
((void) 0) : pmacc::abortWithError(# expr, __FILE__, __LINE__) + +/** assert check with message (host side only) + * + * if `NDEBUG` is defined: macro expands to (void)0 + * + * @param expr expression to be evaluated + * @param msg output message (of type `std::string`) which is printed if the + * expression is evaluated to false + */ +# define PMACC_ASSERT_MSG(expr, msg) (!!(expr)) ? ((void) 0) : pmacc::abortWithError(# expr, __FILE__, __LINE__, msg) + +#endif + +// disabled for no-debug mode or for the host compile path +#if defined(NDEBUG) || (CUPLA_DEVICE_COMPILE == 0) + +/* `(void)0` force a semicolon after the macro function */ +# define PMACC_DEVICE_ASSERT(expr) ((void) 0) + +// debug mode is disabled +/* `(void)0` force a semicolon after the macro function */ +# define PMACC_DEVICE_ASSERT_MSG(expr, ...) ((void) 0) #else - // debug mode is enabled - - /** assert check - * - * if `NDEBUG` is not defined: macro expands to (void)0 - * - * @param expr expression to be evaluated - */ -# define PMACC_ASSERT( expr ) \ - ( !!(expr) ) ? ( (void) 0 ) : pmacc::abortWithError( #expr, __FILE__, __LINE__ ) - - /** assert check with message - * - * if `NDEBUG` is not defined: macro expands to (void)0 - * - * @param expr expression to be evaluated - * @param msg output message (of type `std::string`) which is printed if the - * expression is evaluated to false - */ -# define PMACC_ASSERT_MSG( expr, msg ) \ - ( !!(expr) ) ? ( (void) 0 ) : pmacc::abortWithError( #expr, __FILE__, __LINE__, msg ) +/** assert check for kernels (device side) + * + * if `NDEBUG` is defined: macro expands to (void)0 + * @param expr expression to be evaluated + */ +# define PMACC_DEVICE_ASSERT(expr) assert(expr) +/** assert check with message (device side) + * + * if `NDEBUG` is defined: macro expands to (void)0 + * + * Beside the usual assert message an additional message is printed to stdout with `printf`. 
+ * Pass your `printf` arguments after the evaluated expression, for example to print some local variables: + * @code{.cpp} + * PMACC_DEVICE_ASSERT_MSG((x > 0), "x was %e, a was %e", x, a); + * @endcode + * + * @param expr expression to be evaluated + * @param ... parameters passed to printf + */ +# define PMACC_DEVICE_ASSERT_MSG(expr, ...) (!!(expr)) ? ((void) 0) : (printf(__VA_ARGS__), assert(expr)) #endif diff --git a/include/pmacc/attribute/Constexpr.hpp b/include/pmacc/attribute/Constexpr.hpp index 92d1d48cbf..de241300ea 100644 --- a/include/pmacc/attribute/Constexpr.hpp +++ b/include/pmacc/attribute/Constexpr.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -43,10 +43,10 @@ * constexpr is captured, but also it has to remain constexpr inside a lambda. */ #ifdef _MSC_VER -# define PMACC_CONSTEXPR_CAPTURE static constexpr -#elif ( defined __GNUC__ ) && ( __GNUC__ > 7 ) +# define PMACC_CONSTEXPR_CAPTURE static constexpr +#elif(defined __GNUC__) && (__GNUC__ > 7) // workaround for GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91377 -# define PMACC_CONSTEXPR_CAPTURE static constexpr +# define PMACC_CONSTEXPR_CAPTURE static constexpr #else -# define PMACC_CONSTEXPR_CAPTURE constexpr +# define PMACC_CONSTEXPR_CAPTURE constexpr #endif diff --git a/include/pmacc/attribute/Fallthrough.hpp b/include/pmacc/attribute/Fallthrough.hpp index 1d6056065d..2f48236191 100644 --- a/include/pmacc/attribute/Fallthrough.hpp +++ b/include/pmacc/attribute/Fallthrough.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -36,10 +36,10 @@ * * Use [[fallthrough]] in C++17 */ -#if (BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(7,0,0)) -# define PMACC_FALLTHROUGH 
[[gnu::fallthrough]] +#if(BOOST_COMP_GNUC >= BOOST_VERSION_NUMBER(7, 0, 0)) +# define PMACC_FALLTHROUGH [[gnu::fallthrough]] #elif BOOST_COMP_CLANG -# define PMACC_FALLTHROUGH [[clang::fallthrough]] +# define PMACC_FALLTHROUGH [[clang::fallthrough]] #else -# define PMACC_FALLTHROUGH ( (void)0 ) +# define PMACC_FALLTHROUGH ((void) 0) #endif diff --git a/include/pmacc/attribute/FunctionSpecifier.hpp b/include/pmacc/attribute/FunctionSpecifier.hpp index 5b243f428c..753be0dd26 100644 --- a/include/pmacc/attribute/FunctionSpecifier.hpp +++ b/include/pmacc/attribute/FunctionSpecifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -36,9 +36,9 @@ * 0 for host compilation */ #ifndef __CUDA_ARCH__ -# define PMACC_CUDA_ARCH 0 +# define PMACC_CUDA_ARCH 0 #else -# define PMACC_CUDA_ARCH __CUDA_ARCH__ +# define PMACC_CUDA_ARCH __CUDA_ARCH__ #endif /** PMacc global identifier for CUDA kernel */ @@ -59,7 +59,7 @@ * Most cases can solved by #ifdef __CUDA_ARCH__ or #ifdef __CUDACC__. */ #if defined(__CUDACC__) -# define PMACC_NO_NVCC_HDWARNING _Pragma("hd_warning_disable") +# define PMACC_NO_NVCC_HDWARNING _Pragma("hd_warning_disable") #else -# define PMACC_NO_NVCC_HDWARNING +# define PMACC_NO_NVCC_HDWARNING #endif diff --git a/include/pmacc/boost_workaround.hpp b/include/pmacc/boost_workaround.hpp new file mode 100644 index 0000000000..ad755a32eb --- /dev/null +++ b/include/pmacc/boost_workaround.hpp @@ -0,0 +1,44 @@ +/* Copyright 2020-2021 Rene Widera + * + * This file is part of PMacc. + * + * PMacc is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PMacc is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with PMacc. + * If not, see . + */ +#pragma once + +/** @file This file should be included in each `cpp`-file before any other boost include + * to workaround compiler errors when compiling with clang-cuda and boost <1.69.0 + * + * https://github.com/ComputationalRadiationPhysics/picongpu/issues/3294 + */ +#include +#if(BOOST_VERSION < 106900 && defined(__CUDACC__) && defined(__clang__)) +# if defined(__CUDACC__) +# include +# endif +# if(!defined(__ibmxl__)) +# include +# endif +# undef __CUDACC__ +# include +# define __CUDACC__ +#endif +/* workaround for compile error with clang-cuda + * boost/type_traits/is_base_and_derived.hpp:142:25: error: invalid application of 'sizeof' to an incomplete type + * 'boost::in_place_factory_base' BOOST_STATIC_ASSERT(sizeof(B) != 0); + */ +#include diff --git a/include/pmacc/communication/AsyncCommunication.hpp b/include/pmacc/communication/AsyncCommunication.hpp index 545f1e8499..5998be388b 100644 --- a/include/pmacc/communication/AsyncCommunication.hpp +++ b/include/pmacc/communication/AsyncCommunication.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -21,53 +21,54 @@ #pragma once -namespace pmacc{ -namespace communication { - - /** - * Wrapper to convert a bool into a type - */ - template - struct Bool2Type; +namespace pmacc +{ + namespace communication + { + /** + * Wrapper to convert a bool into a type + */ + template + struct Bool2Type; - /** - * Implementations of \see AsyncCommunication should specialize this, - * but it is not intended to be called directly. Use \see AsyncCommunication - * - * The 2nd template parameter can be used to check for conditions on - * templated implementations. E.g.: - * - * template - * struct AsyncCommunicationImpl< - * T_Data, - * Bool2Type< boost::is_integral::value > - * >{...} - */ - template > - struct AsyncCommunicationImpl; + /** + * Implementations of \see AsyncCommunication should specialize this, + * but it is not intended to be called directly. Use \see AsyncCommunication + * + * The 2nd template parameter can be used to check for conditions on + * templated implementations. E.g.: + * + * template + * struct AsyncCommunicationImpl< + * T_Data, + * Bool2Type< boost::is_integral::value > + * >{...} + */ + template> + struct AsyncCommunicationImpl; - /** - * This policy starts an asynchronous communication of the given data - * (e.g. a particle species) - * - * It must be a functor with signature EventTask(T_Data&, EventTask parentEvent) - * but can be templated (again) over T_Data to get the actual type. This - * is helpful for generic implementations that apply to T_Data and all - * derived classes but want to use the possibly more derived type - * - * For different T_Data types you can either specialize this or the more - * generic \see AsyncCommunicationImpl - */ - template - struct AsyncCommunication: public AsyncCommunicationImpl - {}; + /** + * This policy starts an asynchronous communication of the given data + * (e.g. 
a particle species) + * + * It must be a functor with signature EventTask(T_Data&, EventTask parentEvent) + * but can be templated (again) over T_Data to get the actual type. This + * is helpful for generic implementations that apply to T_Data and all + * derived classes but want to use the possibly more derived type + * + * For different T_Data types you can either specialize this or the more + * generic \see AsyncCommunicationImpl + */ + template + struct AsyncCommunication : public AsyncCommunicationImpl + { + }; - template - EventTask - asyncCommunication(T_Data& data, EventTask parent) - { - return AsyncCommunication()(data, parent); - } + template + EventTask asyncCommunication(T_Data& data, EventTask parent) + { + return AsyncCommunication()(data, parent); + } -} // namespace communication -} // namespace pmacc + } // namespace communication +} // namespace pmacc diff --git a/include/pmacc/communication/CommunicatorMPI.hpp b/include/pmacc/communication/CommunicatorMPI.hpp index 416169a51c..676a3dbad3 100644 --- a/include/pmacc/communication/CommunicatorMPI.hpp +++ b/include/pmacc/communication/CommunicatorMPI.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. 
@@ -36,409 +36,418 @@ namespace pmacc { - -namespace detail -{ - template - struct LogRankCoords; - - template <> - struct LogRankCoords + namespace detail { - void operator()(int rank, const int (&coords)[DIM1]) const + template + struct LogRankCoords; + + template<> + struct LogRankCoords { - log("Rank: %1% ; coords %2%") % rank % coords[0]; - } - }; - template <> - struct LogRankCoords - { - void operator()(int rank, const int (&coords)[DIM2]) const + void operator()(int rank, const int (&coords)[DIM1]) const + { + log("Rank: %1% ; coords %2%") % rank % coords[0]; + } + }; + template<> + struct LogRankCoords { - log("Rank: %1% ; coords %2% %3%") % rank % coords[0] % coords[1]; - } - }; - template <> - struct LogRankCoords - { - void operator()(int rank, const int (&coords)[DIM3]) const + void operator()(int rank, const int (&coords)[DIM2]) const + { + log("Rank: %1% ; coords %2% %3%") % rank % coords[0] % coords[1]; + } + }; + template<> + struct LogRankCoords { - log("Rank: %1% ; coords %2% %3% %4%") % rank % coords[0] % coords[1] % coords[2]; - } - }; - -} + void operator()(int rank, const int (&coords)[DIM3]) const + { + log("Rank: %1% ; coords %2% %3% %4%") % rank % coords[0] % coords[1] % coords[2]; + } + }; -/*! communication via MPI - */ -template -class CommunicatorMPI : public ICommunicator -{ -public: + } // namespace detail - /*! ctor + /*! communication via MPI */ - CommunicatorMPI() : hostRank(0) + template + class CommunicatorMPI : public ICommunicator { - //MPI_Init(nullptr, nullptr); - } - - /*! dtor - * - * calls MPI_Finalize - */ - virtual ~CommunicatorMPI() - {} - - virtual int getRank() - { - return mpiRank; - } - - virtual int getSize() - { - return mpiSize; - } - - MPI_Comm getMPIComm() const - { - return topology; - } + public: + /*! ctor + */ + CommunicatorMPI() : hostRank(0) + { + // MPI_Init(nullptr, nullptr); + } - MPI_Info getMPIInfo() const - { - return MPI_INFO_NULL; - } + /*! 
dtor + * + * calls MPI_Finalize + */ + virtual ~CommunicatorMPI() + { + } - DataSpace getPeriodic() const - { - return this->periodic; - } - - /*! initializes all processes to build a 3D-grid - * - * @param nodes number of GPU nodes in each dimension - * @param periodic specifying whether the grid is periodic (1) or not (0) in each dimension - * - * \warning throws invalid argument if cx*cy*cz != totalnodes - */ - void init(DataSpace numberProcesses, DataSpace periodic) - { - this->periodic = periodic; + virtual int getRank() + { + return mpiRank; + } - //check if parameters are correct - MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &mpiSize)); + virtual int getSize() + { + return mpiSize; + } - if (numberProcesses.productOfComponents() != mpiSize) + MPI_Comm getMPIComm() const { - throw std::invalid_argument("wrong parameters or wrong mpirun-call!"); + return topology; } - //1. create Communicator (computing_comm) of computing nodes (ranks 0...n) - MPI_Comm computing_comm = MPI_COMM_WORLD; + MPI_Info getMPIInfo() const + { + return MPI_INFO_NULL; + } - yoffset = 0; + DataSpace getPeriodic() const + { + return this->periodic; + } - // 2. create topology + /*! initializes all processes to build a 3D-grid + * + * @param nodes number of GPU nodes in each dimension + * @param periodic specifying whether the grid is periodic (1) or not (0) in each dimension + * + * \warning throws invalid argument if cx*cy*cz != totalnodes + */ + void init(DataSpace numberProcesses, DataSpace periodic) + { + this->periodic = periodic; - dims[0] = numberProcesses.x(); - dims[1] = numberProcesses.y(); - dims[2] = numberProcesses.z(); + // check if parameters are correct + MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &mpiSize)); - topology = MPI_COMM_NULL; + if(numberProcesses.productOfComponents() != mpiSize) + { + throw std::invalid_argument("wrong parameters or wrong mpirun-call!"); + } - int periods[] = {periodic.x(), periodic.y(), periodic.z()}; + // 1. 
create Communicator (computing_comm) of computing nodes (ranks 0...n) + MPI_Comm computing_comm = MPI_COMM_WORLD; - /*create new communicator based on cartesian coordinates*/ - MPI_CHECK(MPI_Cart_create(computing_comm, DIM, dims, periods, 0, &topology)); + yoffset = 0; - // 3. update Host rank - updateHostRank(); + // 2. create topology - //4. update Coordinates - updateCoordinates(); - } + dims[0] = numberProcesses.x(); + dims[1] = numberProcesses.y(); + dims[2] = numberProcesses.z(); - /*! returns a rank number (0-n) for each host - * - * E.g. if 8 GPUs are on 2 Hosts (4 GPUs each), the GPUs on each host will get hostrank 0 to 3 - * - */ - uint32_t getHostRank() - { - return hostRank; - } + topology = MPI_COMM_NULL; - // description in ICommunicator + int periods[] = {periodic.x(), periodic.y(), periodic.z()}; - virtual const Mask& getCommunicationMask() const - { - return communicationMask; - } + /*create new communicator based on cartesian coordinates*/ + MPI_CHECK(MPI_Cart_create(computing_comm, DIM, dims, periods, 0, &topology)); - /*! returns coordinate of this process in (via init) created grid - * - * Coordinates are between [0-cx, 0-cy, 0-cz] - * - */ - const DataSpace getCoordinates() const - { - return this->coordinates; - } + // 3. update Host rank + updateHostRank(); - // description in ICommunicator + // 4. update Coordinates + updateCoordinates(); + } - MPI_Request* startSend(uint32_t ex, const char *send_data, size_t send_data_count, uint32_t tag) - { - MPI_Request *request = new MPI_Request; + /*! returns a rank number (0-n) for each host + * + * E.g. 
if 8 GPUs are on 2 Hosts (4 GPUs each), the GPUs on each host will get hostrank 0 to 3 + * + */ + uint32_t getHostRank() + { + return hostRank; + } - MPI_CHECK(MPI_Isend( - (void*) send_data, - static_cast(send_data_count), - MPI_CHAR, - ExchangeTypeToRank(ex), - gridExchangeTag + tag, - topology, - request)); + // description in ICommunicator - return request; - } + virtual const Mask& getCommunicationMask() const + { + return communicationMask; + } - // description in ICommunicator + /*! returns coordinate of this process in (via init) created grid + * + * Coordinates are between [0-cx, 0-cy, 0-cz] + * + */ + const DataSpace getCoordinates() const + { + return this->coordinates; + } - MPI_Request* startReceive(uint32_t ex, char *recv_data, size_t recv_data_max, uint32_t tag) - { + // description in ICommunicator - MPI_Request *request = new MPI_Request; + MPI_Request* startSend(uint32_t ex, const char* send_data, size_t send_data_count, uint32_t tag) + { + MPI_Request* request = new MPI_Request; + + MPI_CHECK(MPI_Isend( + (void*) send_data, + static_cast(send_data_count), + MPI_CHAR, + ExchangeTypeToRank(ex), + gridExchangeTag + tag, + topology, + request)); + + return request; + } - MPI_CHECK(MPI_Irecv( - recv_data, - static_cast(recv_data_max), - MPI_CHAR, - ExchangeTypeToRank(ex), - gridExchangeTag + tag, - topology, - request)); + // description in ICommunicator - return request; - } + MPI_Request* startReceive(uint32_t ex, char* recv_data, size_t recv_data_max, uint32_t tag) + { + MPI_Request* request = new MPI_Request; + + MPI_CHECK(MPI_Irecv( + recv_data, + static_cast(recv_data_max), + MPI_CHAR, + ExchangeTypeToRank(ex), + gridExchangeTag + tag, + topology, + request)); + + return request; + } - // description in ICommunicator + // description in ICommunicator - bool slide() - { - // we can only slide in y direction right now - if(DIM < DIM2) - return false; + bool slide() + { + // we can only slide in y direction right now + if(DIM < DIM2) + return 
false; - // MPI_Barrier(topology); - yoffset--; - if (yoffset == -dims[1]) - yoffset = 0; + // MPI_Barrier(topology); + yoffset--; + if(yoffset == -dims[1]) + yoffset = 0; - updateCoordinates(); + updateCoordinates(); - return coordinates[1] == dims[1] - 1; - } + return coordinates[1] == dims[1] - 1; + } - bool setStateAfterSlides(size_t numSlides) - { - // nothing happens - if(numSlides == 0) - return false; + bool setStateAfterSlides(size_t numSlides) + { + // nothing happens + if(numSlides == 0) + return false; - // we can only slide in y direction right now - if(DIM < DIM2) - return false; + // we can only slide in y direction right now + if(DIM < DIM2) + return false; - bool result = false; + bool result = false; - // only need to apply (numSlides % num-gpus-y) slides - for (size_t i = 0; i < (numSlides % dims[1]); ++i) - result = slide(); + // only need to apply (numSlides % num-gpus-y) slides + for(size_t i = 0; i < (numSlides % dims[1]); ++i) + result = slide(); - return result; - } + return result; + } -protected: - /* Set the first found non charactor or number to 0 (nullptr) - * name like p1223(Pid=1233) is than p1223 - * in some MPI implementation /mpich) the hostname is unique - */ - void cleanHostname(char* name) - { - for (int i = 0; i < MPI_MAX_PROCESSOR_NAME; ++i) + protected: + /* Set the first found non charactor or number to 0 (nullptr) + * name like p1223(Pid=1233) is than p1223 + * in some MPI implementation /mpich) the hostname is unique + */ + void cleanHostname(char* name) { - if (!(name[i] >= 'A' && name[i] <= 'Z') && - !(name[i] >= 'a' && name[i] <= 'z') && - !(name[i] >= '0' && name[i] <= '9') && - !(name[i] == '_') && - !(name[i] == '-') ) + for(int i = 0; i < MPI_MAX_PROCESSOR_NAME; ++i) { - name[i] = 0; - return; + if(!(name[i] >= 'A' && name[i] <= 'Z') && !(name[i] >= 'a' && name[i] <= 'z') + && !(name[i] >= '0' && name[i] <= '9') && !(name[i] == '_') && !(name[i] == '-')) + { + name[i] = 0; + return; + } } } - } - - /*! 
gets hostRank - * - * process with MPI-rank 0 is the master and builds a map with hostname - * and number of already known processes on this host. - * Each rank will provide its hostname via MPISend and gets its HostRank - * from the master. - * - */ - void updateHostRank() - { - char hostname[MPI_MAX_PROCESSOR_NAME]; - int length; - MPI_CHECK(MPI_Get_processor_name(hostname, &length)); - cleanHostname(hostname); - hostname[length++] = '\0'; - - MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &mpiSize)); - MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank)); - - if (mpiRank == 0) + /*! gets hostRank + * + * process with MPI-rank 0 is the master and builds a map with hostname + * and number of already known processes on this host. + * Each rank will provide its hostname via MPISend and gets its HostRank + * from the master. + * + */ + void updateHostRank() { - std::map hosts; - hosts[hostname] = 0; - hostRank = 0; - for (int rank = 1; rank < mpiSize; ++rank) - { - MPI_CHECK(MPI_Recv(hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, rank, gridHostnameTag, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); + char hostname[MPI_MAX_PROCESSOR_NAME]; + int length; - //printf("Hostname: %s\n", hostname); - int hostrank = 0; - if (hosts.count(hostname) > 0) hostrank = hosts[hostname] + 1; + MPI_CHECK(MPI_Get_processor_name(hostname, &length)); + cleanHostname(hostname); + hostname[length++] = '\0'; - MPI_CHECK(MPI_Send(&hostrank, 1, MPI_INT, rank, gridHostRankTag, MPI_COMM_WORLD)); + MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &mpiSize)); + MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank)); - hosts[hostname] = hostrank; + if(mpiRank == 0) + { + std::map hosts; + hosts[hostname] = 0; + hostRank = 0; + for(int rank = 1; rank < mpiSize; ++rank) + { + MPI_CHECK(MPI_Recv( + hostname, + MPI_MAX_PROCESSOR_NAME, + MPI_CHAR, + rank, + gridHostnameTag, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE)); + + // printf("Hostname: %s\n", hostname); + int hostrank = 0; + if(hosts.count(hostname) > 0) + hostrank = hosts[hostname] 
+ 1; + + MPI_CHECK(MPI_Send(&hostrank, 1, MPI_INT, rank, gridHostRankTag, MPI_COMM_WORLD)); + + hosts[hostname] = hostrank; + } + } + else + { + MPI_CHECK(MPI_Send(hostname, length, MPI_CHAR, GridManagerRank, gridHostnameTag, MPI_COMM_WORLD)); + + MPI_CHECK(MPI_Recv( + &hostRank, + 1, + MPI_INT, + GridManagerRank, + gridHostRankTag, + MPI_COMM_WORLD, + MPI_STATUS_IGNORE)); + + // if(hostRank!=0) hostRank--; //!\todo fix mpi hostrank start with 1 } - - } - else - { - MPI_CHECK(MPI_Send(hostname, length, MPI_CHAR, GridManagerRank, gridHostnameTag, MPI_COMM_WORLD)); - - MPI_CHECK(MPI_Recv(&hostRank, 1, MPI_INT, GridManagerRank, gridHostRankTag, MPI_COMM_WORLD, MPI_STATUS_IGNORE)); - - // if(hostRank!=0) hostRank--; //!\todo fix mpi hostrank start with 1 - } - - } - - /*! update coordinates \see getCoordinates - */ - void updateCoordinates() - { - // get own coordinates - int coords[DIM]; - int rank; - - MPI_CHECK(MPI_Comm_rank(topology, &rank)); - MPI_CHECK(MPI_Cart_coords(topology, rank, DIM, coords)); - - if (DIM >= DIM2) - { - if (dims[1] > 1) - coords[1] = (coords[1] + yoffset) % dims[1]; - - while (coords[1] < 0) - coords[1] += dims[1]; } - detail::LogRankCoords()(rank, coords); - - for (uint32_t i = 0; i < DIM; ++i) - this->coordinates[i] = coords[i]; - - // init ranks of other hosts - int mcoords[3]; - - communicationMask = Mask(); - - for (int i = 1; i<-12 * (int) DIM + 6 * (int) DIM * (int) DIM + 9; i++) + /*! 
update coordinates \see getCoordinates + */ + void updateCoordinates() { - for (uint32_t j = 0; j < DIM; j++) - mcoords[j] = coords[j]; + // get own coordinates + int coords[DIM]; + int rank; - Mask m(i); - if (m.containsExchangeType(LEFT)) - mcoords[0]--; - if (m.containsExchangeType(RIGHT)) - mcoords[0]++; + MPI_CHECK(MPI_Comm_rank(topology, &rank)); + MPI_CHECK(MPI_Cart_coords(topology, rank, DIM, coords)); - if (DIM >= DIM2) + if(DIM >= DIM2) { - if (m.containsExchangeType(TOP)) - mcoords[1]--; - if (m.containsExchangeType(BOTTOM)) - mcoords[1]++; - } + if(dims[1] > 1) + coords[1] = (coords[1] + yoffset) % dims[1]; - if (DIM == DIM3) - { - if (m.containsExchangeType(BACK)) - mcoords[2]++; - if (m.containsExchangeType(FRONT)) - mcoords[2]--; + while(coords[1] < 0) + coords[1] += dims[1]; } - bool ok = true; - for (uint32_t j = 0; j < DIM; j++) - if (periodic[j] == 0 && (mcoords[j] < 0 || mcoords[j] >= dims[j])) /*only check if no perodic for j dimension is set*/ - ok = false; + detail::LogRankCoords()(rank, coords); - if (ok) - { - if (dims[1] > 1) - mcoords[1] = (mcoords[1] - yoffset) % dims[1]; + for(uint32_t i = 0; i < DIM; ++i) + this->coordinates[i] = coords[i]; - MPI_CHECK(MPI_Cart_rank(topology, mcoords, &ranks[i])); - communicationMask = communicationMask + Mask(i); - } - else + // init ranks of other hosts + int mcoords[3]; + + communicationMask = Mask(); + + for(int i = 1; i < -12 * (int) DIM + 6 * (int) DIM * (int) DIM + 9; i++) { - ranks[i] = -1; + for(uint32_t j = 0; j < DIM; j++) + mcoords[j] = coords[j]; + + Mask m(i); + if(m.containsExchangeType(LEFT)) + mcoords[0]--; + if(m.containsExchangeType(RIGHT)) + mcoords[0]++; + + if(DIM >= DIM2) + { + if(m.containsExchangeType(TOP)) + mcoords[1]--; + if(m.containsExchangeType(BOTTOM)) + mcoords[1]++; + } + + if(DIM == DIM3) + { + if(m.containsExchangeType(BACK)) + mcoords[2]++; + if(m.containsExchangeType(FRONT)) + mcoords[2]--; + } + + bool ok = true; + for(uint32_t j = 0; j < DIM; j++) + 
if(periodic[j] == 0 + && (mcoords[j] < 0 + || mcoords[j] >= dims[j])) /*only check if no perodic for j dimension is set*/ + ok = false; + + if(ok) + { + if(dims[1] > 1) + mcoords[1] = (mcoords[1] - yoffset) % dims[1]; + + MPI_CHECK(MPI_Cart_rank(topology, mcoords, &ranks[i])); + communicationMask = communicationMask + Mask(i); + } + else + { + ranks[i] = -1; + } + + // std::cout << "rank: " << rank << " " << i << " : " << ranks[i] << std::endl; } + } - //std::cout << "rank: " << rank << " " << i << " : " << ranks[i] << std::endl; - + /*! converts an exchangeType (e.g. RIGHT) to an MPI-rank + */ + int ExchangeTypeToRank(uint32_t type) + { + return ranks[type]; } - } - /*! converts an exchangeType (e.g. RIGHT) to an MPI-rank - */ - int ExchangeTypeToRank(uint32_t type) - { - return ranks[type]; - } - -private: - //! coordinates in GPU-Grid [0:cx-1,0:cy-1,0:cz-1] - DataSpace coordinates; - - DataSpace periodic; - //! MPI communicator (currently MPI_COMM_WORLD) - MPI_Comm topology; - //! array for exchangetype-to-rank conversion \see ExchangeTypeToRank - int ranks[27]; - //! size of pmacc [cx,cy,cz] - int dims[3]; - //! \see getCommunicationMask - Mask communicationMask; - //! rank of this process local to its host (node) - int hostRank; - //! offset for sliding window - int yoffset; - - int mpiRank; - int mpiSize; -}; - -} //namespace pmacc + private: + //! coordinates in GPU-Grid [0:cx-1,0:cy-1,0:cz-1] + DataSpace coordinates; + + DataSpace periodic; + //! MPI communicator (currently MPI_COMM_WORLD) + MPI_Comm topology; + //! array for exchangetype-to-rank conversion \see ExchangeTypeToRank + int ranks[27]; + //! size of pmacc [cx,cy,cz] + int dims[3]; + //! \see getCommunicationMask + Mask communicationMask; + //! rank of this process local to its host (node) + int hostRank; + //! 
offset for sliding window + int yoffset; + + int mpiRank; + int mpiSize; + }; + +} // namespace pmacc diff --git a/include/pmacc/communication/ICommunicator.hpp b/include/pmacc/communication/ICommunicator.hpp index ae254b341d..c8bd436bec 100644 --- a/include/pmacc/communication/ICommunicator.hpp +++ b/include/pmacc/communication/ICommunicator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Wolfgang Hoenig, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Wolfgang Hoenig, Benjamin Worpitz * * This file is part of PMacc. * @@ -28,63 +28,63 @@ namespace pmacc { - -/*! Interface for communication - */ -class ICommunicator -{ -public: - - /*! returns available communication partners - * - * returns a mask with neighbors, e.g. if there is a right neighbor result.isSet(RIGHT) returns true + /*! Interface for communication */ - virtual const Mask& getCommunicationMask() const=0; + class ICommunicator + { + public: + /*! returns available communication partners + * + * returns a mask with neighbors, e.g. if there is a right neighbor result.isSet(RIGHT) returns true + */ + virtual const Mask& getCommunicationMask() const = 0; - /*! moves all GPUs from top to bottom (y-coordinate) - * - * @return true if the position of gpu is switched to the end, else false - */ - virtual bool slide() = 0; + /*! moves all GPUs from top to bottom (y-coordinate) + * + * @return true if the position of gpu is switched to the end, else false + */ + virtual bool slide() = 0; - /*! slides multiple times - * - * @param[in] numSlides number of slides - * @return true if the position of gpu is switched to the end, else false - */ - virtual bool setStateAfterSlides(size_t numSlides) = 0; + /*! slides multiple times + * + * @param[in] numSlides number of slides + * @return true if the position of gpu is switched to the end, else false + */ + virtual bool setStateAfterSlides(size_t numSlides) = 0; - //!\todo Interface should not depend on MPI! + //!\todo Interface should not depend on MPI! 
- /*! starts sending via MPI (non-blocking) - * - * \param[in] ex direction to send (enum ExchangeType) - * \param[in] send_data pointer to data; should have at least send_data_count bytes - * \param[in] send_data_count message size in bytes to sent - * \param[in] tag user-defined tag; only message with the same tag can be exchanged (i.e. startSend and startReceive must use the same tag) - * \returns an request for testing if this operation has already finished - */ - virtual MPI_Request* startSend(uint32_t ex, const char *send_data, size_t send_data_count, uint32_t tag) = 0; + /*! starts sending via MPI (non-blocking) + * + * \param[in] ex direction to send (enum ExchangeType) + * \param[in] send_data pointer to data; should have at least send_data_count bytes + * \param[in] send_data_count message size in bytes to sent + * \param[in] tag user-defined tag; only message with the same tag can be exchanged (i.e. + * startSend and startReceive must use the same tag) \returns an request for testing if this operation has + * already finished + */ + virtual MPI_Request* startSend(uint32_t ex, const char* send_data, size_t send_data_count, uint32_t tag) = 0; - /*! starts receiving via MPI (non-blocking) - * - * If recv_data_max is less then send_data_count (on other host) multiple startReceive are needed! - * - * \param[in] ex direction to send (enum ExchangeType) - * \param[in] recv_data pointer to data; should have at least recv_data_max bytes - * \param[in] recv_data_max maximum message size in bytes to receive - * \param[in] tag user-defined tag; only message with the same tag can be exchanged (i.e. startSend and startReceive must use the same tag) - * \returns an request for testing if this operation has already finished - */ - virtual MPI_Request* startReceive(uint32_t ex, char *recv_data, size_t recv_data_max, uint32_t tag) = 0; + /*! 
starts receiving via MPI (non-blocking) + * + * If recv_data_max is less then send_data_count (on other host) multiple startReceive are needed! + * + * \param[in] ex direction to send (enum ExchangeType) + * \param[in] recv_data pointer to data; should have at least recv_data_max bytes + * \param[in] recv_data_max maximum message size in bytes to receive + * \param[in] tag user-defined tag; only message with the same tag can be exchanged (i.e. + * startSend and startReceive must use the same tag) \returns an request for testing if this operation has + * already finished + */ + virtual MPI_Request* startReceive(uint32_t ex, char* recv_data, size_t recv_data_max, uint32_t tag) = 0; - virtual int getRank()=0; + virtual int getRank() = 0; - /*! Return which of the three directions are periodic - * - * \return for each direction a false (0) or true(1) value - */ - virtual DataSpace getPeriodic() const = 0; -}; + /*! Return which of the three directions are periodic + * + * \return for each direction a false (0) or true(1) value + */ + virtual DataSpace getPeriodic() const = 0; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/communication/manager_common.hpp b/include/pmacc/communication/manager_common.hpp index 9aeb67eaea..71164cd55c 100644 --- a/include/pmacc/communication/manager_common.hpp +++ b/include/pmacc/communication/manager_common.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Wolfgang Hoenig, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Wolfgang Hoenig, Axel Huebl * * This file is part of PMacc. 
* @@ -27,14 +27,30 @@ const int GridManagerRank = 0; -enum { - gridInitTag = 1, - gridHostnameTag = 2, - gridHostRankTag = 3, - gridExitTag = 4, - gridExchangeTag = 5 +enum +{ + gridInitTag = 1, + gridHostnameTag = 2, + gridHostRankTag = 3, + gridExitTag = 4, + gridExchangeTag = 5 }; -#define MPI_CHECK(cmd) {int error = cmd; if(error!=MPI_SUCCESS){std::cerr << "<" << __FILE__ << ">:" << __LINE__; throw std::runtime_error(std::string("[MPI] Error"));}} +#define MPI_CHECK(cmd) \ + { \ + int error = cmd; \ + if(error != MPI_SUCCESS) \ + { \ + std::cerr << "<" << __FILE__ << ">:" << __LINE__; \ + throw std::runtime_error(std::string("[MPI] Error")); \ + } \ + } -#define MPI_CHECK_NO_EXCEPT(cmd) {int error = cmd; if(error!=MPI_SUCCESS){std::cerr << "[MPI] Error code " << error << " in <" << __FILE__ << ">:" << __LINE__;}} +#define MPI_CHECK_NO_EXCEPT(cmd) \ + { \ + int error = cmd; \ + if(error != MPI_SUCCESS) \ + { \ + std::cerr << "[MPI] Error code " << error << " in <" << __FILE__ << ">:" << __LINE__; \ + } \ + } diff --git a/include/pmacc/cuSTL/algorithm/cudaBlock/Foreach.hpp b/include/pmacc/cuSTL/algorithm/cudaBlock/Foreach.hpp deleted file mode 100644 index 0bc20fce24..0000000000 --- a/include/pmacc/cuSTL/algorithm/cudaBlock/Foreach.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. 
- * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/TypeCast.hpp" -#include "pmacc/math/vector/Int.hpp" -#include "pmacc/math/Vector.hpp" -#include "pmacc/math/VectorOperations.hpp" - -namespace pmacc -{ -namespace algorithm -{ -namespace cudaBlock -{ - -#ifndef FOREACH_KERNEL_MAX_PARAMS -#define FOREACH_KERNEL_MAX_PARAMS 4 -#endif - -#define SHIFTACCESS_CURSOR(Z, N, _) c ## N [pos] - -#define FOREACH_OPERATOR(Z, N, _) \ - /* < , typename C0, ..., typename C(N-1) , > */ \ - template \ - /* ( C0 c0, ..., C(N-1) c(N-1) , ) */ \ - DINLINE void operator()(T_Acc const & acc, Zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ - { \ - const int dataVolume = math::CT::volume::type::value; \ - const int blockVolume = math::CT::volume::type::value; \ - \ - typedef typename math::Int PosType; \ - using namespace pmacc::algorithms::precisionCast; \ - \ - for(int i = this->linearThreadIdx; i < dataVolume; i += blockVolume) \ - { \ - PosType pos = Zone::Offset::toRT() + \ - precisionCast( \ - math::MapToPos()( typename Zone::Size(), i ) ); \ - functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_CURSOR, _)); \ - } \ - } - -/** Foreach algorithm that is executed by one cuda thread block - * - * \tparam BlockDim 3D compile-time vector (pmacc::math::CT::Int) of the size of the cuda blockDim. - * - * BlockDim could also be obtained from cuda itself at runtime but - * it is faster to know it at compile-time. - */ -template -struct Foreach -{ -private: - const int linearThreadIdx; -public: - - DINLINE Foreach(int linearThreadIdx) : linearThreadIdx(linearThreadIdx) {} - - /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) - * - * \param zone compile-time zone object, see zone::CT::SphericZone. (e.g. 
ContainerType::Zone()) - * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) - * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = _2) - * - * The functor or lambdaFun is called for each cell within the zone. - * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) - * - */ - BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) -}; - -#undef SHIFTACCESS_CURSOR -#undef FOREACH_OPERATOR - -} // cudaBlock -} // algorithm -} // pmacc diff --git a/include/pmacc/cuSTL/algorithm/cuplaBlock/Foreach.hpp b/include/pmacc/cuSTL/algorithm/cuplaBlock/Foreach.hpp new file mode 100644 index 0000000000..c840bc430a --- /dev/null +++ b/include/pmacc/cuSTL/algorithm/cuplaBlock/Foreach.hpp @@ -0,0 +1,102 @@ +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl + * + * This file is part of PMacc. + * + * PMacc is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PMacc is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with PMacc. + * If not, see . 
+ */ + +#pragma once + +#include "pmacc/types.hpp" +#include "pmacc/algorithms/TypeCast.hpp" +#include "pmacc/math/vector/Int.hpp" +#include "pmacc/math/Vector.hpp" +#include "pmacc/math/VectorOperations.hpp" + +namespace pmacc +{ + namespace algorithm + { + namespace cuplaBlock + { +#ifndef FOREACH_KERNEL_MAX_PARAMS +# define FOREACH_KERNEL_MAX_PARAMS 4 +#endif + +#define SHIFTACCESS_CURSOR(Z, N, _) c##N[pos] + +#define FOREACH_OPERATOR(Z, N, _) \ + /* < , typename C0, ..., typename C(N-1) , > */ \ + template< \ + typename Zone, \ + BOOST_PP_ENUM_PARAMS(N, typename C), \ + typename Functor, \ + typename T_Acc> /* ( C0 c0, ..., C(N-1) c(N-1) , ) */ \ + DINLINE void operator()(T_Acc const& acc, Zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ + { \ + const int dataVolume = math::CT::volume::type::value; \ + const int blockVolume = math::CT::volume::type::value; \ + \ + typedef typename math::Int PosType; \ + using namespace pmacc::algorithms::precisionCast; \ + \ + for(int i = this->linearThreadIdx; i < dataVolume; i += blockVolume) \ + { \ + PosType pos = Zone::Offset::toRT() \ + + precisionCast(math::MapToPos()(typename Zone::Size(), i)); \ + functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_CURSOR, _)); \ + } \ + } + + /** Foreach algorithm that is executed by one cupla thread block + * + * \tparam BlockDim 3D compile-time vector (pmacc::math::CT::Int) of the size of the cupla blockDim. + * + * BlockDim could also be obtained from cupla itself at runtime but + * it is faster to know it at compile-time. + */ + template + struct Foreach + { + private: + const int linearThreadIdx; + + public: + DINLINE Foreach(int linearThreadIdx) : linearThreadIdx(linearThreadIdx) + { + } + + /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) + * + * \param zone compile-time zone object, see zone::CT::SphericZone. (e.g. ContainerType::Zone()) + * \param cursorN cursor for the N-th data source (e.g. 
containerObj.origin()) + * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = + * _2) + * + * The functor or lambdaFun is called for each cell within the zone. + * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) + * + */ + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) + }; + +#undef SHIFTACCESS_CURSOR +#undef FOREACH_OPERATOR + + } // namespace cuplaBlock + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/functor/Add.hpp b/include/pmacc/cuSTL/algorithm/functor/Add.hpp index 85b0dc825b..964f8c3214 100644 --- a/include/pmacc/cuSTL/algorithm/functor/Add.hpp +++ b/include/pmacc/cuSTL/algorithm/functor/Add.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PMacc. * @@ -26,38 +26,25 @@ namespace pmacc { -namespace algorithm -{ -namespace functor -{ - - struct Add + namespace algorithm { - template< typename T_Type > - HDINLINE T_Type - operator()( - T_Type const & first, - T_Type const & second - ) const + namespace functor { - return first + second; - } + struct Add + { + template + HDINLINE T_Type operator()(T_Type const& first, T_Type const& second) const + { + return first + second; + } - template< - typename T_Type, - typename T_Acc - > - HDINLINE T_Type - operator()( - T_Acc const &, - T_Type const & first, - T_Type const & second - ) const - { - return first + second; - } - }; + template + HDINLINE T_Type operator()(T_Acc const&, T_Type const& first, T_Type const& second) const + { + return first + second; + } + }; -} // functor -} // algorithm -} // pmacc + } // namespace functor + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/functor/AssignValue.hpp b/include/pmacc/cuSTL/algorithm/functor/AssignValue.hpp index 710b88eaee..e8697448d3 100644 --- a/include/pmacc/cuSTL/algorithm/functor/AssignValue.hpp +++ 
b/include/pmacc/cuSTL/algorithm/functor/AssignValue.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PMacc. * @@ -26,39 +26,33 @@ namespace pmacc { -namespace algorithm -{ -namespace functor -{ - - template< typename T_Type> - struct AssignValue + namespace algorithm { - using Type = T_Type; - Type m_value; - - HDINLINE - AssignValue( Type const & value ) : - m_value( value ) - { } - - HDINLINE void - operator()( Type & arg ) const - { - arg = m_value; - } - - template< typename T_Acc > - HDINLINE void - operator()( - T_Acc const &, - Type & arg - ) const + namespace functor { - arg = m_value; - } - }; - -} // functor -} // algorithm -} // pmacc + template + struct AssignValue + { + using Type = T_Type; + Type m_value; + + HDINLINE + AssignValue(Type const& value) : m_value(value) + { + } + + HDINLINE void operator()(Type& arg) const + { + arg = m_value; + } + + template + HDINLINE void operator()(T_Acc const&, Type& arg) const + { + arg = m_value; + } + }; + + } // namespace functor + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/functor/GetComponent.hpp b/include/pmacc/cuSTL/algorithm/functor/GetComponent.hpp index f566d0e9fb..329cdfbe4b 100644 --- a/include/pmacc/cuSTL/algorithm/functor/GetComponent.hpp +++ b/include/pmacc/cuSTL/algorithm/functor/GetComponent.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PMacc. 
* @@ -26,43 +26,34 @@ namespace pmacc { -namespace algorithm -{ -namespace functor -{ - - template< typename T_Type > - struct GetComponent + namespace algorithm { - using Type = T_Type; - using result_type = Type; - uint32_t m_component; - - HDINLINE GetComponent( uint32_t const component ) : - m_component( component ) - { } - - template< - typename Array, - typename T_Acc - > - HDINLINE Type & - operator()( - T_Acc const &, - Array & array - ) const - { - return array[ m_component ]; - } - - template< typename Array > - HDINLINE Type & - operator()( Array & array ) const + namespace functor { - return array[ m_component ]; - } - }; - -} // functor -} // algorithm -} // pmacc + template + struct GetComponent + { + using Type = T_Type; + using result_type = Type; + uint32_t m_component; + + HDINLINE GetComponent(uint32_t const component) : m_component(component) + { + } + + template + HDINLINE Type& operator()(T_Acc const&, Array& array) const + { + return array[m_component]; + } + + template + HDINLINE Type& operator()(Array& array) const + { + return array[m_component]; + } + }; + + } // namespace functor + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/host/Foreach.hpp b/include/pmacc/cuSTL/algorithm/host/Foreach.hpp index ab7f31029d..a192c35133 100644 --- a/include/pmacc/cuSTL/algorithm/host/Foreach.hpp +++ b/include/pmacc/cuSTL/algorithm/host/Foreach.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -33,96 +33,98 @@ namespace pmacc { -namespace algorithm -{ -namespace host -{ - + namespace algorithm + { + namespace host + { #ifndef FOREACH_HOST_MAX_PARAMS -#define FOREACH_HOST_MAX_PARAMS 4 +# define FOREACH_HOST_MAX_PARAMS 4 #endif -#define SHIFT_CURSOR_ZONE(Z, N, _) C ## N c ## N ## _shifted = c ## N (p_zone.offset); -#define SHIFTACCESS_SHIFTEDCURSOR(Z, N, _) c ## N ## _shifted [cellIndex] +#define SHIFT_CURSOR_ZONE(Z, N, _) C##N c##N##_shifted = c##N(p_zone.offset); +#define SHIFTACCESS_SHIFTEDCURSOR(Z, N, _) c##N##_shifted[cellIndex] -namespace detail -{ - /** Return pseudo 3D-range of the zone as math::Int */ - template< uint32_t dim > - struct GetRange; + namespace detail + { + /** Return pseudo 3D-range of the zone as math::Int */ + template + struct GetRange; - template<> - struct GetRange<3u> - { - template - const math::Int<3u> operator()(const Zone p_zone) const - { - return math::Int<3u>(p_zone.size.x(), p_zone.size.y(), p_zone.size.z()); - } - }; - template<> - struct GetRange<2u> - { - template - const math::Int<3u> operator()(const Zone p_zone) const - { - return math::Int<3u>(p_zone.size.x(), p_zone.size.y(), 1); - } - }; - template<> - struct GetRange<1u> - { - template - const math::Int<3u> operator()(const Zone p_zone) const - { - return math::Int<3u>(p_zone.size.x(), 1, 1); - } - }; -} // namespace detail + template<> + struct GetRange<3u> + { + template + const math::Int<3u> operator()(const Zone p_zone) const + { + return math::Int<3u>(p_zone.size.x(), p_zone.size.y(), p_zone.size.z()); + } + }; + template<> + struct GetRange<2u> + { + template + const math::Int<3u> operator()(const Zone p_zone) const + { + return math::Int<3u>(p_zone.size.x(), p_zone.size.y(), 1); + } + }; + template<> + struct GetRange<1u> + { + template + const math::Int<3u> operator()(const Zone p_zone) const + { + return math::Int<3u>(p_zone.size.x(), 1, 1); + } + }; + } // namespace detail -#define FOREACH_OPERATOR(Z, N, _) \ - template \ - void 
operator()(const T_Acc& acc, const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ - { \ - BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ - \ - detail::GetRange getRange; \ - for(int z = 0; z < getRange(p_zone).z(); z++) \ - { \ - for(int y = 0; y < getRange(p_zone).y(); y++) \ - { \ - for(int x = 0; x < getRange(p_zone).x(); x++) \ - { \ - math::Int cellIndex = \ - math::Int<3u>(x, y, z).shrink(); \ - functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_SHIFTEDCURSOR, _)); \ - } \ - } \ - } \ +#define FOREACH_OPERATOR(Z, N, _) \ + template \ + void operator()( \ + const T_Acc& acc, \ + const Zone& p_zone, \ + BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), \ + const Functor& functor) \ + { \ + BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ + \ + detail::GetRange getRange; \ + for(int z = 0; z < getRange(p_zone).z(); z++) \ + { \ + for(int y = 0; y < getRange(p_zone).y(); y++) \ + { \ + for(int x = 0; x < getRange(p_zone).x(); x++) \ + { \ + math::Int cellIndex = math::Int<3u>(x, y, z).shrink(); \ + functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_SHIFTEDCURSOR, _)); \ + } \ + } \ + } \ } -/** Foreach algorithm (restricted to 3D) - */ -struct Foreach -{ - /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) - * - * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) - * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) - * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = _2) - * - * The functor or lambdaFun is called for each cell within the zone. - * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) - * - */ - BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_HOST_MAX_PARAMS), FOREACH_OPERATOR, _) -}; + /** Foreach algorithm (restricted to 3D) + */ + struct Foreach + { + /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) + * + * \param zone Accepts currently only a zone::SphericZone object (e.g. 
containerObj.zone()) + * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) + * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = + * _2) + * + * The functor or lambdaFun is called for each cell within the zone. + * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) + * + */ + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_HOST_MAX_PARAMS), FOREACH_OPERATOR, _) + }; #undef FOREACH_OPERATOR #undef SHIFT_CURSOR_ZONE #undef SHIFTACCESS_SHIFTEDCURSOR -} // host -} // algorithm -} // pmacc - + } // namespace host + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/FFT.hpp b/include/pmacc/cuSTL/algorithm/kernel/FFT.hpp index 6f099c1921..35033cae0e 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/FFT.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/FFT.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -23,21 +23,19 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ + namespace algorithm + { + namespace kernel + { + template + struct FFT + { + template + void operator()(const Zone& p_zone, const DestCursor& destCursor, const SrcCursor& srcCursor); + }; -template -struct FFT -{ - template - void operator()(const Zone& p_zone, const DestCursor& destCursor, const SrcCursor& srcCursor); -}; - -} // kernel -} // algorithm -} // pmacc + } // namespace kernel + } // namespace algorithm +} // namespace pmacc #include "FFT.tpp" - diff --git a/include/pmacc/cuSTL/algorithm/kernel/FFT.tpp b/include/pmacc/cuSTL/algorithm/kernel/FFT.tpp index c4b88c2cb4..86ae996539 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/FFT.tpp +++ b/include/pmacc/cuSTL/algorithm/kernel/FFT.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,24 +28,25 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ - -template<> -template -void FFT<2>::operator()(const Zone& p_zone, const DestCursor& destCursor, const SrcCursor& srcCursor) -{ - cufftHandle plan; - CUFFT_CHECK(cufftPlan2d(&plan, p_zone.size.x(), p_zone.size.y(), CUFFT_R2C)); + namespace algorithm + { + namespace kernel + { + template<> + template + void FFT<2>::operator()(const Zone& p_zone, const DestCursor& destCursor, const SrcCursor& srcCursor) + { + cufftHandle plan; + CUFFT_CHECK(cufftPlan2d(&plan, p_zone.size.x(), p_zone.size.y(), CUFFT_R2C)); - CUFFT_CHECK(cufftExecR2C(plan, (cufftReal*)&(*(srcCursor(p_zone.offset))), - (cufftComplex*)&(*destCursor(p_zone.offset)))); + CUFFT_CHECK(cufftExecR2C( + plan, + (cufftReal*) &(*(srcCursor(p_zone.offset))), + (cufftComplex*) &(*destCursor(p_zone.offset)))); - CUFFT_CHECK(cufftDestroy(plan)); -} + CUFFT_CHECK(cufftDestroy(plan)); + } -} // kernel -} // algorithm -} // pmacc + } // namespace kernel + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/Foreach.hpp b/include/pmacc/cuSTL/algorithm/kernel/Foreach.hpp index fee62439d2..a954e4b4db 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/Foreach.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/Foreach.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -39,110 +39,88 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ - + namespace algorithm + { + namespace kernel + { #ifndef FOREACH_KERNEL_MAX_PARAMS -#define FOREACH_KERNEL_MAX_PARAMS 4 +# define FOREACH_KERNEL_MAX_PARAMS 4 #endif -#define SHIFT_CURSOR_ZONE(Z, N, _) C ## N c ## N ## _shifted = c ## N (p_zone.offset); -#define SHIFTED_CURSOR(Z, N, _) c ## N ## _shifted +#define SHIFT_CURSOR_ZONE(Z, N, _) C##N c##N##_shifted = c##N(p_zone.offset); +#define SHIFTED_CURSOR(Z, N, _) c##N##_shifted -#define FOREACH_OPERATOR(Z, N, _) \ - /* typename C0, typename C1, ... */ \ - template \ - /* C0 c0, C1 c1, ... */ \ - void operator()(const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ - { \ - /* C0 c0_shifted = c0(p_zone.offset); */ \ - /* C1 c1_shifted = c1(p_zone.offset); */ \ - /* ... */ \ - BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ - \ - auto blockSize = BlockDim::toRT(); \ - detail::SphericMapper mapper; \ - using namespace pmacc; \ - PMACC_KERNEL(detail::KernelForeach{})(mapper.cudaGridDim(p_zone.size), blockSize) \ - /* c0_shifted, c1_shifted, ... */ \ - (mapper, BOOST_PP_ENUM(N, SHIFTED_CURSOR, _), functor); \ +#define FOREACH_OPERATOR(Z, N, _) \ + /* typename C0, typename C1, ... */ \ + template /* C0 c0, C1 c1, ... */ \ + void operator()(const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ + { \ + /* C0 c0_shifted = c0(p_zone.offset); */ \ + /* C1 c1_shifted = c1(p_zone.offset); */ \ + /* ... */ \ + BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ + \ + auto blockSize = BlockDim::toRT(); \ + detail::SphericMapper mapper; \ + using namespace pmacc; \ + PMACC_KERNEL(detail::KernelForeach{}) \ + (mapper.cuplaGridDim(p_zone.size), blockSize) /* c0_shifted, c1_shifted, ... */ \ + (mapper, BOOST_PP_ENUM(N, SHIFTED_CURSOR, _), functor); \ } -/** Foreach algorithm that calls a cuda kernel - * - * \tparam BlockDim 3D compile-time vector (pmacc::math::CT::Int) of the size of the cuda blockDim. 
- * - * blockDim has to fit into the computing volume. - * E.g. (8,8,4) fits into (256, 256, 256) - */ -template -struct Foreach -{ - /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) - * - * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) - * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) - * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = _2) - * - * The functor or lambdaFun is called for each cell within the zone. - * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) - * - */ - BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) -}; + /** Foreach algorithm that calls a cupla kernel + * + * \tparam BlockDim 3D compile-time vector (pmacc::math::CT::Int) of the size of the cupla blockDim. + * + * blockDim has to fit into the computing volume. + * E.g. (8,8,4) fits into (256, 256, 256) + */ + template + struct Foreach + { + /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) + * + * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) + * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) + * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = + * _2) + * + * The functor or lambdaFun is called for each cell within the zone. + * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) + * + */ + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) + }; #undef FOREACH_OPERATOR #undef SHIFT_CURSOR_ZONE #undef SHIFTED_CURSOR -template< - uint32_t T_numWorkers, - typename BlockDim -> -struct ForeachLockstep -{ - - /* operator()(zone, functor, cursor0, cursor1, ..., cursorN-1) - * - * @param zone Accepts currently only a zone::SphericZone object (e.g. 
containerObj.zone()) - * @param functor either a functor with N arguments - * @param args cursor for the N-th data source (e.g. containerObj.origin()) - * - * The functor is called for each worker within the zone. - * It is called like - * @code[.cpp} - * functor(*cursor0(cellBlockOffset), ..., *cursorN(cellBlockOffset)) - * @endcode - */ - template< - int T_dim, - typename T_Functor, - typename... T_Args - > - void operator()( - zone::SphericZone< T_dim > const & p_zone, - T_Functor & functor, - T_Args ... args - ) - { - detail::SphericMapper< - T_dim, - BlockDim - > mapper; + template + struct ForeachLockstep + { + /* operator()(zone, functor, cursor0, cursor1, ..., cursorN-1) + * + * @param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) + * @param functor either a functor with N arguments + * @param args cursor for the N-th data source (e.g. containerObj.origin()) + * + * The functor is called for each worker within the zone. + * It is called like + * @code[.cpp} + * functor(*cursor0(cellBlockOffset), ..., *cursorN(cellBlockOffset)) + * @endcode + */ + template + void operator()(zone::SphericZone const& p_zone, T_Functor& functor, T_Args... args) + { + detail::SphericMapper mapper; - PMACC_KERNEL( detail::KernelForeachLockstep{ } )( - mapper.cudaGridDim( p_zone.size ), - T_numWorkers - )( - mapper, - functor, - args( p_zone.offset )... 
- ); - } -}; + PMACC_KERNEL(detail::KernelForeachLockstep{}) + (mapper.cuplaGridDim(p_zone.size), T_numWorkers)(mapper, functor, args(p_zone.offset)...); + } + }; -} // kernel -} // algorithm -} // pmacc + } // namespace kernel + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/ForeachBlock.hpp b/include/pmacc/cuSTL/algorithm/kernel/ForeachBlock.hpp index c6f209fad0..3ee7e6362e 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/ForeachBlock.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/ForeachBlock.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -38,93 +38,92 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ - + namespace algorithm + { + namespace kernel + { #ifndef FOREACH_KERNEL_MAX_PARAMS -#define FOREACH_KERNEL_MAX_PARAMS 4 +# define FOREACH_KERNEL_MAX_PARAMS 4 #endif -namespace detail -{ - -#define SHIFTACCESS_CURSOR(Z, N, _) c ## N [cellIndex] - -#define KERNEL_FOREACH(Z, N, _) \ - /* typename C0, typename C1, ... */ \ -template \ - /* C0 c0, C1 c1, ... */ \ -DINLINE void operator()(T_Acc const & acc, Mapper mapper, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), Functor functor) const \ -{ \ - math::Int cellIndex(mapper(acc, dim3(blockIdx))); \ - /* c0[cellIndex], c1[cellIndex], ... */ \ - functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_CURSOR, _)); \ -} - -struct KernelForeachBlock -{ - -BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), KERNEL_FOREACH, _) + namespace detail + { +#define SHIFTACCESS_CURSOR(Z, N, _) c##N[cellIndex] + +#define KERNEL_FOREACH(Z, N, _) \ + /* typename C0, typename C1, ... */ \ + template< \ + typename Mapper, \ + BOOST_PP_ENUM_PARAMS(N, typename C), \ + typename Functor, \ + typename T_Acc> /* C0 c0, C1 c1, ... 
*/ \ + DINLINE void operator()(T_Acc const& acc, Mapper mapper, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), Functor functor) \ + const \ + { \ + math::Int cellIndex(mapper(acc, cupla::dim3(cupla::blockIdx(acc)))); \ + /* c0[cellIndex], c1[cellIndex], ... */ \ + functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_CURSOR, _)); \ + } -}; + struct KernelForeachBlock + { + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), KERNEL_FOREACH, _) + }; #undef KERNEL_FOREACH #undef SHIFTACCESS_CURSOR -} - -#define SHIFT_CURSOR_ZONE(Z, N, _) C ## N c ## N ## _shifted = c ## N (p_zone.offset); -#define SHIFTED_CURSOR(Z, N, _) c ## N ## _shifted - -#define FOREACH_OPERATOR(Z, N, _) \ - /* typename C0, typename C1, ... */ \ - template \ - /* C0 c0, C1 c1, ... */ \ - void operator()(const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ - { \ - /* C0 c0_shifted = c0(p_zone.offset); */ \ - /* C1 c1_shifted = c1(p_zone.offset); */ \ - /* ... */ \ - BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ - \ - auto blockDim = ThreadBlock::toRT(); \ - detail::SphericMapper mapper; \ - using namespace pmacc; \ - PMACC_KERNEL(detail::KernelForeachBlock{})(mapper.cudaGridDim(p_zone.size), blockDim) \ - /* c0_shifted, c1_shifted, ... */ \ - (mapper, BOOST_PP_ENUM(N, SHIFTED_CURSOR, _), functor); \ + } // namespace detail + +#define SHIFT_CURSOR_ZONE(Z, N, _) C##N c##N##_shifted = c##N(p_zone.offset); +#define SHIFTED_CURSOR(Z, N, _) c##N##_shifted + +#define FOREACH_OPERATOR(Z, N, _) \ + /* typename C0, typename C1, ... */ \ + template /* C0 c0, C1 c1, ... */ \ + void operator()(const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ + { \ + /* C0 c0_shifted = c0(p_zone.offset); */ \ + /* C1 c1_shifted = c1(p_zone.offset); */ \ + /* ... 
*/ \ + BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ + \ + auto blockDim = ThreadBlock::toRT(); \ + detail::SphericMapper mapper; \ + using namespace pmacc; \ + PMACC_KERNEL(detail::KernelForeachBlock{}) \ + (mapper.cuplaGridDim(p_zone.size), blockDim) /* c0_shifted, c1_shifted, ... */ \ + (mapper, BOOST_PP_ENUM(N, SHIFTED_CURSOR, _), functor); \ } -/** Special foreach algorithm that calls a cuda kernel - * - * Behaves like kernel::Foreach, except that is doesn't shift the cursors cell by cell, but - * shifts them to the top left (front) corner cell of their corresponding cuda block. - * So if BlockDim is 4x4x4 it shifts 64 cursors to (0,0,0), 64 to (4,0,0), 64 to (8,0,0), ... - * - * \tparam BlockDim 3D compile-time vector (pmacc::math::CT::Int) of the size of the cuda blockDim. - * \tparam ThreadBlock ignored - */ -template -struct ForeachBlock -{ - /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) - * - * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) - * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) - * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = _2) - * - * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) - * - */ - BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) -}; + /** Special foreach algorithm that calls a cupla kernel + * + * Behaves like kernel::Foreach, except that is doesn't shift the cursors cell by cell, but + * shifts them to the top left (front) corner cell of their corresponding cupla block. + * So if BlockDim is 4x4x4 it shifts 64 cursors to (0,0,0), 64 to (4,0,0), 64 to (8,0,0), ... + * + * \tparam BlockDim 3D compile-time vector (pmacc::math::CT::Int) of the size of the cupla blockDim. 
+ * \tparam ThreadBlock ignored + */ + template + struct ForeachBlock + { + /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) + * + * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) + * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) + * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = + * _2) + * + * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) + * + */ + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) + }; #undef FOREACH_OPERATOR #undef SHIFT_CURSOR_ZONE #undef SHIFTED_CURSOR -} // kernel -} // algorithm -} // pmacc - + } // namespace kernel + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/Reduce.hpp b/include/pmacc/cuSTL/algorithm/kernel/Reduce.hpp index 4912835076..77ddaf8a1b 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/Reduce.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/Reduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,38 +28,37 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ - -/** Reduce algorithm that calls a cuda kernel - * - */ -struct Reduce -{ - - /* \param srcCursor Cursor located at the origin of the area of reduce - * \param p_zone Zone of cells spanning the area of reduce - * \param functor Functor with two arguments which returns the result of the reduce operation. 
- */ - template - typename SrcCursor::ValueType operator()(const SrcCursor& srcCursor, const Zone& p_zone, const NVidiaFunctor& functor) + namespace algorithm { - SrcCursor srcCursor_shifted = srcCursor(p_zone.offset); - - cursor::MapTo1DNavigator myNavi(p_zone.size); + namespace kernel + { + /** Reduce algorithm that calls a cupla kernel + * + */ + struct Reduce + { + /* \param srcCursor Cursor located at the origin of the area of reduce + * \param p_zone Zone of cells spanning the area of reduce + * \param functor Functor with two arguments which returns the result of the reduce operation. + */ + template + typename SrcCursor::ValueType operator()( + const SrcCursor& srcCursor, + const Zone& p_zone, + const NVidiaFunctor& functor) + { + SrcCursor srcCursor_shifted = srcCursor(p_zone.offset); - auto _srcCursor = cursor::make_Cursor(cursor::CursorAccessor(), - myNavi, - srcCursor_shifted); + cursor::MapTo1DNavigator myNavi(p_zone.size); - pmacc::nvidia::reduce::Reduce reduce(1024); - return reduce(functor, _srcCursor, p_zone.size.productOfComponents()); - } + auto _srcCursor + = cursor::make_Cursor(cursor::CursorAccessor(), myNavi, srcCursor_shifted); -}; + pmacc::nvidia::reduce::Reduce reduce(1024); + return reduce(functor, _srcCursor, p_zone.size.productOfComponents()); + } + }; -} // kernel -} // algorithm -} // pmacc + } // namespace kernel + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/detail/ForeachKernel.hpp b/include/pmacc/cuSTL/algorithm/kernel/detail/ForeachKernel.hpp index 1d2bb38816..d7fc904316 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/detail/ForeachKernel.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/detail/ForeachKernel.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau +/* Copyright 2013-2021 Heiko Burau * * This file is part of PMacc. 
* @@ -29,148 +29,116 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ - + namespace algorithm + { + namespace kernel + { #ifndef FOREACH_KERNEL_MAX_PARAMS -#define FOREACH_KERNEL_MAX_PARAMS 4 +# define FOREACH_KERNEL_MAX_PARAMS 4 #endif -namespace detail -{ - -#define SHIFTACCESS_CURSOR(Z, N, _) c ## N [cellIndex] - -#define KERNEL_FOREACH(Z, N, _) \ -/* typename C0, ..., typename CN */ \ -template \ -/* C0 c0, ..., CN cN */ \ -DINLINE void operator()(T_Acc const & acc, Mapper mapper, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), Functor functor) const \ -{ \ - math::Int cellIndex(mapper(acc, dim3(blockIdx), dim3(threadIdx))); \ -/* c0[cellIndex]), ..., cN[cellIndex] */ \ - functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_CURSOR, _)); \ -} + namespace detail + { +#define SHIFTACCESS_CURSOR(Z, N, _) c##N[cellIndex] + +#define KERNEL_FOREACH(Z, N, _) \ + /* typename C0, ..., typename CN */ \ + template< \ + typename Mapper, \ + BOOST_PP_ENUM_PARAMS(N, typename C), \ + typename Functor, \ + typename T_Acc> /* C0 c0, ..., CN cN */ \ + DINLINE void operator()(T_Acc const& acc, Mapper mapper, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), Functor functor) \ + const \ + { \ + math::Int cellIndex( \ + mapper(acc, cupla::dim3(cupla::blockIdx(acc)), cupla::dim3(cupla::threadIdx(acc)))); \ + /* c0[cellIndex]), ..., cN[cellIndex] */ \ + functor(acc, BOOST_PP_ENUM(N, SHIFTACCESS_CURSOR, _)); \ + } -struct KernelForeach -{ -BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), KERNEL_FOREACH, _) -}; + struct KernelForeach + { + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), KERNEL_FOREACH, _) + }; #undef KERNEL_FOREACH #undef SHIFTACCESS_CURSOR -struct KernelForeachLockstep -{ - /** call functor - * - * Each argument is shifted to the origin of the block before it is passed - * to the functor. - */ - template< - typename T_Acc, - typename T_Mapper, - typename T_Functor, - typename... 
T_Args> - ALPAKA_FN_ACC void operator()( - T_Acc const & acc, - T_Mapper const mapper, - T_Functor functor, - T_Args ... args - ) const - { - // map to the origin of the block - math::Int< - T_Mapper::dim - > cellIndex( - mapper( - acc, - dim3( blockIdx ), - dim3( - 0, - 0, - 0 - ) - ) - ); - - functor( - acc, - args[ cellIndex ]... - ); - } -}; - -namespace RT -{ - /** Run a cuSTL KernelForeach - * - * Allow to run the cuSTL foreach with runtime block sizes. - * @warning collective functors which contain synchronization are not supported - */ - struct KernelForeachLockstep - { - /** call functor - * - * Each argument is shifted to the origin of the block before it is passed - * to the functor. - */ - template< - typename T_Acc, - typename T_Mapper, - typename T_BlockSize, - typename T_Functor, - typename... T_Args> - ALPAKA_FN_ACC void operator()( - T_Acc const & acc, - T_Mapper const mapper, - T_BlockSize const blockSize, - T_Functor functor, - T_Args ... args - ) const - { - /* KernelForeachLockstep is always called as kernel with three dimensions - * therefore we have to reduce the dimension if the mapper is only 2D or 1D. - */ - auto const blockSizeShrinked = blockSize.template shrink< T_Mapper::dim >( ); - uint32_t const domainElementCount = blockSizeShrinked.productOfComponents(); - DataSpace< T_Mapper::dim > const domainSize( blockSizeShrinked ); - - // map to the origin of the block - math::Int< - T_Mapper::dim - > blockCellOffset( - mapper( - acc, - domainSize.toDim3(), - dim3( blockIdx ), - dim3( - 0, - 0, - 0 - ) - ) - ); - - - - for( uint32_t i = threadIdx.x; i < domainElementCount; i += blockDim.x ) - { - auto const inBlockOffset = DataSpaceOperations< T_Mapper::dim >::map( - domainSize, - i - ); - auto const cellOffset = blockCellOffset + inBlockOffset; - functor( - acc, - args[ cellOffset ]... 
- ); - } - } - }; -} // namespace RT -} // namespace detail -} // namespace kernel -} // namespace algorithm + struct KernelForeachLockstep + { + /** call functor + * + * Each argument is shifted to the origin of the block before it is passed + * to the functor. + */ + template + ALPAKA_FN_ACC void operator()( + T_Acc const& acc, + T_Mapper const mapper, + T_Functor functor, + T_Args... args) const + { + // map to the origin of the block + math::Int cellIndex( + mapper(acc, cupla::dim3(cupla::blockIdx(acc)), cupla::dim3(0, 0, 0))); + + functor(acc, args[cellIndex]...); + } + }; + + namespace RT + { + /** Run a cuSTL KernelForeach + * + * Allow to run the cuSTL foreach with runtime block sizes. + * @warning collective functors which contain synchronization are not supported + */ + struct KernelForeachLockstep + { + /** call functor + * + * Each argument is shifted to the origin of the block before it is passed + * to the functor. + */ + template< + typename T_Acc, + typename T_Mapper, + typename T_BlockSize, + typename T_Functor, + typename... T_Args> + ALPAKA_FN_ACC void operator()( + T_Acc const& acc, + T_Mapper const mapper, + T_BlockSize const blockSize, + T_Functor functor, + T_Args... args) const + { + /* KernelForeachLockstep is always called as kernel with three dimensions + * therefore we have to reduce the dimension if the mapper is only 2D or 1D. 
+ */ + auto const blockSizeShrinked = blockSize.template shrink(); + uint32_t const domainElementCount = blockSizeShrinked.productOfComponents(); + DataSpace const domainSize(blockSizeShrinked); + + // map to the origin of the block + math::Int blockCellOffset(mapper( + acc, + domainSize.toDim3(), + cupla::dim3(cupla::blockIdx(acc)), + cupla::dim3(0, 0, 0))); + + + for(uint32_t i = cupla::threadIdx(acc).x; i < domainElementCount; + i += cupla::blockDim(acc).x) + { + auto const inBlockOffset = DataSpaceOperations::map(domainSize, i); + auto const cellOffset = blockCellOffset + inBlockOffset; + functor(acc, args[cellOffset]...); + } + } + }; + } // namespace RT + } // namespace detail + } // namespace kernel + } // namespace algorithm } // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/detail/SphericMapper.hpp b/include/pmacc/cuSTL/algorithm/kernel/detail/SphericMapper.hpp index 658c0cdf07..1117df1906 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/detail/SphericMapper.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/detail/SphericMapper.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -28,295 +28,246 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ -namespace detail -{ - -namespace mpl = boost::mpl; - -/** The SphericMapper maps from cuda blockIdx and/or threadIdx to the cell index - * \tparam dim dimension - * \tparam BlockSize compile-time vector of the cuda block size (optional) - * \tparam dummy neccesary to implement the optional BlockSize parameter - * - * If BlockSize is given the cuda variable blockDim is not used which is faster. 
- */ -template -struct SphericMapper; - -/* Compile-time BlockSize */ - -template -struct SphericMapper<1, BlockSize> -{ - static constexpr int dim = 1; - - typename math::Size_t<3>::BaseType - cudaGridDim(const math::Size_t<1>& size) const - { - return math::Size_t<3>( - size.x() / BlockSize::x::value, - 1u, - 1u - ); - } - - template< typename T_Acc > - HDINLINE - math::Int<1> operator()( - T_Acc const & acc, - const math::Int<1>& _blockIdx, - const math::Int<1>& _threadIdx - ) const - { - return _blockIdx.x() * BlockSize::x::value + _threadIdx.x(); - } - - template< typename T_Acc > - HDINLINE - math::Int<1> operator()( - T_Acc const & acc, - const dim3& _blockIdx, - const dim3& _threadIdx = dim3(0,0,0) - ) const - { - return operator()( - acc, - math::Int<1>((int)_blockIdx.x), - math::Int<1>((int)_threadIdx.x) - ); - } -}; - -template -struct SphericMapper<2, BlockSize> -{ - static constexpr int dim = 2; - - typename math::Size_t<3>::BaseType - cudaGridDim(const math::Size_t<2>& size) const - { - return math::Size_t<3>( - size.x() / BlockSize::x::value, - size.y() / BlockSize::y::value, - 1u - ); - } - - template< typename T_Acc > - HDINLINE - math::Int<2> operator()( - T_Acc const & acc, - const math::Int<2>& _blockIdx, - const math::Int<2>& _threadIdx - ) const - { - return math::Int<2>( _blockIdx.x() * BlockSize::x::value + _threadIdx.x(), - _blockIdx.y() * BlockSize::y::value + _threadIdx.y() ); - } - - template< typename T_Acc > - HDINLINE - math::Int<2> operator()( - T_Acc const & acc, - const dim3& _blockIdx, - const dim3& _threadIdx = dim3(0,0,0) - ) const - { - return operator()( - acc, - math::Int<2>(_blockIdx.x, _blockIdx.y), - math::Int<2>(_threadIdx.x, _threadIdx.y) - ); - } -}; - -template -struct SphericMapper<3, BlockSize> -{ - static constexpr int dim = 3; - - typename math::Size_t<3>::BaseType - cudaGridDim(const math::Size_t<3>& size) const - { - return math::Size_t<3>( - size.x() / BlockSize::x::value, - size.y() / BlockSize::y::value, - 
size.z() / BlockSize::z::value - ); - } - - template< typename T_Acc > - HDINLINE - math::Int<3> operator()( - T_Acc const & acc, - const math::Int<3>& _blockIdx, - const math::Int<3>& _threadIdx - ) const - { - return math::Int<3>( _blockIdx * (math::Int<3>)BlockSize().toRT() + _threadIdx ); - } - - template< typename T_Acc > - HDINLINE - math::Int<3> operator()( - T_Acc const & acc, - const dim3& _blockIdx, - const dim3& _threadIdx = dim3(0,0,0) - ) const - { - return operator()( - acc, - math::Int<3>(_blockIdx.x, _blockIdx.y, _blockIdx.z), - math::Int<3>(_threadIdx.x, _threadIdx.y, _threadIdx.z) - ); - } -}; - -/* Runtime BlockSize */ - -template<> -struct SphericMapper<1, mpl::void_> -{ - static constexpr int dim = 1; - - typename math::Size_t<3>::BaseType - cudaGridDim(const math::Size_t<1>& size, const math::Size_t<3>& blockSize) const - { - return math::Size_t<3>( - size.x() / blockSize.x(), - 1u, - 1u - ); - } - - template< typename T_Acc > - DINLINE - math::Int<1> operator()( - T_Acc const & acc, - const math::Int<1>& _blockDim, - const math::Int<1>& _blockIdx, - const math::Int<1>& _threadIdx - ) const - { - return _blockIdx.x() * _blockDim.x() + _threadIdx.x(); - } - - template< typename T_Acc > - DINLINE - math::Int<1> operator()( - T_Acc const & acc, - const dim3& _blockDim, - const dim3& _blockIdx, - const dim3& _threadIdx - ) const - { - return operator()( - acc, - math::Int<1>((int)_blockDim.x), - math::Int<1>((int)_blockIdx.x), - math::Int<1>((int)_threadIdx.x) - ); - } -}; - -template<> -struct SphericMapper<2, mpl::void_> -{ - static constexpr int dim = 2; - - typename math::Size_t<3>::BaseType - cudaGridDim(const math::Size_t<2>& size, const math::Size_t<3>& blockSize) const - { - return math::Size_t<3>( - size.x() / blockSize.x(), - size.y() / blockSize.y(), - 1 - ); - } - - template< typename T_Acc > - DINLINE - math::Int<2> operator()( - T_Acc const & acc, - const math::Int<2>& _blockDim, - const math::Int<2>& _blockIdx, - const math::Int<2>& 
_threadIdx - ) const - { - return math::Int<2>( _blockIdx.x() * _blockDim.x() + _threadIdx.x(), - _blockIdx.y() * _blockDim.y() + _threadIdx.y() ); - } - - template< typename T_Acc > - DINLINE - math::Int<2> operator()( - T_Acc const & acc, - const dim3& _blockDim, - const dim3& _blockIdx, - const dim3& _threadIdx - ) const - { - return operator()( - acc, - math::Int<2>(_blockDim.x, _blockDim.y), - math::Int<2>(_blockIdx.x, _blockIdx.y), - math::Int<2>(_threadIdx.x, _threadIdx.y) - ); - } -}; - -template<> -struct SphericMapper<3, mpl::void_> -{ - static constexpr int dim = 3; - - typename math::Size_t<3>::BaseType - cudaGridDim(const math::Size_t<3>& size, const math::Size_t<3>& blockSize) const - { - return math::Size_t<3>( - size.x() / blockSize.x(), - size.y() / blockSize.y(), - size.z() / blockSize.z() - ); - } - - template< typename T_Acc > - DINLINE - math::Int<3> operator()( - T_Acc const & acc, - const math::Int<3>& _blockDim, - const math::Int<3>& _blockIdx, - const math::Int<3>& _threadIdx - ) const - { - return math::Int<3>( _blockIdx.x() * _blockDim.x() + _threadIdx.x(), - _blockIdx.y() * _blockDim.y() + _threadIdx.y(), - _blockIdx.z() * _blockDim.z() + _threadIdx.z() ); - } - - template< typename T_Acc > - DINLINE - math::Int<3> operator()( - T_Acc const & acc, - const dim3& _blockDim, - const dim3& _blockIdx, - const dim3& _threadIdx - ) const + namespace algorithm { - return operator()( - acc, - math::Int<3>(_blockDim.x, _blockDim.y, _blockDim.z), - math::Int<3>(_blockIdx.x, _blockIdx.y, _blockIdx.z), - math::Int<3>(_threadIdx.x, _threadIdx.y, _threadIdx.z) - ); - } -}; - -} // detail -} // kernel -} // algorithm -} // pmacc + namespace kernel + { + namespace detail + { + namespace mpl = boost::mpl; + + /** The SphericMapper maps from cupla blockIdx and/or threadIdx to the cell index + * \tparam dim dimension + * \tparam BlockSize compile-time vector of the cupla block size (optional) + * \tparam dummy neccesary to implement the optional BlockSize 
parameter + * + * If BlockSize is given the cupla variable blockDim is not used which is faster. + */ + template + struct SphericMapper; + + /* Compile-time BlockSize */ + + template + struct SphericMapper<1, BlockSize> + { + static constexpr int dim = 1; + + typename math::Size_t<3>::BaseType cuplaGridDim(const math::Size_t<1>& size) const + { + return math::Size_t<3>(size.x() / BlockSize::x::value, 1u, 1u); + } + + template + HDINLINE math::Int<1> operator()( + T_Acc const& acc, + const math::Int<1>& _blockIdx, + const math::Int<1>& _threadIdx) const + { + return _blockIdx.x() * BlockSize::x::value + _threadIdx.x(); + } + + template + HDINLINE math::Int<1> operator()( + T_Acc const& acc, + const cupla::dim3& _blockIdx, + const cupla::dim3& _threadIdx = cupla::dim3(0, 0, 0)) const + { + return operator()(acc, math::Int<1>((int) _blockIdx.x), math::Int<1>((int) _threadIdx.x)); + } + }; + + template + struct SphericMapper<2, BlockSize> + { + static constexpr int dim = 2; + + typename math::Size_t<3>::BaseType cuplaGridDim(const math::Size_t<2>& size) const + { + return math::Size_t<3>(size.x() / BlockSize::x::value, size.y() / BlockSize::y::value, 1u); + } + + template + HDINLINE math::Int<2> operator()( + T_Acc const& acc, + const math::Int<2>& _blockIdx, + const math::Int<2>& _threadIdx) const + { + return math::Int<2>( + _blockIdx.x() * BlockSize::x::value + _threadIdx.x(), + _blockIdx.y() * BlockSize::y::value + _threadIdx.y()); + } + + template + HDINLINE math::Int<2> operator()( + T_Acc const& acc, + const cupla::dim3& _blockIdx, + const cupla::dim3& _threadIdx = cupla::dim3(0, 0, 0)) const + { + return operator()( + acc, + math::Int<2>(_blockIdx.x, _blockIdx.y), + math::Int<2>(_threadIdx.x, _threadIdx.y)); + } + }; + + template + struct SphericMapper<3, BlockSize> + { + static constexpr int dim = 3; + + typename math::Size_t<3>::BaseType cuplaGridDim(const math::Size_t<3>& size) const + { + return math::Size_t<3>( + size.x() / BlockSize::x::value, + size.y() 
/ BlockSize::y::value, + size.z() / BlockSize::z::value); + } + + template + HDINLINE math::Int<3> operator()( + T_Acc const& acc, + const math::Int<3>& _blockIdx, + const math::Int<3>& _threadIdx) const + { + return math::Int<3>(_blockIdx * (math::Int<3>) BlockSize().toRT() + _threadIdx); + } + + template + HDINLINE math::Int<3> operator()( + T_Acc const& acc, + const cupla::dim3& _blockIdx, + const cupla::dim3& _threadIdx = cupla::dim3(0, 0, 0)) const + { + return operator()( + acc, + math::Int<3>(_blockIdx.x, _blockIdx.y, _blockIdx.z), + math::Int<3>(_threadIdx.x, _threadIdx.y, _threadIdx.z)); + } + }; + + /* Runtime BlockSize */ + + template<> + struct SphericMapper<1, mpl::void_> + { + static constexpr int dim = 1; + + typename math::Size_t<3>::BaseType cuplaGridDim( + const math::Size_t<1>& size, + const math::Size_t<3>& blockSize) const + { + return math::Size_t<3>(size.x() / blockSize.x(), 1u, 1u); + } + + template + DINLINE math::Int<1> operator()( + T_Acc const& acc, + const math::Int<1>& _blockDim, + const math::Int<1>& _blockIdx, + const math::Int<1>& _threadIdx) const + { + return _blockIdx.x() * _blockDim.x() + _threadIdx.x(); + } + + template + DINLINE math::Int<1> operator()( + T_Acc const& acc, + const cupla::dim3& _blockDim, + const cupla::dim3& _blockIdx, + const cupla::dim3& _threadIdx) const + { + return operator()( + acc, + math::Int<1>((int) _blockDim.x), + math::Int<1>((int) _blockIdx.x), + math::Int<1>((int) _threadIdx.x)); + } + }; + + template<> + struct SphericMapper<2, mpl::void_> + { + static constexpr int dim = 2; + + typename math::Size_t<3>::BaseType cuplaGridDim( + const math::Size_t<2>& size, + const math::Size_t<3>& blockSize) const + { + return math::Size_t<3>(size.x() / blockSize.x(), size.y() / blockSize.y(), 1); + } + + template + DINLINE math::Int<2> operator()( + T_Acc const& acc, + const math::Int<2>& _blockDim, + const math::Int<2>& _blockIdx, + const math::Int<2>& _threadIdx) const + { + return math::Int<2>( + 
_blockIdx.x() * _blockDim.x() + _threadIdx.x(), + _blockIdx.y() * _blockDim.y() + _threadIdx.y()); + } + + template + DINLINE math::Int<2> operator()( + T_Acc const& acc, + const cupla::dim3& _blockDim, + const cupla::dim3& _blockIdx, + const cupla::dim3& _threadIdx) const + { + return operator()( + acc, + math::Int<2>(_blockDim.x, _blockDim.y), + math::Int<2>(_blockIdx.x, _blockIdx.y), + math::Int<2>(_threadIdx.x, _threadIdx.y)); + } + }; + + template<> + struct SphericMapper<3, mpl::void_> + { + static constexpr int dim = 3; + + typename math::Size_t<3>::BaseType cuplaGridDim( + const math::Size_t<3>& size, + const math::Size_t<3>& blockSize) const + { + return math::Size_t<3>( + size.x() / blockSize.x(), + size.y() / blockSize.y(), + size.z() / blockSize.z()); + } + + template + DINLINE math::Int<3> operator()( + T_Acc const& acc, + const math::Int<3>& _blockDim, + const math::Int<3>& _blockIdx, + const math::Int<3>& _threadIdx) const + { + return math::Int<3>( + _blockIdx.x() * _blockDim.x() + _threadIdx.x(), + _blockIdx.y() * _blockDim.y() + _threadIdx.y(), + _blockIdx.z() * _blockDim.z() + _threadIdx.z()); + } + + template + DINLINE math::Int<3> operator()( + T_Acc const& acc, + const cupla::dim3& _blockDim, + const cupla::dim3& _blockIdx, + const cupla::dim3& _threadIdx) const + { + return operator()( + acc, + math::Int<3>(_blockDim.x, _blockDim.y, _blockDim.z), + math::Int<3>(_blockIdx.x, _blockIdx.y, _blockIdx.z), + math::Int<3>(_threadIdx.x, _threadIdx.y, _threadIdx.z)); + } + }; + + } // namespace detail + } // namespace kernel + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/kernel/run-time/Foreach.hpp b/include/pmacc/cuSTL/algorithm/kernel/run-time/Foreach.hpp index be0604789d..34015e212d 100644 --- a/include/pmacc/cuSTL/algorithm/kernel/run-time/Foreach.hpp +++ b/include/pmacc/cuSTL/algorithm/kernel/run-time/Foreach.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* 
Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -45,152 +45,155 @@ namespace pmacc { -namespace algorithm -{ -namespace kernel -{ -namespace RT -{ - -/** Heuristic maximum threads per block and per axis - * in agreement to sm_2.x - sm_5.3 - * - * These values don't fully exploit the limits from the cuda specification - * but they give reasonable speed. - */ -template -struct MaxCudaBlockDim; - -template<> -struct MaxCudaBlockDim -{ - typedef math::CT::Size_t<1024, 1, 1> type; -}; - -template<> -struct MaxCudaBlockDim -{ - typedef math::CT::Size_t<32, 32, 1> type; -}; - -template<> -struct MaxCudaBlockDim -{ - typedef math::CT::Size_t<8, 8, 8> type; -}; - -/** Check if MaxCudaBlockDim holds the cuda specification limits - * - * @cond - */ -PMACC_CASSERT_MSG(_cuda_blockDim_exceeds_maximum_number_of_threads_per_block, - math::CT::volume::type >::type::value <= cudaSpecs::maxNumThreadsPerBlock); -PMACC_CASSERT_MSG(_cuda_blockDim_exceeds_maximum_number_of_threads_per_block, - math::CT::volume::type >::type::value <= cudaSpecs::maxNumThreadsPerBlock); -PMACC_CASSERT_MSG(_cuda_blockDim_exceeds_maximum_number_of_threads_per_block, - math::CT::volume::type >::type::value <= cudaSpecs::maxNumThreadsPerBlock); -/** @endcond */ - -/** Return a suitable cuda blockDim for a given gridDimension. 
- * - * @param gridDimension 1D, 2D or 3D grid size - * @return cuda blockDim - */ -template -math::Size_t getBestCudaBlockDim(const math::Size_t gridDimension) -{ - math::Size_t result = math::Size_t::create(1); - - /* The greatest common divisor of each component of the volume size - * and a certain power of two value yield the best suitable block size */ - const math::Size_t maxThreads = - MaxCudaBlockDim::type::toRT(); /* max threads per axis */ - for(int i = 0; i < dim; i++) + namespace algorithm { - result[i] = boost::integer::gcd(gridDimension[i], maxThreads[i]); - } - - return result; -} + namespace kernel + { + namespace RT + { + /** Heuristic maximum threads per block and per axis + * in agreement to sm_2.x - sm_5.3 + * + * These values don't fully exploit the limits from the cupla specification + * but they give reasonable speed. + */ + template + struct MaxCudaBlockDim; + + template<> + struct MaxCudaBlockDim + { + typedef math::CT::Size_t<1024, 1, 1> type; + }; + + template<> + struct MaxCudaBlockDim + { + typedef math::CT::Size_t<32, 32, 1> type; + }; + + template<> + struct MaxCudaBlockDim + { + typedef math::CT::Size_t<8, 8, 8> type; + }; + + /** Check if MaxCudaBlockDim holds the cupla specification limits + * + * @cond + */ + PMACC_CASSERT_MSG( + _cupla_blockDim_exceeds_maximum_number_of_threads_per_block, + math::CT::volume::type>::type::value + <= cudaSpecs::maxNumThreadsPerBlock); + PMACC_CASSERT_MSG( + _cupla_blockDim_exceeds_maximum_number_of_threads_per_block, + math::CT::volume::type>::type::value + <= cudaSpecs::maxNumThreadsPerBlock); + PMACC_CASSERT_MSG( + _cupla_blockDim_exceeds_maximum_number_of_threads_per_block, + math::CT::volume::type>::type::value + <= cudaSpecs::maxNumThreadsPerBlock); + /** @endcond */ + + /** Return a suitable cupla blockDim for a given gridDimension. 
+ * + * @param gridDimension 1D, 2D or 3D grid size + * @return cupla blockDim + */ + template + math::Size_t getBestCudaBlockDim(const math::Size_t gridDimension) + { + math::Size_t result = math::Size_t::create(1); + + /* The greatest common divisor of each component of the volume size + * and a certain power of two value yield the best suitable block size */ + const math::Size_t maxThreads + = MaxCudaBlockDim::type::toRT(); /* max threads per axis */ + for(int i = 0; i < dim; i++) + { + result[i] = boost::integer::gcd(gridDimension[i], maxThreads[i]); + } + + return result; + } #ifndef FOREACH_KERNEL_MAX_PARAMS -#define FOREACH_KERNEL_MAX_PARAMS 4 +# define FOREACH_KERNEL_MAX_PARAMS 4 #endif -#define SHIFT_CURSOR_ZONE(Z, N, _) C ## N c ## N ## _shifted = c ## N (p_zone.offset); -#define SHIFTED_CURSOR(Z, N, _) c ## N ## _shifted +#define SHIFT_CURSOR_ZONE(Z, N, _) C##N c##N##_shifted = c##N(p_zone.offset); +#define SHIFTED_CURSOR(Z, N, _) c##N##_shifted -#define FOREACH_OPERATOR(Z, N, _) \ - /* typename C0, ..., typename CN */ \ - template \ - /* C0 c0, ..., CN cN */ \ - void operator()(const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ - { \ +#define FOREACH_OPERATOR(Z, N, _) \ + /* typename C0, ..., typename CN */ \ + template /* C0 c0, ..., CN cN */ \ + void operator()(const Zone& p_zone, BOOST_PP_ENUM_BINARY_PARAMS(N, C, c), const Functor& functor) \ + { \ /* C0 c0_shifted = c0(p_zone.offset); ...; CN cN_shifted = cN(p_zone.offset); */ \ - BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ - \ - if(this->_blockDim == math::Size_t::create(0)) \ - this->_blockDim = getBestCudaBlockDim(p_zone.size); \ - \ - PMACC_VERIFY(this->_blockDim.productOfComponents() <= cudaSpecs::maxNumThreadsPerBlock); \ - PMACC_VERIFY(this->_blockDim.x() <= cudaSpecs::MaxNumThreadsPerBlockDim::x::value); \ - PMACC_VERIFY(this->_blockDim.y() <= cudaSpecs::MaxNumThreadsPerBlockDim::y::value); \ - PMACC_VERIFY(this->_blockDim.z() <= 
cudaSpecs::MaxNumThreadsPerBlockDim::z::value); \ - \ - typename math::Size_t<3>::BaseType blockSize( \ - this->_blockDim.x(), \ - this->_blockDim.y(), \ - this->_blockDim.z() \ - ); \ - uint32_t numWorkers = traits::GetNumWorkers< cudaSpecs::MaxNumThreadsPerBlockDim::x::value >::value; \ - if( numWorkers > blockSize.productOfComponents() ) \ - numWorkers = blockSize.productOfComponents(); \ - kernel::detail::SphericMapper mapper; \ - using namespace pmacc; \ - PMACC_KERNEL(kernel::detail::RT::KernelForeachLockstep{})(mapper.cudaGridDim(p_zone.size, this->_blockDim), numWorkers) \ - /* c0_shifted, ..., cN_shifted */ \ - (mapper, blockSize, functor, BOOST_PP_ENUM(N, SHIFTED_CURSOR, _)); \ + BOOST_PP_REPEAT(N, SHIFT_CURSOR_ZONE, _) \ + \ + if(this->_blockDim == math::Size_t::create(0)) \ + this->_blockDim = getBestCudaBlockDim(p_zone.size); \ + \ + PMACC_VERIFY(this->_blockDim.productOfComponents() <= cudaSpecs::maxNumThreadsPerBlock); \ + PMACC_VERIFY(this->_blockDim.x() <= cudaSpecs::MaxNumThreadsPerBlockDim::x::value); \ + PMACC_VERIFY(this->_blockDim.y() <= cudaSpecs::MaxNumThreadsPerBlockDim::y::value); \ + PMACC_VERIFY(this->_blockDim.z() <= cudaSpecs::MaxNumThreadsPerBlockDim::z::value); \ + \ + typename math::Size_t<3>::BaseType blockSize(this->_blockDim.x(), this->_blockDim.y(), this->_blockDim.z()); \ + uint32_t numWorkers = traits::GetNumWorkers::value; \ + if(numWorkers > blockSize.productOfComponents()) \ + numWorkers = blockSize.productOfComponents(); \ + kernel::detail::SphericMapper mapper; \ + using namespace pmacc; \ + PMACC_KERNEL(kernel::detail::RT::KernelForeachLockstep{}) \ + (mapper.cuplaGridDim(p_zone.size, this->_blockDim), numWorkers) /* c0_shifted, ..., cN_shifted */ \ + (mapper, blockSize, functor, BOOST_PP_ENUM(N, SHIFTED_CURSOR, _)); \ } -/** Foreach algorithm that calls a cuda kernel - * - * This is the run-time version of kernel::Foreach where the - * cuda blockDim is specified in the constructor - * - * @warning collective functors 
(containing synchronization) are not supported - */ -struct Foreach -{ - math::Size_t _blockDim; - - /* \param _blockDim size of the cuda blockDim. - * - * blockDim has to fit into the computing volume. - * E.g. (8,8,4) fits into (256, 256, 256) - * - * If no argument is given, the blockDim will be computed heuristically. - * - */ - Foreach(math::Size_t _blockDim = math::Size_t::create(0)) : _blockDim(_blockDim) {} - - /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) - * - * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) - * \param cursorN cursor for the N-th data source (e.g. containerObj.origin()) - * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. _1 = _2) - * - * The functor or lambdaFun is called for each cell within the zone. - * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) - * - */ - BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) -}; + /** Foreach algorithm that calls a cupla kernel + * + * This is the run-time version of kernel::Foreach where the + * cupla blockDim is specified in the constructor + * + * @warning collective functors (containing synchronization) are not supported + */ + struct Foreach + { + math::Size_t _blockDim; + + /* \param _blockDim size of the cupla blockDim. + * + * blockDim has to fit into the computing volume. + * E.g. (8,8,4) fits into (256, 256, 256) + * + * If no argument is given, the blockDim will be computed heuristically. + * + */ + Foreach(math::Size_t _blockDim = math::Size_t::create(0)) : _blockDim(_blockDim) + { + } + + /* operator()(zone, cursor0, cursor1, ..., cursorN-1, functor or lambdaFun) + * + * \param zone Accepts currently only a zone::SphericZone object (e.g. containerObj.zone()) + * \param cursorN cursor for the N-th data source (e.g. 
containerObj.origin()) + * \param functor or lambdaFun either a functor with N arguments or a N-ary lambda function (e.g. + * _1 = _2) + * + * The functor or lambdaFun is called for each cell within the zone. + * It is called like functor(*cursor0(cellId), ..., *cursorN(cellId)) + * + */ + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(FOREACH_KERNEL_MAX_PARAMS), FOREACH_OPERATOR, _) + }; #undef FOREACH_OPERATOR #undef SHIFT_CURSOR_ZONE #undef SHIFTED_CURSOR -} // namespace RT -} // namespace kernel -} // namespace algorithm + } // namespace RT + } // namespace kernel + } // namespace algorithm } // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/mpi/Gather.hpp b/include/pmacc/cuSTL/algorithm/mpi/Gather.hpp index c49830c37b..b045a7f7b0 100644 --- a/include/pmacc/cuSTL/algorithm/mpi/Gather.hpp +++ b/include/pmacc/cuSTL/algorithm/mpi/Gather.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau +/* Copyright 2013-2021 Heiko Burau * * This file is part of PMacc. * @@ -29,48 +29,60 @@ namespace pmacc { -namespace algorithm -{ -namespace mpi -{ - -/** - */ -template -class Gather -{ -private: - MPI_Comm comm; - std::vector > positions; - bool m_participate; - - struct CopyToDest + namespace algorithm { - template - void operator()(const Gather& gather, + namespace mpi + { + /** + */ + template + class Gather + { + private: + MPI_Comm comm; + std::vector> positions; + bool m_participate; + + struct CopyToDest + { + template + void operator()( + const Gather& gather, container::CartBuffer& dest, std::vector& tmpDest, int dir, - const std::vector >& srcSizes, + const std::vector>& srcSizes, const std::vector& srcOffsets) const; - }; + }; -public: - Gather(const zone::SphericZone& p_zone); - ~Gather(); + public: + Gather(const zone::SphericZone& p_zone); + ~Gather(); - template - void operator()(container::CartBuffer& dest, + template< + typename Type, + int memDim, + class T_Alloc, + class T_Copy, + class T_Assign, + class T_Alloc2, + class T_Copy2, + class T_Assign2> 
+ void operator()( + container::CartBuffer& dest, container::CartBuffer& source, int dir = -1) const; - inline bool participate() const {return m_participate;} - inline bool root() const; - inline int rank() const; -}; + inline bool participate() const + { + return m_participate; + } + inline bool root() const; + inline int rank() const; + }; -} // mpi -} // algorithm -} // pmacc + } // namespace mpi + } // namespace algorithm +} // namespace pmacc #include "Gather.tpp" diff --git a/include/pmacc/cuSTL/algorithm/mpi/Gather.tpp b/include/pmacc/cuSTL/algorithm/mpi/Gather.tpp index 3f4e92ce02..5a6ebaaffd 100644 --- a/include/pmacc/cuSTL/algorithm/mpi/Gather.tpp +++ b/include/pmacc/cuSTL/algorithm/mpi/Gather.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. * @@ -27,262 +27,288 @@ #include "pmacc/communication/manager_common.hpp" #include -#include // std::partial_sum -#include // std::copy +#include // std::partial_sum +#include // std::copy namespace pmacc { -namespace algorithm -{ -namespace mpi -{ - -namespace GatherHelper -{ - -template -struct ContiguousPitch -{ - math::Size_t operator()(const math::Size_t& size) - { - math::Size_t pitch; - - pitch[0] = size[0] * sizeof(Type); - for(int axis = 1; axis < dim-1; axis++) - pitch[axis] = pitch[axis-1] * size[axis]; - - return pitch; - } -}; - -template -struct ContiguousPitch -{ - math::Size_t<0> operator()(const math::Size_t&) - { - return math::Size_t<0>(); - } -}; - -} // namespace GatherHelper - -template -Gather::Gather(const zone::SphericZone& p_zone) : comm(MPI_COMM_NULL) -{ - using namespace pmacc::math; - - pmacc::GridController& con = pmacc::Environment::get().GridController(); - Int pos = con.getPosition(); - - int numWorldRanks; MPI_Comm_size(MPI_COMM_WORLD, &numWorldRanks); - std::vector > allPositions(numWorldRanks); - - // avoid deadlock between not finished 
pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather(static_cast(&pos), sizeof(Int), MPI_CHAR, - static_cast(allPositions.data()), sizeof(Int), MPI_CHAR, - MPI_COMM_WORLD)); - - std::vector new_ranks; - int myWorldId; MPI_Comm_rank(MPI_COMM_WORLD, &myWorldId); - - this->m_participate = false; - for(int i = 0; i < static_cast(allPositions.size()); i++) - { - Int pos = allPositions[i]; - if(!p_zone.within(pos)) continue; - - new_ranks.push_back(i); - this->positions.push_back(allPositions[i]); - if(i == myWorldId) this->m_participate = true; - } - MPI_Group world_group, new_group; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &world_group)); - MPI_CHECK(MPI_Group_incl(world_group, new_ranks.size(), new_ranks.data(), &new_group)); - MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, new_group, &this->comm)); - MPI_CHECK(MPI_Group_free(&new_group)); -} - -template -Gather::~Gather() -{ - if(this->comm != MPI_COMM_NULL) - { - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&this->comm)); - } -} - -template -bool Gather::root() const -{ - if(!this->m_participate) - { - std::cerr << "error[mpi::Gather::root()]: this process does not participate in gathering.\n"; - return false; - } - int myId; MPI_Comm_rank(this->comm, &myId); - return myId == 0; -} - -template -int Gather::rank() const -{ - if(!this->m_participate) - { - std::cerr << "error[mpi::Gather::rank()]: this process does not participate in gathering.\n"; - return -1; - } - int myId; MPI_Comm_rank(this->comm, &myId); - return myId; -} - -template -template -void Gather::CopyToDest::operator()( - const Gather& gather, - container::CartBuffer& dest, - std::vector& tmpDest, - int dir, - const std::vector >& srcSizes, - const 
std::vector& srcOffsets1D) const -{ - using namespace math; - - int numRanks = static_cast(gather.positions.size()); - - // calculate sizes per axis in destination buffer - std::vector sizesPerAxis[memDim]; - - // sizes per axis - for(int i = 0; i < numRanks; i++) - { - Int pos = gather.positions[i]; - Int posInMem = pos.template shrink(dir+1); - for(int axis = 0; axis < memDim; axis++) - { - size_t posOnAxis = static_cast(posInMem[axis]); - if(posOnAxis >= sizesPerAxis[axis].size()) - sizesPerAxis[axis].resize(posOnAxis + 1); - sizesPerAxis[axis][posOnAxis] = srcSizes[i][axis]; - } - } - - // calculate offsets per axis in destination buffer - std::vector offsetsPerAxis[memDim]; - - // offsets per axis - for(int axis = 0; axis < memDim; axis++) - { - offsetsPerAxis[axis].resize(sizesPerAxis[axis].size()); - std::vector partialSum(offsetsPerAxis[axis].size()); - std::partial_sum(sizesPerAxis[axis].begin(), sizesPerAxis[axis].end(), partialSum.begin()); - offsetsPerAxis[axis][0] = 0; - std::copy(partialSum.begin(), partialSum.end()-1, offsetsPerAxis[axis].begin()+1); - } - - // copy from one dimensional mpi buffer to n dimensional destination buffer - for(int i = 0; i < numRanks; i++) - { - Int pos = gather.positions[i]; - Int posInMem = pos.template shrink(dir+1); - Int ndim_offset; - for(int axis = 0; axis < memDim; axis++) - ndim_offset[axis] = offsetsPerAxis[axis][posInMem[axis]]; - - // calculate srcPitch (contiguous memory) - Size_t srcPitch = GatherHelper::ContiguousPitch()(srcSizes[i]); - - cudaWrapper::Memcopy()( - &(*dest.origin()(ndim_offset)), - dest.getPitch(), - tmpDest.data() + srcOffsets1D[i], - srcPitch, - srcSizes[i], - cudaWrapper::flags::Memcopy::hostToHost); - } -} - -template -template -void Gather::operator()(container::CartBuffer& dest, - container::CartBuffer& source, int dir) const -{ - using namespace pmacc::math; - - if(!this->m_participate) return; - typedef container::CartBuffer DestBuffer; - typedef container::CartBuffer SrcBuffer; - 
PMACC_CASSERT_MSG( - Can_Only_Gather_Host_Memory, - boost::is_same::value && - boost::is_same::value); - - const bool useTmpSrc = source.isContigousMemory(); - int numRanks; MPI_Comm_size(this->comm, &numRanks); - std::vector tmpDest(root() ? numRanks * source.size().productOfComponents() : 0); - container::HostBuffer tmpSrc(useTmpSrc ? source.size() : math::Size_t::create(0)); - if(useTmpSrc) - tmpSrc = source; /* Mem copy */ - - // Get number of elements for each source buffer - std::vector > srcBufferSizes(numRanks); - Size_t srcBufferSize = source.size(); - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Gather( - static_cast(&srcBufferSize), - sizeof(Size_t), - MPI_CHAR, - static_cast(srcBufferSizes.data()), - sizeof(Size_t), - MPI_CHAR, - 0, this->comm)); - - // 1D offsets in destination buffer - std::vector srcBufferOffsets1D(numRanks); - std::vector srcBufferSizes1D(numRanks); - std::vector srcBufferOffsets1D_char(numRanks); // `MPI_Gatherv` demands `int*` - std::vector srcBufferSizes1D_char(numRanks); - - if(this->root()) + namespace algorithm { - for(int i = 0; i < numRanks; i++) - srcBufferSizes1D[i] = srcBufferSizes[i].productOfComponents(); - std::vector partialSum(numRanks); - std::partial_sum(srcBufferSizes1D.begin(), srcBufferSizes1D.end(), partialSum.begin()); - srcBufferOffsets1D[0] = 0; - std::copy(partialSum.begin(), partialSum.end()-1, srcBufferOffsets1D.begin()+1); - - for(int i = 0; i < numRanks; i++) + namespace mpi { - srcBufferOffsets1D_char[i] = static_cast(srcBufferOffsets1D[i]) * sizeof(Type); - srcBufferSizes1D_char[i] = static_cast(srcBufferSizes1D[i]) * sizeof(Type); - } - } - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - // gather - MPI_CHECK(MPI_Gatherv( - useTmpSrc ? 
static_cast(tmpSrc.getDataPointer()) : static_cast(source.getDataPointer()), - source.size().productOfComponents() * sizeof(Type), - MPI_CHAR, - root() ? static_cast(tmpDest.data()) : nullptr, - srcBufferSizes1D_char.data(), - srcBufferOffsets1D_char.data(), - MPI_CHAR, - 0, this->comm)); - if(!root()) return; - - CopyToDest()(*this, dest, tmpDest, dir, srcBufferSizes, srcBufferOffsets1D); -} - -} // mpi -} // algorithm -} // pmacc + namespace GatherHelper + { + template + struct ContiguousPitch + { + math::Size_t operator()(const math::Size_t& size) + { + math::Size_t pitch; + + pitch[0] = size[0] * sizeof(Type); + for(int axis = 1; axis < dim - 1; axis++) + pitch[axis] = pitch[axis - 1] * size[axis]; + + return pitch; + } + }; + + template + struct ContiguousPitch + { + math::Size_t<0> operator()(const math::Size_t&) + { + return math::Size_t<0>(); + } + }; + + } // namespace GatherHelper + + template + Gather::Gather(const zone::SphericZone& p_zone) : comm(MPI_COMM_NULL) + { + using namespace pmacc::math; + + pmacc::GridController& con = pmacc::Environment::get().GridController(); + Int pos = con.getPosition(); + + int numWorldRanks; + MPI_Comm_size(MPI_COMM_WORLD, &numWorldRanks); + std::vector> allPositions(numWorldRanks); + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + static_cast(&pos), + sizeof(Int), + MPI_CHAR, + static_cast(allPositions.data()), + sizeof(Int), + MPI_CHAR, + MPI_COMM_WORLD)); + + std::vector new_ranks; + int myWorldId; + MPI_Comm_rank(MPI_COMM_WORLD, &myWorldId); + + this->m_participate = false; + for(int i = 0; i < static_cast(allPositions.size()); i++) + { + Int pos = allPositions[i]; + if(!p_zone.within(pos)) + continue; + + new_ranks.push_back(i); + this->positions.push_back(allPositions[i]); + if(i == myWorldId) + this->m_participate = true; + } + MPI_Group world_group, new_group; + + // avoid deadlock between not finished 
pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &world_group)); + MPI_CHECK(MPI_Group_incl(world_group, new_ranks.size(), new_ranks.data(), &new_group)); + MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, new_group, &this->comm)); + MPI_CHECK(MPI_Group_free(&new_group)); + } + + template + Gather::~Gather() + { + if(this->comm != MPI_COMM_NULL) + { + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&this->comm)); + } + } + + template + bool Gather::root() const + { + if(!this->m_participate) + { + std::cerr << "error[mpi::Gather::root()]: this process does not participate in gathering.\n"; + return false; + } + int myId; + MPI_Comm_rank(this->comm, &myId); + return myId == 0; + } + + template + int Gather::rank() const + { + if(!this->m_participate) + { + std::cerr << "error[mpi::Gather::rank()]: this process does not participate in gathering.\n"; + return -1; + } + int myId; + MPI_Comm_rank(this->comm, &myId); + return myId; + } + + template + template + void Gather::CopyToDest::operator()( + const Gather& gather, + container::CartBuffer& dest, + std::vector& tmpDest, + int dir, + const std::vector>& srcSizes, + const std::vector& srcOffsets1D) const + { + using namespace math; + + int numRanks = static_cast(gather.positions.size()); + + // calculate sizes per axis in destination buffer + std::vector sizesPerAxis[memDim]; + + // sizes per axis + for(int i = 0; i < numRanks; i++) + { + Int pos = gather.positions[i]; + Int posInMem = pos.template shrink(dir + 1); + for(int axis = 0; axis < memDim; axis++) + { + size_t posOnAxis = static_cast(posInMem[axis]); + if(posOnAxis >= sizesPerAxis[axis].size()) + sizesPerAxis[axis].resize(posOnAxis + 1); + sizesPerAxis[axis][posOnAxis] = srcSizes[i][axis]; + } + } + + // calculate offsets per axis in destination buffer + std::vector 
offsetsPerAxis[memDim]; + + // offsets per axis + for(int axis = 0; axis < memDim; axis++) + { + offsetsPerAxis[axis].resize(sizesPerAxis[axis].size()); + std::vector partialSum(offsetsPerAxis[axis].size()); + std::partial_sum(sizesPerAxis[axis].begin(), sizesPerAxis[axis].end(), partialSum.begin()); + offsetsPerAxis[axis][0] = 0; + std::copy(partialSum.begin(), partialSum.end() - 1, offsetsPerAxis[axis].begin() + 1); + } + + // copy from one dimensional mpi buffer to n dimensional destination buffer + for(int i = 0; i < numRanks; i++) + { + Int pos = gather.positions[i]; + Int posInMem = pos.template shrink(dir + 1); + Int ndim_offset; + for(int axis = 0; axis < memDim; axis++) + ndim_offset[axis] = offsetsPerAxis[axis][posInMem[axis]]; + + // calculate srcPitch (contiguous memory) + Size_t srcPitch = GatherHelper::ContiguousPitch()(srcSizes[i]); + + cuplaWrapper::Memcopy()( + &(*dest.origin()(ndim_offset)), + dest.getPitch(), + tmpDest.data() + srcOffsets1D[i], + srcPitch, + srcSizes[i], + cuplaWrapper::flags::Memcopy::hostToHost); + } + } + + template + template< + typename Type, + int memDim, + class T_Alloc, + class T_Copy, + class T_Assign, + class T_Alloc2, + class T_Copy2, + class T_Assign2> + void Gather::operator()( + container::CartBuffer& dest, + container::CartBuffer& source, + int dir) const + { + using namespace pmacc::math; + + if(!this->m_participate) + return; + typedef container::CartBuffer DestBuffer; + typedef container::CartBuffer SrcBuffer; + PMACC_CASSERT_MSG( + Can_Only_Gather_Host_Memory, + boost::is_same::value + && boost::is_same::value); + + const bool useTmpSrc = source.isContigousMemory(); + int numRanks; + MPI_Comm_size(this->comm, &numRanks); + std::vector tmpDest(root() ? numRanks * source.size().productOfComponents() : 0); + container::HostBuffer tmpSrc( + useTmpSrc ? 
source.size() : math::Size_t::create(0)); + if(useTmpSrc) + tmpSrc = source; /* Mem copy */ + + // Get number of elements for each source buffer + std::vector> srcBufferSizes(numRanks); + Size_t srcBufferSize = source.size(); + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Gather( + static_cast(&srcBufferSize), + sizeof(Size_t), + MPI_CHAR, + static_cast(srcBufferSizes.data()), + sizeof(Size_t), + MPI_CHAR, + 0, + this->comm)); + + // 1D offsets in destination buffer + std::vector srcBufferOffsets1D(numRanks); + std::vector srcBufferSizes1D(numRanks); + std::vector srcBufferOffsets1D_char(numRanks); // `MPI_Gatherv` demands `int*` + std::vector srcBufferSizes1D_char(numRanks); + + if(this->root()) + { + for(int i = 0; i < numRanks; i++) + srcBufferSizes1D[i] = srcBufferSizes[i].productOfComponents(); + std::vector partialSum(numRanks); + std::partial_sum(srcBufferSizes1D.begin(), srcBufferSizes1D.end(), partialSum.begin()); + srcBufferOffsets1D[0] = 0; + std::copy(partialSum.begin(), partialSum.end() - 1, srcBufferOffsets1D.begin() + 1); + + for(int i = 0; i < numRanks; i++) + { + srcBufferOffsets1D_char[i] = static_cast(srcBufferOffsets1D[i]) * sizeof(Type); + srcBufferSizes1D_char[i] = static_cast(srcBufferSizes1D[i]) * sizeof(Type); + } + } + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + // gather + MPI_CHECK(MPI_Gatherv( + useTmpSrc ? static_cast(tmpSrc.getDataPointer()) + : static_cast(source.getDataPointer()), + source.size().productOfComponents() * sizeof(Type), + MPI_CHAR, + root() ? 
static_cast(tmpDest.data()) : nullptr, + srcBufferSizes1D_char.data(), + srcBufferOffsets1D_char.data(), + MPI_CHAR, + 0, + this->comm)); + if(!root()) + return; + + CopyToDest()(*this, dest, tmpDest, dir, srcBufferSizes, srcBufferOffsets1D); + } + + } // namespace mpi + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/algorithm/mpi/Reduce.hpp b/include/pmacc/cuSTL/algorithm/mpi/Reduce.hpp index 6037987eaa..88a0409e04 100644 --- a/include/pmacc/cuSTL/algorithm/mpi/Reduce.hpp +++ b/include/pmacc/cuSTL/algorithm/mpi/Reduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau +/* Copyright 2013-2021 Heiko Burau * * This file is part of PMacc. * @@ -29,66 +29,70 @@ namespace pmacc { -namespace algorithm -{ -namespace mpi -{ + namespace algorithm + { + namespace mpi + { + /** Reduce algorithm for mpi + * + * \tparam dim dimension of the mpi node volume which has to be reduced. + * + * This algorithm reduces node-wise. For each node you pass a data container as source + * and another container of the same size as destination. The result is stored in + * the destination container of the root node. + * + * The data values of the container are reduced independently of each other. + * + * The dimension of the container need not be the same as dim. + * + */ + template + class Reduce + { + private: + MPI_Comm comm; + bool m_participate; -/** Reduce algorithm for mpi - * - * \tparam dim dimension of the mpi node volume which has to be reduced. - * - * This algorithm reduces node-wise. For each node you pass a data container as source - * and another container of the same size as destination. The result is stored in - * the destination container of the root node. - * - * The data values of the container are reduced independently of each other. - * - * The dimension of the container need not be the same as dim. 
- * - */ -template -class Reduce -{ -private: - MPI_Comm comm; - bool m_participate; -public: - /** constructor - * - * \param zone The zone specifies which mpi-nodes participate in the reduce operation. - * \param setThisAsRoot Set this node explicitly as root. May only be true for one node. - * - * if setThisAsRoot is not set mpi chooses the root node. - * - */ - Reduce(const zone::SphericZone& zone, bool setThisAsRoot = false); - ~Reduce(); + public: + /** constructor + * + * \param zone The zone specifies which mpi-nodes participate in the reduce operation. + * \param setThisAsRoot Set this node explicitly as root. May only be true for one node. + * + * if setThisAsRoot is not set mpi chooses the root node. + * + */ + Reduce(const zone::SphericZone& zone, bool setThisAsRoot = false); + ~Reduce(); - /* execute the algorithm - * - * \param dest destination container - * \param src source container - * \param ExprOrFunctor functor with two arguments which returns the result of the reduce operation. - * - * Since only the functor's type is given, the functor must have a standart constructor. - * - */ - template - void operator()(container::HostBuffer& dest, + /* execute the algorithm + * + * \param dest destination container + * \param src source container + * \param ExprOrFunctor functor with two arguments which returns the result of the reduce operation. + * + * Since only the functor's type is given, the functor must have a standart constructor. + * + */ + template + void operator()( + container::HostBuffer& dest, const container::HostBuffer& src, ExprOrFunctor) const; - // Returns whether this node is within the zone. - inline bool participate() const {return m_participate;} - // Returns whether this node is the root node. - inline bool root() const; - // Returns the mpi rank of this node. - inline int rank() const; -}; + // Returns whether this node is within the zone. 
+ inline bool participate() const + { + return m_participate; + } + // Returns whether this node is the root node. + inline bool root() const; + // Returns the mpi rank of this node. + inline int rank() const; + }; -} // mpi -} // algorithm -} // pmacc + } // namespace mpi + } // namespace algorithm +} // namespace pmacc #include "pmacc/cuSTL/algorithm/mpi/Reduce.tpp" diff --git a/include/pmacc/cuSTL/algorithm/mpi/Reduce.tpp b/include/pmacc/cuSTL/algorithm/mpi/Reduce.tpp index 5afee1e020..f82afcd54e 100644 --- a/include/pmacc/cuSTL/algorithm/mpi/Reduce.tpp +++ b/include/pmacc/cuSTL/algorithm/mpi/Reduce.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Axel Huebl * * This file is part of PMacc. * @@ -33,138 +33,157 @@ namespace pmacc { -namespace algorithm -{ -namespace mpi -{ - -template -Reduce::Reduce(const zone::SphericZone& p_zone, bool setThisAsRoot) : comm(MPI_COMM_NULL) -{ - using namespace math; - - auto& con = Environment::get().GridController(); - - typedef std::pair, bool> PosFlag; - PosFlag posFlag; - posFlag.first = (Int)con.getPosition(); - posFlag.second = setThisAsRoot; - - int numWorldRanks; MPI_Comm_size(MPI_COMM_WORLD, &numWorldRanks); - std::vector allPositionsFlags(numWorldRanks); - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather((void*)&posFlag, sizeof(PosFlag), MPI_CHAR, - (void*)allPositionsFlags.data(), sizeof(PosFlag), MPI_CHAR, - MPI_COMM_WORLD)); - - std::vector new_ranks; - int myWorldId; MPI_Comm_rank(MPI_COMM_WORLD, &myWorldId); - - this->m_participate = false; - for(int i = 0; i < (int)allPositionsFlags.size(); i++) - { - Int pos = allPositionsFlags[i].first; - bool flag = allPositionsFlags[i].second; - if(!p_zone.within(pos)) continue; - - new_ranks.push_back(i); - //if rank i is supposed to be the new root put him at the front - if(flag) std::swap(new_ranks.front(), 
new_ranks.back()); - if(i == myWorldId) this->m_participate = true; - } - - MPI_Group world_group = MPI_GROUP_NULL; - MPI_Group new_group = MPI_GROUP_NULL; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &world_group)); - MPI_CHECK(MPI_Group_incl(world_group, new_ranks.size(), &(new_ranks.front()), &new_group)); - MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, new_group, &this->comm)); - MPI_CHECK(MPI_Group_free(&new_group)); - MPI_CHECK(MPI_Group_free(&world_group)); -} - -template -Reduce::~Reduce() -{ - if(this->comm != MPI_COMM_NULL) - { - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&this->comm)); - } -} - -template -bool Reduce::root() const -{ - if(!this->m_participate) + namespace algorithm { - std::cerr << "error[mpi::Reduce::root()]: this process does not participate in reducing.\n"; - return false; - } - int myId; MPI_Comm_rank(this->comm, &myId); - return myId == 0; -} - -template -int Reduce::rank() const -{ - if(!this->m_participate) - { - std::cerr << "error[mpi::Reduce::rank()]: this process does not participate in reducing.\n"; - return -1; - } - int myId; MPI_Comm_rank(this->comm, &myId); - return myId; -} - -namespace detail -{ - -template -struct MPI_User_Op -{ - static void callback(void* invec, void* inoutvec, int *len, MPI_Datatype*) - { - Functor functor; - type* inoutvec_t = (type*)inoutvec; - type* invec_t = (type*)invec; - - int size = (*len)/sizeof(type); - for(int i = 0; i < size; i++) + namespace mpi { - inoutvec_t[i] = functor(inoutvec_t[i], invec_t[i]); - } - } -}; - -} // detail - -template -template -void Reduce::operator() - (container::HostBuffer& dest, - const container::HostBuffer& src, - Functor) const -{ - if(!this->m_participate) return; - - // avoid deadlock between not finished pmacc tasks 
and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - - MPI_Op user_op; - MPI_CHECK(MPI_Op_create(&detail::MPI_User_Op::callback, 1, &user_op)); - - MPI_CHECK(MPI_Reduce(&(*src.origin()), &(*dest.origin()), sizeof(Type) * dest.size().productOfComponents(), - MPI_CHAR, user_op, 0, this->comm)); - - MPI_CHECK(MPI_Op_free(&user_op)); -} - -} // mpi -} // algorithm -} // pmacc + template + Reduce::Reduce(const zone::SphericZone& p_zone, bool setThisAsRoot) : comm(MPI_COMM_NULL) + { + using namespace math; + + auto& con = Environment::get().GridController(); + + typedef std::pair, bool> PosFlag; + PosFlag posFlag; + posFlag.first = (Int) con.getPosition(); + posFlag.second = setThisAsRoot; + + int numWorldRanks; + MPI_Comm_size(MPI_COMM_WORLD, &numWorldRanks); + std::vector allPositionsFlags(numWorldRanks); + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather( + (void*) &posFlag, + sizeof(PosFlag), + MPI_CHAR, + (void*) allPositionsFlags.data(), + sizeof(PosFlag), + MPI_CHAR, + MPI_COMM_WORLD)); + + std::vector new_ranks; + int myWorldId; + MPI_Comm_rank(MPI_COMM_WORLD, &myWorldId); + + this->m_participate = false; + for(int i = 0; i < (int) allPositionsFlags.size(); i++) + { + Int pos = allPositionsFlags[i].first; + bool flag = allPositionsFlags[i].second; + if(!p_zone.within(pos)) + continue; + + new_ranks.push_back(i); + // if rank i is supposed to be the new root put him at the front + if(flag) + std::swap(new_ranks.front(), new_ranks.back()); + if(i == myWorldId) + this->m_participate = true; + } + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + if(new_ranks.size()) + { + MPI_Group world_group = MPI_GROUP_NULL; + MPI_Group new_group = MPI_GROUP_NULL; + MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &world_group)); + MPI_CHECK(MPI_Group_incl(world_group, 
new_ranks.size(), &(new_ranks.front()), &new_group)); + MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, new_group, &this->comm)); + MPI_CHECK(MPI_Group_free(&new_group)); + MPI_CHECK(MPI_Group_free(&world_group)); + } + } + + template + Reduce::~Reduce() + { + if(this->comm != MPI_COMM_NULL) + { + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&this->comm)); + } + } + + template + bool Reduce::root() const + { + if(!this->m_participate) + { + std::cerr << "error[mpi::Reduce::root()]: this process does not participate in reducing.\n"; + return false; + } + int myId; + MPI_Comm_rank(this->comm, &myId); + return myId == 0; + } + + template + int Reduce::rank() const + { + if(!this->m_participate) + { + std::cerr << "error[mpi::Reduce::rank()]: this process does not participate in reducing.\n"; + return -1; + } + int myId; + MPI_Comm_rank(this->comm, &myId); + return myId; + } + + namespace detail + { + template + struct MPI_User_Op + { + static void callback(void* invec, void* inoutvec, int* len, MPI_Datatype*) + { + Functor functor; + type* inoutvec_t = (type*) inoutvec; + type* invec_t = (type*) invec; + + int size = (*len) / sizeof(type); + for(int i = 0; i < size; i++) + { + inoutvec_t[i] = functor(inoutvec_t[i], invec_t[i]); + } + } + }; + + } // namespace detail + + template + template + void Reduce::operator()( + container::HostBuffer& dest, + const container::HostBuffer& src, + Functor) const + { + if(!this->m_participate) + return; + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + + MPI_Op user_op; + MPI_CHECK(MPI_Op_create(&detail::MPI_User_Op::callback, 1, &user_op)); + + MPI_CHECK(MPI_Reduce( + &(*src.origin()), + &(*dest.origin()), + sizeof(Type) * dest.size().productOfComponents(), + MPI_CHAR, + user_op, + 0, + this->comm)); + + MPI_CHECK(MPI_Op_free(&user_op)); + } + 
+ } // namespace mpi + } // namespace algorithm +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/CartBuffer.hpp b/include/pmacc/cuSTL/container/CartBuffer.hpp index 0dcfbdb2b0..be158e66a5 100644 --- a/include/pmacc/cuSTL/container/CartBuffer.hpp +++ b/include/pmacc/cuSTL/container/CartBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -42,98 +42,111 @@ namespace pmacc { -namespace container -{ + namespace container + { + namespace bmpl = boost::mpl; -namespace bmpl = boost::mpl; + /** Implementation of a box-shaped (cartesian) container type. + * Holds a reference counter so one can have several containers sharing one buffer. + * Is designed to be an RAII class, but does not fully obey the RAII rules (see copy-ctor). + * The way memory gets allocated, copied and assigned is + * fully controlled by three policy classes. + * \tparam Type type of a single value + * \tparam T_dim dimension of the container + * \tparam Allocator allocates and releases memory + * \tparam Copier copies one memory buffer to another + * \tparam Assigner assigns a value to every datum of a memory buffer + * + * Assigner policy has to support `apply2`: Assigner + * + */ + template< + typename Type, + int T_dim, + typename Allocator = allocator::EmptyAllocator, + typename Copier = mpl::void_, + typename Assigner = bmpl::vector> + class CartBuffer + : public + /* "Curiously recurring template pattern" */ + bmpl::apply, CartBuffer>::type + { + public: + typedef Type type; + static constexpr int dim = T_dim; + typedef cursor::BufferCursor Cursor; + typedef typename Allocator::tag memoryTag; + typedef math::Size_t SizeType; + typedef math::Size_t PitchType; -/** Implementation of a box-shaped (cartesian) container type. - * Holds a reference counter so one can have several containers sharing one buffer. 
- * Is designed to be an RAII class, but does not fully obey the RAII rules (see copy-ctor). - * The way memory gets allocated, copied and assigned is - * fully controlled by three policy classes. - * \tparam Type type of a single value - * \tparam T_dim dimension of the container - * \tparam Allocator allocates and releases memory - * \tparam Copier copies one memory buffer to another - * \tparam Assigner assigns a value to every datum of a memory buffer - * - * Assigner policy has to support `apply2`: Assigner - * - */ -template > -class CartBuffer : public - /* "Curiously recurring template pattern" */ - bmpl::apply, CartBuffer >::type -{ -public: - typedef Type type; - static constexpr int dim = T_dim; - typedef cursor::BufferCursor Cursor; - typedef typename Allocator::tag memoryTag; - typedef math::Size_t SizeType; - typedef math::Size_t PitchType; -public: - Type* dataPointer; - int* refCount; - SizeType _size; - PitchType pitch; - HDINLINE void init(); - HDINLINE void exit(); - HDINLINE CartBuffer() : refCount(nullptr) {} + public: + Type* dataPointer; + int* refCount; + SizeType _size; + PitchType pitch; + HDINLINE void init(); + HDINLINE void exit(); + HDINLINE CartBuffer() : refCount(nullptr) + { + } -public: - HDINLINE CartBuffer(const math::Size_t& size); - HDINLINE CartBuffer(size_t x); - HDINLINE CartBuffer(size_t x, size_t y); - HDINLINE CartBuffer(size_t x, size_t y, size_t z); - /* the copy constructor just increments the reference counter but does not copy memory */ - HDINLINE CartBuffer(const CartBuffer& other); - HDINLINE CartBuffer(CartBuffer&& other); - HDINLINE ~CartBuffer(); + public: + HDINLINE CartBuffer(const math::Size_t& size); + HDINLINE CartBuffer(size_t x); + HDINLINE CartBuffer(size_t x, size_t y); + HDINLINE CartBuffer(size_t x, size_t y, size_t z); + /* the copy constructor just increments the reference counter but does not copy memory */ + HDINLINE CartBuffer(const CartBuffer& other); + HDINLINE CartBuffer(CartBuffer&& other); + 
HDINLINE ~CartBuffer(); - /* copy another container into this one (hard data copy) */ - HDINLINE CartBuffer& - operator=(const CartBuffer& rhs); - /* use the memory from another container and increment the reference counter */ - HDINLINE CartBuffer& - operator=(CartBuffer&& rhs); + /* copy another container into this one (hard data copy) */ + HDINLINE CartBuffer& operator=(const CartBuffer& rhs); + /* use the memory from another container and increment the reference counter */ + HDINLINE CartBuffer& operator=(CartBuffer&& rhs); - /* get a view. Views represent a clipped area of the container. - * \param a Top left corner of the view, inside the view. - * Negative values are remapped, e.g. Int<2>(-1,-2) == Int<2>(width-1, height-2) - * \param b Bottom right corner of the view, outside the view. - * Values are remapped, so that Int<2>(0,0) == Int<2>(width, height) - */ - HDINLINE View - view(math::Int a = math::Int(0), - math::Int b = math::Int(0)) const; + /* get a view. Views represent a clipped area of the container. + * \param a Top left corner of the view, inside the view. + * Negative values are remapped, e.g. Int<2>(-1,-2) == Int<2>(width-1, height-2) + * \param b Bottom right corner of the view, outside the view. + * Values are remapped, so that Int<2>(0,0) == Int<2>(width, height) + */ + HDINLINE View view( + math::Int a = math::Int(0), + math::Int b = math::Int(0)) const; - /* get a cursor at the container's origin cell */ - HDINLINE cursor::BufferCursor origin() const; - /* get a safe cursor at the container's origin cell */ - HDINLINE cursor::SafeCursor > originSafe() const; - /* get a component-twisted cursor at the container's origin cell - * \param axes x-axis -> axes[0], y-axis -> axes[1], ... 
- * */ - HDINLINE cursor::Cursor, cursor::CartNavigator, char*> - originCustomAxes(const math::UInt32& axes) const; + /* get a cursor at the container's origin cell */ + HDINLINE cursor::BufferCursor origin() const; + /* get a safe cursor at the container's origin cell */ + HDINLINE cursor::SafeCursor> originSafe() const; + /* get a component-twisted cursor at the container's origin cell + * \param axes x-axis -> axes[0], y-axis -> axes[1], ... + * */ + HDINLINE cursor::Cursor, cursor::CartNavigator, char*> + originCustomAxes(const math::UInt32& axes) const; - /* get a zone spanning the whole container */ - HDINLINE zone::SphericZone zone() const; + /* get a zone spanning the whole container */ + HDINLINE zone::SphericZone zone() const; - HDINLINE Type* getDataPointer() const {return dataPointer;} - HDINLINE math::Size_t size() const {return this->_size;} - HDINLINE math::Size_t getPitch() const {return this->pitch;} - /** Returns whether the buffer has no additional pitches - * The expected pitches are: 2D: size.x, 3D: size.x/size.x*size.y - */ - HDINLINE bool isContigousMemory() const; -}; + HDINLINE Type* getDataPointer() const + { + return dataPointer; + } + HDINLINE math::Size_t size() const + { + return this->_size; + } + HDINLINE math::Size_t getPitch() const + { + return this->pitch; + } + /** Returns whether the buffer has no additional pitches + * The expected pitches are: 2D: size.x, 3D: size.x/size.x*size.y + */ + HDINLINE bool isContigousMemory() const; + }; -} // container -} // pmacc + } // namespace container +} // namespace pmacc #include "CartBuffer.tpp" diff --git a/include/pmacc/cuSTL/container/CartBuffer.tpp b/include/pmacc/cuSTL/container/CartBuffer.tpp index 0e08bad4b2..faa8190dbd 100644 --- a/include/pmacc/cuSTL/container/CartBuffer.tpp +++ b/include/pmacc/cuSTL/container/CartBuffer.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz, * 
Alexander Grund * * This file is part of PMacc. @@ -34,326 +34,325 @@ namespace pmacc { -namespace container -{ - -namespace detail -{ - template - struct PitchHelper; - - template<> - struct PitchHelper<1> + namespace container { - template - HDINLINE math::Size_t<0u> operator()(const TCursor&) {return math::Size_t<0u>();} + namespace detail + { + template + struct PitchHelper; + + template<> + struct PitchHelper<1> + { + template + HDINLINE math::Size_t<0u> operator()(const TCursor&) + { + return math::Size_t<0u>(); + } + + HDINLINE math::Size_t<0u> operator()(const math::Size_t<1u>&) + { + return math::Size_t<0u>(); + } + }; + template<> + struct PitchHelper<2> + { + template + HDINLINE math::Size_t<1> operator()(const TCursor& cursor) + { + return math::Size_t<1>(size_t((char*) cursor(0, 1).getMarker() - (char*) cursor.getMarker())); + } + + HDINLINE math::Size_t<1> operator()(const math::Size_t<2>& size) + { + return math::Size_t<1>(size.x()); + } + }; + template<> + struct PitchHelper<3> + { + template + HDINLINE math::Size_t<2> operator()(const TCursor& cursor) + { + return math::Size_t<2>( + (size_t)((char*) cursor(0, 1, 0).getMarker() - (char*) cursor.getMarker()), + (size_t)((char*) cursor(0, 0, 1).getMarker() - (char*) cursor.getMarker())); + } + + HDINLINE math::Size_t<2> operator()(const math::Size_t<3>& size) + { + return math::Size_t<2>(size.x(), size.x() * size.y()); + } + }; + + template + HDINLINE void notifyEventSystem() + { + } + + template<> + HDINLINE void notifyEventSystem() + { +#ifndef __CUDA_ARCH__ + using namespace pmacc; + __startOperation(ITask::TASK_DEVICE); +#endif + } - HDINLINE math::Size_t<0u> operator()(const math::Size_t<1u>&) {return math::Size_t<0u>();} - }; - template<> - struct PitchHelper<2> - { - template - HDINLINE math::Size_t<1> operator()(const TCursor& cursor) + template<> + HDINLINE void notifyEventSystem() + { +#ifndef __CUDA_ARCH__ + using namespace pmacc; + __startOperation(ITask::TASK_HOST); +#endif + } + } // 
namespace detail + + template + HDINLINE CartBuffer::CartBuffer(const math::Size_t& _size) + : refCount(nullptr) { - return math::Size_t<1>(size_t((char*)cursor(0, 1).getMarker() - (char*)cursor.getMarker())); + this->_size = _size; + init(); } - HDINLINE math::Size_t<1> operator()(const math::Size_t<2>& size) + template + HDINLINE CartBuffer::CartBuffer(size_t x) : refCount(nullptr) { - return math::Size_t<1>(size.x()); + this->_size = math::Size_t<1>(x); + init(); } - }; - template<> - struct PitchHelper<3> - { - template - HDINLINE math::Size_t<2> operator()(const TCursor& cursor) + + template + HDINLINE CartBuffer::CartBuffer(size_t x, size_t y) + : refCount(nullptr) { - return math::Size_t<2>((size_t)((char*)cursor(0, 1, 0).getMarker() - (char*)cursor.getMarker()), - (size_t)((char*)cursor(0, 0, 1).getMarker() - (char*)cursor.getMarker())); + this->_size = math::Size_t<2>(x, y); + init(); } - HDINLINE math::Size_t<2> operator()(const math::Size_t<3>& size) + template + HDINLINE CartBuffer::CartBuffer(size_t x, size_t y, size_t z) + : refCount(nullptr) { - return math::Size_t<2>(size.x(), size.x() * size.y()); + this->_size = math::Size_t<3>(x, y, z); + init(); } - }; - - template - HDINLINE void notifyEventSystem() {} - - template<> - HDINLINE void notifyEventSystem() - { -#ifndef __CUDA_ARCH__ - using namespace pmacc; - __startOperation(ITask::TASK_CUDA); -#endif - } - - template<> - HDINLINE void notifyEventSystem() - { -#ifndef __CUDA_ARCH__ - using namespace pmacc; - __startOperation(ITask::TASK_HOST); -#endif - } -} - -template -HDINLINE -CartBuffer::CartBuffer -(const math::Size_t& _size) : refCount(nullptr) -{ - this->_size = _size; - init(); -} - -template -HDINLINE -CartBuffer::CartBuffer -(size_t x) : refCount(nullptr) -{ - this->_size = math::Size_t<1>(x); init(); -} -template -HDINLINE -CartBuffer::CartBuffer -(size_t x, size_t y) : refCount(nullptr) -{ - this->_size = math::Size_t<2>(x, y); init(); -} + template + HDINLINE CartBuffer::CartBuffer( 
+ const CartBuffer& other) + : refCount(nullptr) + { + this->dataPointer = other.dataPointer; + this->refCount = other.refCount; + (*this->refCount)++; + this->_size = other._size; + this->pitch = other.pitch; + } -template -HDINLINE -CartBuffer::CartBuffer -(size_t x, size_t y, size_t z) : refCount(nullptr) -{ - this->_size = math::Size_t<3>(x, y, z); init(); -} + template + HDINLINE CartBuffer::CartBuffer( + CartBuffer&& other) + : refCount(nullptr) + { + this->dataPointer = other.dataPointer; + this->refCount = other.refCount; + this->_size = other._size; + this->pitch = other.pitch; + other.dataPointer = nullptr; + other.refCount = nullptr; + } -template -HDINLINE -CartBuffer::CartBuffer -(const CartBuffer& other) : refCount(nullptr) -{ - this->dataPointer = other.dataPointer; - this->refCount = other.refCount; - (*this->refCount)++; - this->_size = other._size; - this->pitch = other.pitch; -} - -template -HDINLINE -CartBuffer::CartBuffer -(CartBuffer&& other) : refCount(nullptr) -{ - this->dataPointer = other.dataPointer; - this->refCount = other.refCount; - this->_size = other._size; - this->pitch = other.pitch; - other.dataPointer = nullptr; - other.refCount = nullptr; -} - -template -HDINLINE -void CartBuffer::init() -{ - typename Allocator::Cursor cursor = Allocator::allocate(this->_size); - this->dataPointer = cursor.getMarker(); + template + HDINLINE void CartBuffer::init() + { + typename Allocator::Cursor cursor = Allocator::allocate(this->_size); + this->dataPointer = cursor.getMarker(); #ifndef __CUDA_ARCH__ - this->refCount = new int; + this->refCount = new int; + *this->refCount = 1; #endif - *this->refCount = 1; - this->pitch = detail::PitchHelper()(cursor); -} + this->pitch = detail::PitchHelper()(cursor); + } -template -HDINLINE -CartBuffer::~CartBuffer() -{ - exit(); -} + template + HDINLINE CartBuffer::~CartBuffer() + { + exit(); + } -template -HDINLINE -void CartBuffer::exit() -{ - if(!this->refCount) return; - (*(this->refCount))--; - 
if(*(this->refCount) > 0) - return; - Allocator::deallocate(origin()); - this->dataPointer = nullptr; + template + HDINLINE void CartBuffer::exit() + { + if(!this->refCount) + return; + (*(this->refCount))--; + if(*(this->refCount) > 0) + return; + Allocator::deallocate(origin()); + this->dataPointer = nullptr; #ifndef __CUDA_ARCH__ - delete this->refCount; - this->refCount = 0; + delete this->refCount; + this->refCount = 0; #endif -} + } -template -HDINLINE -CartBuffer& -CartBuffer::operator= -(const CartBuffer& rhs) -{ + template + HDINLINE CartBuffer& + CartBuffer::operator=(const CartBuffer& rhs) + { #ifndef __CUDA_ARCH__ - if(rhs.size() != this->size()) - throw std::invalid_argument(static_cast( - std::stringstream() << "Assignment: Sizes of buffers do not match: " - << this->size() << " <-> " << rhs.size() << std::endl).str()); + if(rhs.size() != this->size()) + throw std::invalid_argument(static_cast( + std::stringstream() + << "Assignment: Sizes of buffers do not match: " << this->size() + << " <-> " << rhs.size() << std::endl) + .str()); #else - assert(rhs.size() == this->size()); + assert(rhs.size() == this->size()); #endif - if(this->dataPointer == rhs.dataPointer) return *this; - Copier::copy(this->dataPointer, this->pitch, rhs.dataPointer, rhs.pitch, rhs._size); - return *this; -} + if(this->dataPointer == rhs.dataPointer) + return *this; + Copier::copy(this->dataPointer, this->pitch, rhs.dataPointer, rhs.pitch, rhs._size); + return *this; + } -template -HDINLINE -CartBuffer& -CartBuffer::operator= -(CartBuffer&& rhs) -{ + template + HDINLINE CartBuffer& + CartBuffer::operator=(CartBuffer&& rhs) + { #ifndef __CUDA_ARCH__ - if(rhs.size() != this->size()) - throw std::invalid_argument(static_cast( - std::stringstream() << "Assignment: Sizes of buffers do not match: " - << this->size() << " <-> " << rhs.size() << std::endl).str()); + if(rhs.size() != this->size()) + throw std::invalid_argument(static_cast( + std::stringstream() + << "Assignment: Sizes of 
buffers do not match: " << this->size() + << " <-> " << rhs.size() << std::endl) + .str()); #else - assert(rhs.size() == this->size()); + assert(rhs.size() == this->size()); #endif - if(this->dataPointer == rhs.dataPointer) return *this; - - exit(); - this->dataPointer = rhs.dataPointer; - this->refCount = rhs.refCount; - this->_size = rhs._size; - this->pitch = rhs.pitch; - rhs.dataPointer = nullptr; - rhs.refCount = nullptr; - return *this; -} - -template -HDINLINE -View > -CartBuffer::view -(math::Int a, math::Int b) const -{ - a = (a + (math::Int)this->size()) % (math::Int)this->size(); - b = (b + (math::Int)this->size()) - % ((math::Int)this->size() + math::Int::create(1)); - - View > result; - - result.dataPointer = &(*origin()(a)); - result._size = (math::Size_t)(b - a); - result.pitch = this->pitch; - result.refCount = this->refCount; - return result; -} - -template -HDINLINE -cursor::BufferCursor CartBuffer::origin() const -{ - detail::notifyEventSystem(); - return cursor::BufferCursor(this->dataPointer, this->pitch); -} - -template -HDINLINE -cursor::SafeCursor > -CartBuffer::originSafe() const -{ - return cursor::make_SafeCursor(this->origin(), - math::Int::create(0), - math::Int(size())); -} - -template -HDINLINE -cursor::Cursor, cursor::CartNavigator, char*> -CartBuffer::originCustomAxes(const math::UInt32& axes) const -{ - math::Size_t factor; - factor[0] = sizeof(Type); - if(dim > 1) factor[1] = this->pitch[0]; - if(dim > 2) factor[2] = this->pitch[1]; - //\todo: is the conversation from size_t to int32_t allowed? 
- math::Int customFactor; - for(int i = 0; i < dim; i++) - customFactor[i] = (int)factor[axes[i]]; - cursor::CartNavigator navi(customFactor); - - detail::notifyEventSystem(); - - return cursor::Cursor, cursor::CartNavigator, char*> - (cursor::PointerAccessor(), navi, (char*)this->dataPointer); -} - -template -HDINLINE -zone::SphericZone -CartBuffer::zone() const -{ - zone::SphericZone myZone; - myZone.offset = math::Int::create(0); - myZone.size = this->_size; - return myZone; -} - -template -HDINLINE -bool -CartBuffer::isContigousMemory() const -{ - return this->pitch == detail::PitchHelper()(this->_size); -} + if(this->dataPointer == rhs.dataPointer) + return *this; + + exit(); + this->dataPointer = rhs.dataPointer; + this->refCount = rhs.refCount; + this->_size = rhs._size; + this->pitch = rhs.pitch; + rhs.dataPointer = nullptr; + rhs.refCount = nullptr; + return *this; + } -template -std::ostream& operator<<(std::ostream& s, const CartBuffer& con) -{ - for(size_t x = 0; x < con.size().x(); x++) - s << con.origin()[x] << " "; - return s << std::endl; -} + template + HDINLINE View> + CartBuffer::view(math::Int a, math::Int b) const + { + a = (a + (math::Int) this->size()) % (math::Int) this->size(); + b = (b + (math::Int) this->size()) + % ((math::Int) this->size() + math::Int::create(1)); -template -std::ostream& operator<<(std::ostream& s, const CartBuffer& con) -{ - for(size_t y = 0; y < con.size().y(); y++) - { - for(size_t x = 0; x < con.size().x(); x++) - s << *con.origin()(x,y) << " "; - s << std::endl; - } - return s << std::endl; -} - -template -std::ostream& operator<<(std::ostream& s, const CartBuffer& con) -{ - for(size_t z = 0; z < con.size().z(); z++) - { - for(size_t y = 0; y < con.size().y(); y++) + View> result; + + result.dataPointer = &(*origin()(a)); + result._size = (math::Size_t) (b - a); + result.pitch = this->pitch; + result.refCount = this->refCount; + return result; + } + + template + HDINLINE cursor::BufferCursor CartBuffer::origin() 
const + { + detail::notifyEventSystem(); + return cursor::BufferCursor(this->dataPointer, this->pitch); + } + + template + HDINLINE cursor::SafeCursor> + CartBuffer::originSafe() const + { + return cursor::make_SafeCursor(this->origin(), math::Int::create(0), math::Int(size())); + } + + template + HDINLINE cursor::Cursor, cursor::CartNavigator, char*> + CartBuffer::originCustomAxes(const math::UInt32& axes) const + { + math::Size_t factor; + factor[0] = sizeof(Type); + if(dim > 1) + factor[1] = this->pitch[0]; + if(dim > 2) + factor[2] = this->pitch[1]; + //\todo: is the conversation from size_t to int32_t allowed? + math::Int customFactor; + for(int i = 0; i < dim; i++) + customFactor[i] = (int) factor[axes[i]]; + cursor::CartNavigator navi(customFactor); + + detail::notifyEventSystem(); + + return cursor::Cursor, cursor::CartNavigator, char*>( + cursor::PointerAccessor(), + navi, + (char*) this->dataPointer); + } + + template + HDINLINE zone::SphericZone CartBuffer::zone() const + { + zone::SphericZone myZone; + myZone.offset = math::Int::create(0); + myZone.size = this->_size; + return myZone; + } + + template + HDINLINE bool CartBuffer::isContigousMemory() const + { + return this->pitch == detail::PitchHelper()(this->_size); + } + + template + std::ostream& operator<<(std::ostream& s, const CartBuffer& con) { for(size_t x = 0; x < con.size().x(); x++) - s << *con.origin()(x,y,z) << " "; - s << std::endl; + s << con.origin()[x] << " "; + return s << std::endl; + } + + template + std::ostream& operator<<(std::ostream& s, const CartBuffer& con) + { + for(size_t y = 0; y < con.size().y(); y++) + { + for(size_t x = 0; x < con.size().x(); x++) + s << *con.origin()(x, y) << " "; + s << std::endl; + } + return s << std::endl; + } + + template + std::ostream& operator<<(std::ostream& s, const CartBuffer& con) + { + for(size_t z = 0; z < con.size().z(); z++) + { + for(size_t y = 0; y < con.size().y(); y++) + { + for(size_t x = 0; x < con.size().x(); x++) + s << 
*con.origin()(x, y, z) << " "; + s << std::endl; + } + s << std::endl; + } + return s << std::endl; } - s << std::endl; - } - return s << std::endl; -} -} // container -} // pmacc + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/DeviceBuffer.hpp b/include/pmacc/cuSTL/container/DeviceBuffer.hpp index aef988a7f8..911e371f6b 100644 --- a/include/pmacc/cuSTL/container/DeviceBuffer.hpp +++ b/include/pmacc/cuSTL/container/DeviceBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -40,108 +40,134 @@ namespace pmacc { -namespace container -{ - -/** typedef version of a CartBuffer for a GPU. - * Additional feature: Able to copy data from a HostBuffer - * \tparam Type type of a single datum - * \tparam T_dim Dimension of the container - */ -template -class DeviceBuffer - : public CartBuffer, - copier::D2DCopier, - assigner::DeviceMemAssigner<> > -{ -private: - typedef CartBuffer, - copier::D2DCopier, - assigner::DeviceMemAssigner<> > Base; - -protected: - HDINLINE DeviceBuffer() {} - -public: - typedef typename Base::PitchType PitchType; - - /* constructors - * - * \param _size size of the container - * - * \param x,y,z convenient wrapper - * - */ - HDINLINE DeviceBuffer(const math::Size_t& size) : Base(size) {} - HDINLINE DeviceBuffer(size_t x) : Base(x) {} - HDINLINE DeviceBuffer(size_t x, size_t y) : Base(x, y) {} - HDINLINE DeviceBuffer(size_t x, size_t y, size_t z) : Base(x, y, z) {} - /** - * Creates a device buffer from a pointer with a size. 
Assumes dense layout (no padding) - * - * @param ptr Pointer to the first element - * @param size Size of the buffer - * @param ownMemory Set to false if the memory is only a reference and managed outside of this class - * Ignored for device side creation!y - * @param pitch Pitch in bytes (number of bytes in the lower dimensions) - */ - HDINLINE DeviceBuffer(Type* ptr, const math::Size_t& size, bool ownMemory, PitchType pitch = PitchType::create(0)) + namespace container { - this->dataPointer = ptr; - this->_size = size; - if(T_dim >= 2) - this->pitch[0] = (pitch[0]) ? pitch[0] : size.x() * sizeof(Type); - if(T_dim == 3) - this->pitch[1] = (pitch[1]) ? pitch[1] : this->pitch[0] * size.y(); + /** typedef version of a CartBuffer for a GPU. + * Additional feature: Able to copy data from a HostBuffer + * \tparam Type type of a single datum + * \tparam T_dim Dimension of the container + */ + template + class DeviceBuffer + : public CartBuffer< + Type, + T_dim, + allocator::DeviceMemAllocator, + copier::D2DCopier, + assigner::DeviceMemAssigner<>> + { + private: + typedef CartBuffer< + Type, + T_dim, + allocator::DeviceMemAllocator, + copier::D2DCopier, + assigner::DeviceMemAssigner<>> + Base; + + protected: + HDINLINE DeviceBuffer() + { + } + + public: + typedef typename Base::PitchType PitchType; + + /* constructors + * + * \param _size size of the container + * + * \param x,y,z convenient wrapper + * + */ + HDINLINE DeviceBuffer(const math::Size_t& size) : Base(size) + { + } + HDINLINE DeviceBuffer(size_t x) : Base(x) + { + } + HDINLINE DeviceBuffer(size_t x, size_t y) : Base(x, y) + { + } + HDINLINE DeviceBuffer(size_t x, size_t y, size_t z) : Base(x, y, z) + { + } + /** + * Creates a device buffer from a pointer with a size. 
Assumes dense layout (no padding) + * + * @param ptr Pointer to the first element + * @param size Size of the buffer + * @param ownMemory Set to false if the memory is only a reference and managed outside of this class + * Ignored for device side creation!y + * @param pitch Pitch in bytes (number of bytes in the lower dimensions) + */ + HDINLINE DeviceBuffer( + Type* ptr, + const math::Size_t& size, + bool ownMemory, + PitchType pitch = PitchType::create(0)) + { + this->dataPointer = ptr; + this->_size = size; + if(T_dim >= 2) + this->pitch[0] = (pitch[0]) ? pitch[0] : size.x() * sizeof(Type); + if(T_dim == 3) + this->pitch[1] = (pitch[1]) ? pitch[1] : this->pitch[0] * size.y(); #ifndef __CUDA_ARCH__ - this->refCount = new int; - *this->refCount = (ownMemory) ? 1 : 2; + this->refCount = new int; + *this->refCount = (ownMemory) ? 1 : 2; #endif - } - HDINLINE DeviceBuffer(const Base& base) : Base(base) {} - HDINLINE DeviceBuffer(DeviceBuffer&& obj): Base(std::move(static_cast(obj))) {} - - HDINLINE DeviceBuffer& - operator=(DeviceBuffer&& rhs) - { - Base::operator=(std::move(static_cast(rhs))); - return *this; - } - - template - HINLINE - typename boost::enable_if< - boost::is_same, - DeviceBuffer& - >::type - operator=(const HBuffer& rhs) - { - BOOST_STATIC_ASSERT((boost::is_same::value)); - BOOST_STATIC_ASSERT(HBuffer::dim == T_dim); - if(rhs.size() != this->size()) - throw std::invalid_argument(static_cast( - std::stringstream() << "Assignment: Sizes of buffers do not match: " - << this->size() << " <-> " << rhs.size() << std::endl).str()); - - cudaWrapper::Memcopy()(this->dataPointer, this->pitch, rhs.getDataPointer(), rhs.getPitch(), - this->_size, cudaWrapper::flags::Memcopy::hostToDevice); - - return *this; - } - - HINLINE DeviceBuffer& operator=(const Base& rhs) - { - Base::operator=(rhs); - return *this; - } - - HINLINE DeviceBuffer& operator=(const DeviceBuffer& rhs) - { - Base::operator=(rhs); - return *this; - } -}; - -} // container -} // pmacc - + } + 
HDINLINE DeviceBuffer(const Base& base) : Base(base) + { + } + HDINLINE DeviceBuffer(DeviceBuffer&& obj) : Base(std::move(static_cast(obj))) + { + } + + HDINLINE DeviceBuffer& operator=(DeviceBuffer&& rhs) + { + Base::operator=(std::move(static_cast(rhs))); + return *this; + } + + template + HINLINE typename boost:: + enable_if, DeviceBuffer&>::type + operator=(const HBuffer& rhs) + { + BOOST_STATIC_ASSERT((boost::is_same::value)); + BOOST_STATIC_ASSERT(HBuffer::dim == T_dim); + if(rhs.size() != this->size()) + throw std::invalid_argument(static_cast( + std::stringstream() + << "Assignment: Sizes of buffers do not match: " << this->size() + << " <-> " << rhs.size() << std::endl) + .str()); + + cuplaWrapper::Memcopy()( + this->dataPointer, + this->pitch, + rhs.getDataPointer(), + rhs.getPitch(), + this->_size, + cuplaWrapper::flags::Memcopy::hostToDevice); + + return *this; + } + + HINLINE DeviceBuffer& operator=(const Base& rhs) + { + Base::operator=(rhs); + return *this; + } + + HINLINE DeviceBuffer& operator=(const DeviceBuffer& rhs) + { + Base::operator=(rhs); + return *this; + } + }; + + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/HostBuffer.hpp b/include/pmacc/cuSTL/container/HostBuffer.hpp index 72d2a86690..0d1b10643c 100644 --- a/include/pmacc/cuSTL/container/HostBuffer.hpp +++ b/include/pmacc/cuSTL/container/HostBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -39,118 +39,149 @@ namespace pmacc { -namespace container -{ - -/** typedef version of a CartBuffer for a CPU. 
- * Additional feature: Able to copy data from a DeviceBuffer - * \tparam Type type of a single datum - * \tparam T_dim Dimension of the container - */ -template -class HostBuffer - : public CartBuffer, - copier::H2HCopier, - assigner::HostMemAssigner<> > -{ -private: - using Base = CartBuffer, - copier::H2HCopier, - assigner::HostMemAssigner<> >; -protected: - HostBuffer() {} -public: - using PitchType = typename Base::PitchType; - - /* constructors - * - * \param _size size of the container - * - * \param x,y,z convenient wrapper - * - */ - HINLINE HostBuffer(const math::Size_t& size) : Base(size) {} - HINLINE HostBuffer(size_t x) : Base(x) {} - HINLINE HostBuffer(size_t x, size_t y) : Base(x, y) {} - HINLINE HostBuffer(size_t x, size_t y, size_t z) : Base(x, y, z) {} - /** - * Creates a host buffer from a pointer with a size. Assumes dense layout (no padding) - * - * @param ptr Pointer to the first element - * @param size Size of the buffer - * @param ownMemory Set to false if the memory is only a reference and managed outside of this class - * @param pitch Pitch in bytes (number of bytes in the lower dimensions) - */ - HINLINE HostBuffer(Type* ptr, const math::Size_t<3>& size, bool ownMemory, math::Size_t<2> pitch = math::Size_t<2>::create(0) ) - { - this->dataPointer = ptr; - this->_size = size; - this->pitch[0] = (pitch[0]) ? pitch[0] : size.x() * sizeof(Type); - this->pitch[1] = (pitch[1]) ? pitch[1] : this->pitch[0] * size.y(); - this->refCount = new int; - *this->refCount = (ownMemory) ? 1 : 2; - } - HINLINE HostBuffer(Type* ptr, const math::Size_t<2>& size, bool ownMemory, math::Size_t<1> pitch = math::Size_t<1>::create(0) ) - { - this->dataPointer = ptr; - this->_size = size; - this->pitch[0] = (pitch[0]) ? pitch[0] : size.x() * sizeof(Type); - this->refCount = new int; - *this->refCount = (ownMemory) ? 
1 : 2; - } - HINLINE HostBuffer(Type* ptr, const math::Size_t<1>& size, bool ownMemory) - { - this->dataPointer = ptr; - this->_size = size; - // intentionally uninitialized and not RT accessible via [] - // this->pitch = pitch; - this->refCount = new int; - *this->refCount = (ownMemory) ? 1 : 2; - } - HINLINE HostBuffer(const Base& base) : Base(base) {} - HINLINE HostBuffer(HostBuffer&& obj): Base(std::move(static_cast(obj))) {} - - HINLINE HostBuffer& - operator=(HostBuffer&& rhs) - { - Base::operator=(std::move(static_cast(rhs))); - return *this; - } - - template - HINLINE - typename boost::enable_if< - boost::is_same, - HostBuffer& - >::type - operator=(const DBuffer& rhs) + namespace container { - BOOST_STATIC_ASSERT((boost::is_same::value)); - BOOST_STATIC_ASSERT(DBuffer::dim == T_dim); - if(rhs.size() != this->size()) - throw std::invalid_argument(static_cast( - std::stringstream() << "Assignment: Sizes of buffers do not match: " - << this->size() << " <-> " << rhs.size() << std::endl).str()); - - cudaWrapper::Memcopy()(this->dataPointer, this->pitch, rhs.getDataPointer(), rhs.getPitch(), - this->_size, cudaWrapper::flags::Memcopy::deviceToHost); - - return *this; - } - - HINLINE HostBuffer& operator=(const Base& rhs) - { - Base::operator=(rhs); - return *this; - } - - HINLINE HostBuffer& operator=(const HostBuffer& rhs) - { - Base::operator=(rhs); - return *this; - } -}; - -} // container -} // pmacc - + /** typedef version of a CartBuffer for a CPU. 
+ * Additional feature: Able to copy data from a DeviceBuffer + * \tparam Type type of a single datum + * \tparam T_dim Dimension of the container + */ + template + class HostBuffer + : public CartBuffer< + Type, + T_dim, + allocator::HostMemAllocator, + copier::H2HCopier, + assigner::HostMemAssigner<>> + { + private: + using Base = CartBuffer< + Type, + T_dim, + allocator::HostMemAllocator, + copier::H2HCopier, + assigner::HostMemAssigner<>>; + + protected: + HostBuffer() + { + } + + public: + using PitchType = typename Base::PitchType; + + /* constructors + * + * \param _size size of the container + * + * \param x,y,z convenient wrapper + * + */ + HINLINE HostBuffer(const math::Size_t& size) : Base(size) + { + } + HINLINE HostBuffer(size_t x) : Base(x) + { + } + HINLINE HostBuffer(size_t x, size_t y) : Base(x, y) + { + } + HINLINE HostBuffer(size_t x, size_t y, size_t z) : Base(x, y, z) + { + } + /** + * Creates a host buffer from a pointer with a size. Assumes dense layout (no padding) + * + * @param ptr Pointer to the first element + * @param size Size of the buffer + * @param ownMemory Set to false if the memory is only a reference and managed outside of this class + * @param pitch Pitch in bytes (number of bytes in the lower dimensions) + */ + HINLINE HostBuffer( + Type* ptr, + const math::Size_t<3>& size, + bool ownMemory, + math::Size_t<2> pitch = math::Size_t<2>::create(0)) + { + this->dataPointer = ptr; + this->_size = size; + this->pitch[0] = (pitch[0]) ? pitch[0] : size.x() * sizeof(Type); + this->pitch[1] = (pitch[1]) ? pitch[1] : this->pitch[0] * size.y(); + this->refCount = new int; + *this->refCount = (ownMemory) ? 1 : 2; + } + HINLINE HostBuffer( + Type* ptr, + const math::Size_t<2>& size, + bool ownMemory, + math::Size_t<1> pitch = math::Size_t<1>::create(0)) + { + this->dataPointer = ptr; + this->_size = size; + this->pitch[0] = (pitch[0]) ? pitch[0] : size.x() * sizeof(Type); + this->refCount = new int; + *this->refCount = (ownMemory) ? 
1 : 2; + } + HINLINE HostBuffer(Type* ptr, const math::Size_t<1>& size, bool ownMemory) + { + this->dataPointer = ptr; + this->_size = size; + // intentionally uninitialized and not RT accessible via [] + // this->pitch = pitch; + this->refCount = new int; + *this->refCount = (ownMemory) ? 1 : 2; + } + HINLINE HostBuffer(const Base& base) : Base(base) + { + } + HINLINE HostBuffer(HostBuffer&& obj) : Base(std::move(static_cast(obj))) + { + } + + HINLINE HostBuffer& operator=(HostBuffer&& rhs) + { + Base::operator=(std::move(static_cast(rhs))); + return *this; + } + + template + HINLINE typename boost:: + enable_if, HostBuffer&>::type + operator=(const DBuffer& rhs) + { + BOOST_STATIC_ASSERT((boost::is_same::value)); + BOOST_STATIC_ASSERT(DBuffer::dim == T_dim); + if(rhs.size() != this->size()) + throw std::invalid_argument(static_cast( + std::stringstream() + << "Assignment: Sizes of buffers do not match: " << this->size() + << " <-> " << rhs.size() << std::endl) + .str()); + + cuplaWrapper::Memcopy()( + this->dataPointer, + this->pitch, + rhs.getDataPointer(), + rhs.getPitch(), + this->_size, + cuplaWrapper::flags::Memcopy::deviceToHost); + + return *this; + } + + HINLINE HostBuffer& operator=(const Base& rhs) + { + Base::operator=(rhs); + return *this; + } + + HINLINE HostBuffer& operator=(const HostBuffer& rhs) + { + Base::operator=(rhs); + return *this; + } + }; + + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/IndexBuffer.hpp b/include/pmacc/cuSTL/container/IndexBuffer.hpp index 0265c2084a..a297cf17ff 100644 --- a/include/pmacc/cuSTL/container/IndexBuffer.hpp +++ b/include/pmacc/cuSTL/container/IndexBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -32,63 +32,66 @@ namespace pmacc { -namespace container -{ - -template -class IndexBuffer -{ -private: - math::UInt32 _size; -public: - IndexBuffer(const math::UInt32& _size) : _size(_size) {} - IndexBuffer(uint32_t x) : _size(x) {} - IndexBuffer(uint32_t x, uint32_t y) : _size(x,y) {} - IndexBuffer(uint32_t x, uint32_t y, uint32_t z) : _size(x,y,z) {} - - inline - cursor::Cursor >, - cursor::CartNavigator, - math::Int > - origin() const + namespace container { - math::Int factor; - factor[0] = 1; factor[1] = this->_size.x(); - if(dim == 3) factor[2] = this->_size.x() * this->_size.y(); + template + class IndexBuffer + { + private: + math::UInt32 _size; - return cursor::Cursor >, - cursor::CartNavigator, - math::Int > - (cursor::MarkerAccessor >(), - cursor::CartNavigator(factor), - math::Int(0)); - } - inline - cursor::Cursor >, - cursor::CartNavigator, - math::Int > - originCustomAxes(const math::UInt32& axes) const - { - math::Int factor; - factor[0] = 1; factor[1] = this->_size.x(); - if(dim == 3) factor[2] = this->_size.x() * this->_size.y(); - math::Int customFactor; - for(uint32_t i = 0; i < dim; i++) - customFactor[i] = factor[axes[i]]; + public: + IndexBuffer(const math::UInt32& _size) : _size(_size) + { + } + IndexBuffer(uint32_t x) : _size(x) + { + } + IndexBuffer(uint32_t x, uint32_t y) : _size(x, y) + { + } + IndexBuffer(uint32_t x, uint32_t y, uint32_t z) : _size(x, y, z) + { + } - return cursor::Cursor >, - cursor::CartNavigator, - math::Int > - (cursor::MarkerAccessor >(), - cursor::CartNavigator(customFactor), - math::Int(0)); - } - inline zone::SphericZone zone() const - { - return zone::SphericZone((math::Size_t)this->_size); - } -}; + inline cursor::Cursor>, cursor::CartNavigator, math::Int> + origin() const + { + math::Int factor; + factor[0] = 1; + factor[1] = this->_size.x(); + if(dim == 3) + factor[2] = this->_size.x() * this->_size.y(); + + return cursor:: + Cursor>, cursor::CartNavigator, math::Int>( + cursor::MarkerAccessor>(), + 
cursor::CartNavigator(factor), + math::Int(0)); + } + inline cursor::Cursor>, cursor::CartNavigator, math::Int> + originCustomAxes(const math::UInt32& axes) const + { + math::Int factor; + factor[0] = 1; + factor[1] = this->_size.x(); + if(dim == 3) + factor[2] = this->_size.x() * this->_size.y(); + math::Int customFactor; + for(uint32_t i = 0; i < dim; i++) + customFactor[i] = factor[axes[i]]; -} // container -} // pmacc + return cursor:: + Cursor>, cursor::CartNavigator, math::Int>( + cursor::MarkerAccessor>(), + cursor::CartNavigator(customFactor), + math::Int(0)); + } + inline zone::SphericZone zone() const + { + return zone::SphericZone((math::Size_t) this->_size); + } + }; + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/PNGBuffer.hpp b/include/pmacc/cuSTL/container/PNGBuffer.hpp index 0658c2186e..9fa88c0d0f 100644 --- a/include/pmacc/cuSTL/container/PNGBuffer.hpp +++ b/include/pmacc/cuSTL/container/PNGBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -32,70 +32,86 @@ namespace pmacc { -namespace container -{ - -/** Think of a container being a PNG-image - * offers only write-only access - */ -class PNGBuffer -{ -private: - class Plotter + namespace container { - private: - pngwriter& png; - math::Int<2> pos; - public: - Plotter(pngwriter& png) : png(png) {} - inline Plotter& operator=(const math::Float<3>& color) - { - png.plot(pos.x()+1, pos.y()+1, (double)color.x(), (double)color.y(), (double)color.z()); - return *this; - } - void setPos(const math::Int<2>& pos) + /** Think of a container being a PNG-image + * offers only write-only access + */ + class PNGBuffer { - this->pos = pos; - } - }; - struct Accessor - { - typedef Plotter& type; - pngwriter& png; - Plotter plotter; - Accessor(pngwriter& png) : png(png), plotter(png) {} - inline type operator()(math::Int<2>& index) - { - plotter.setPos(index); - return this->plotter; - } - }; - pngwriter png; - math::Size_t<2> size; -public: - typedef cursor::Cursor, math::Int<2> > Cursor; + private: + class Plotter + { + private: + pngwriter& png; + math::Int<2> pos; - /* constructor - * \param x width of png image - * \param y height of png image - * \name name of png file - */ - PNGBuffer(int x, int y, const std::string& name) : png(x, y, 0.0, name.data()), size(x,y) {} - PNGBuffer(math::Size_t<2> size, const std::string& name) : png(size.x(), size.y(), 0.0, name.data()), size(size) {} - ~PNGBuffer() {png.close();} + public: + Plotter(pngwriter& png) : png(png) + { + } + inline Plotter& operator=(const math::Float<3>& color) + { + png.plot(pos.x() + 1, pos.y() + 1, (double) color.x(), (double) color.y(), (double) color.z()); + return *this; + } + void setPos(const math::Int<2>& pos) + { + this->pos = pos; + } + }; + struct Accessor + { + typedef Plotter& type; + pngwriter& png; + Plotter plotter; + Accessor(pngwriter& png) : png(png), plotter(png) + { + } + inline type operator()(math::Int<2>& index) + { + plotter.setPos(index); + return this->plotter; + } + 
}; + pngwriter png; + math::Size_t<2> size; - /* get a cursor at the top left pixel - * access via a Float<3> reference - */ - inline Cursor origin() - { - return Cursor(Accessor(this->png), cursor::MultiIndexNavigator<2>(), math::Int<2>(0)); - } + public: + typedef cursor::Cursor, math::Int<2>> Cursor; + + /* constructor + * \param x width of png image + * \param y height of png image + * \name name of png file + */ + PNGBuffer(int x, int y, const std::string& name) : png(x, y, 0.0, name.data()), size(x, y) + { + } + PNGBuffer(math::Size_t<2> size, const std::string& name) + : png(size.x(), size.y(), 0.0, name.data()) + , size(size) + { + } + ~PNGBuffer() + { + png.close(); + } - /* get a zone spanning the whole container */ - inline zone::SphericZone<2> zone() const {return zone::SphericZone<2>(this->size);} -}; + /* get a cursor at the top left pixel + * access via a Float<3> reference + */ + inline Cursor origin() + { + return Cursor(Accessor(this->png), cursor::MultiIndexNavigator<2>(), math::Int<2>(0)); + } -} // container -} // pmacc + /* get a zone spanning the whole container */ + inline zone::SphericZone<2> zone() const + { + return zone::SphericZone<2>(this->size); + } + }; + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/PseudoBuffer.hpp b/include/pmacc/cuSTL/container/PseudoBuffer.hpp index 348e2cff3b..b02675b41f 100644 --- a/include/pmacc/cuSTL/container/PseudoBuffer.hpp +++ b/include/pmacc/cuSTL/container/PseudoBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -27,20 +27,18 @@ namespace pmacc { -namespace container -{ - -template -struct PseudoBuffer : public container::CartBuffer -{ - template - PseudoBuffer(pmacc::DeviceBuffer<_Type, dim>& devBuffer); - template - PseudoBuffer(pmacc::HostBuffer<_Type, dim>& hostBuffer); -}; + namespace container + { + template + struct PseudoBuffer : public container::CartBuffer + { + template + PseudoBuffer(pmacc::DeviceBuffer<_Type, dim>& devBuffer); + template + PseudoBuffer(pmacc::HostBuffer<_Type, dim>& hostBuffer); + }; -} // container -} // pmacc + } // namespace container +} // namespace pmacc #include "PseudoBuffer.tpp" - diff --git a/include/pmacc/cuSTL/container/PseudoBuffer.tpp b/include/pmacc/cuSTL/container/PseudoBuffer.tpp index 53365ad561..c70372e71e 100644 --- a/include/pmacc/cuSTL/container/PseudoBuffer.tpp +++ b/include/pmacc/cuSTL/container/PseudoBuffer.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -23,37 +23,38 @@ namespace pmacc { -namespace container -{ - -template -template -PseudoBuffer::PseudoBuffer(pmacc::DeviceBuffer<_Type, dim>& devBuffer) -{ - cudaPitchedPtr cudaData = devBuffer.getCudaPitched(); - this->dataPointer = (Type*)cudaData.ptr; - this->_size = (math::Size_t)devBuffer.getDataSpace(); - if(dim == 2) this->pitch[0] = cudaData.pitch; - if(dim == 3) + namespace container { - this->pitch[0] = cudaData.pitch; - this->pitch[1] = cudaData.pitch * this->_size.y(); - } -} + template + template + PseudoBuffer::PseudoBuffer(pmacc::DeviceBuffer<_Type, dim>& devBuffer) + { + cuplaPitchedPtr cuplaData = devBuffer.getCudaPitched(); + this->dataPointer = (Type*) cuplaData.ptr; + this->_size = (math::Size_t) devBuffer.getDataSpace(); + if(dim == 2) + this->pitch[0] = cuplaData.pitch; + if(dim == 3) + { + this->pitch[0] = cuplaData.pitch; + this->pitch[1] = cuplaData.pitch * this->_size.y(); + } + } -template -template -PseudoBuffer::PseudoBuffer(pmacc::HostBuffer<_Type, dim>& hostBuffer) -{ - this->dataPointer = (Type*)hostBuffer.getBasePointer(); - this->_size = (math::Size_t)hostBuffer.getDataSpace(); - if(dim == 2) this->pitch[0] = sizeof(Type) * this->_size[0]; - if(dim == 3) - { - this->pitch[0] = sizeof(Type) * this->_size[0]; - this->pitch[1] = this->pitch[0] * this->_size[1]; - } -} + template + template + PseudoBuffer::PseudoBuffer(pmacc::HostBuffer<_Type, dim>& hostBuffer) + { + this->dataPointer = (Type*) hostBuffer.getBasePointer(); + this->_size = (math::Size_t) hostBuffer.getDataSpace(); + if(dim == 2) + this->pitch[0] = sizeof(Type) * this->_size[0]; + if(dim == 3) + { + this->pitch[0] = sizeof(Type) * this->_size[0]; + this->pitch[1] = this->pitch[0] * this->_size[1]; + } + } -} // container -} // pmacc + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.hpp b/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.hpp index 8f2b601092..17aa2025f5 100644 --- 
a/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.hpp +++ b/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -29,41 +29,37 @@ namespace pmacc { -namespace allocator -{ + namespace allocator + { + template + struct DeviceMemAllocator + { + typedef Type type; + static constexpr int dim = T_dim; + typedef cursor::BufferCursor Cursor; + typedef allocator::tag::device tag; -template -struct DeviceMemAllocator -{ - typedef Type type; - static constexpr int dim = T_dim; - typedef cursor::BufferCursor Cursor; - typedef allocator::tag::device tag; + HDINLINE + static cursor::BufferCursor allocate(const math::Size_t& size); + template + HDINLINE static void deallocate(const TCursor& cursor); + }; - HDINLINE - static cursor::BufferCursor allocate(const math::Size_t& size); - template - HDINLINE - static void deallocate(const TCursor& cursor); -}; + template + struct DeviceMemAllocator + { + typedef Type type; + static constexpr int dim = 1; + typedef cursor::BufferCursor Cursor; + typedef allocator::tag::device tag; -template -struct DeviceMemAllocator -{ - typedef Type type; - static constexpr int dim = 1; - typedef cursor::BufferCursor Cursor; - typedef allocator::tag::device tag; + HDINLINE + static cursor::BufferCursor allocate(const math::Size_t<1>& size); + template + HDINLINE static void deallocate(const TCursor& cursor); + }; - HDINLINE - static cursor::BufferCursor allocate(const math::Size_t<1>& size); - template - HDINLINE - static void deallocate(const TCursor& cursor); -}; - -} // allocator -} // pmacc + } // namespace allocator +} // namespace pmacc #include "DeviceMemAllocator.tpp" - diff --git a/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.tpp b/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.tpp index 07a8f70aac..939c1a317f 100644 --- 
a/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.tpp +++ b/include/pmacc/cuSTL/container/allocator/DeviceMemAllocator.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -23,94 +23,88 @@ namespace pmacc { -namespace allocator -{ - -template -HDINLINE -cursor::BufferCursor -DeviceMemAllocator::allocate(const math::Size_t& size) -{ + namespace allocator + { + template + HDINLINE cursor::BufferCursor DeviceMemAllocator::allocate( + const math::Size_t& size) + { #ifndef __CUDA_ARCH__ - Type* dataPointer; - math::Size_t pitch; - cudaPitchedPtr cudaData; + Type* dataPointer; + math::Size_t pitch; + cuplaPitchedPtr cuplaData; - cudaData.ptr = nullptr; - cudaData.pitch = 1; - cudaData.xsize = size[0] * sizeof (Type); - cudaData.ysize = 1; + cuplaData.ptr = nullptr; + cuplaData.pitch = 1; + cuplaData.xsize = size[0] * sizeof(Type); + cuplaData.ysize = 1; - if (dim == 2u) - { - cudaData.xsize = size[0] * sizeof (Type); - cudaData.ysize = size[1]; - if(size.productOfComponents()) - CUDA_CHECK(cudaMallocPitch(&cudaData.ptr, &cudaData.pitch, cudaData.xsize, cudaData.ysize)); - pitch[0] = cudaData.pitch; - } - else if (dim == 3u) - { - cudaExtent extent; - extent.width = size[0] * sizeof (Type); - extent.height = size[1]; - extent.depth = size[2]; - if(size.productOfComponents()) - CUDA_CHECK(cudaMalloc3D(&cudaData, extent)); - pitch[0] = cudaData.pitch; - pitch[1] = cudaData.pitch * size[1]; - } - dataPointer = (Type*)cudaData.ptr; + if(dim == 2u) + { + cuplaData.xsize = size[0] * sizeof(Type); + cuplaData.ysize = size[1]; + if(size.productOfComponents()) + CUDA_CHECK(cuplaMallocPitch(&cuplaData.ptr, &cuplaData.pitch, cuplaData.xsize, cuplaData.ysize)); + pitch[0] = cuplaData.pitch; + } + else if(dim == 3u) + { + cuplaExtent extent; + extent.width = size[0] * sizeof(Type); + extent.height = size[1]; + extent.depth = size[2]; + 
if(size.productOfComponents()) + CUDA_CHECK(cuplaMalloc3D(&cuplaData, extent)); + pitch[0] = cuplaData.pitch; + pitch[1] = cuplaData.pitch * size[1]; + } + dataPointer = (Type*) cuplaData.ptr; - return cursor::BufferCursor(dataPointer, pitch); + return cursor::BufferCursor(dataPointer, pitch); #endif #ifdef __CUDA_ARCH__ - Type* dataPointer = nullptr; - math::Size_t pitch; - return cursor::BufferCursor(dataPointer, pitch); + Type* dataPointer = nullptr; + math::Size_t pitch; + return cursor::BufferCursor(dataPointer, pitch); #endif -} + } -template -HDINLINE -cursor::BufferCursor -DeviceMemAllocator::allocate(const math::Size_t<1>& size) -{ + template + HDINLINE cursor::BufferCursor DeviceMemAllocator::allocate(const math::Size_t<1>& size) + { #ifndef __CUDA_ARCH__ - Type* dataPointer = nullptr; + Type* dataPointer = nullptr; - if(size[0]) - CUDA_CHECK(cudaMalloc((void**)&dataPointer, size[0] * sizeof(Type))); + if(size[0]) + CUDA_CHECK(cuplaMalloc((void**) &dataPointer, size[0] * sizeof(Type))); - return cursor::BufferCursor(dataPointer, math::Size_t<0>()); + return cursor::BufferCursor(dataPointer, math::Size_t<0>()); #endif #ifdef __CUDA_ARCH__ - Type* dataPointer = nullptr; - return cursor::BufferCursor(dataPointer, math::Size_t<0>()); + Type* dataPointer = nullptr; + return cursor::BufferCursor(dataPointer, math::Size_t<0>()); #endif -} + } -template -template -HDINLINE -void DeviceMemAllocator::deallocate(const TCursor& cursor) -{ + template + template + HDINLINE void DeviceMemAllocator::deallocate(const TCursor& cursor) + { #ifndef __CUDA_ARCH__ - CUDA_CHECK(cudaFree(cursor.getMarker())); + CUDA_CHECK(cuplaFree(cursor.getMarker())); #endif -} + } -template -template -HDINLINE -void DeviceMemAllocator::deallocate(const TCursor& cursor) -{ + template + template + HDINLINE void DeviceMemAllocator::deallocate(const TCursor& cursor) + { #ifndef __CUDA_ARCH__ - CUDA_CHECK(cudaFree(cursor.getMarker())); + CUDA_CHECK(cuplaFree(cursor.getMarker())); #endif -} + } -} 
// allocator -} // pmacc + } // namespace allocator +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.hpp b/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.hpp index d13fde5400..ea86594d9f 100644 --- a/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.hpp +++ b/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -29,37 +29,35 @@ namespace pmacc { -namespace allocator -{ - -template -struct DeviceMemEvenPitch -{ - typedef Type type; - static constexpr int dim = T_dim; - typedef cursor::BufferCursor Cursor; - typedef allocator::tag::device tag; - - static cursor::BufferCursor allocate(const math::Size_t& size); - template - static void deallocate(const TCursor& cursor); -}; - -template -struct DeviceMemEvenPitch -{ - typedef Type type; - static constexpr int dim = 1; - typedef cursor::BufferCursor Cursor; - typedef allocator::tag::device tag; - - static cursor::BufferCursor allocate(const math::Size_t<1>& size); - template - static void deallocate(const TCursor& cursor); -}; - -} // allocator -} // pmacc + namespace allocator + { + template + struct DeviceMemEvenPitch + { + typedef Type type; + static constexpr int dim = T_dim; + typedef cursor::BufferCursor Cursor; + typedef allocator::tag::device tag; + + static cursor::BufferCursor allocate(const math::Size_t& size); + template + static void deallocate(const TCursor& cursor); + }; + + template + struct DeviceMemEvenPitch + { + typedef Type type; + static constexpr int dim = 1; + typedef cursor::BufferCursor Cursor; + typedef allocator::tag::device tag; + + static cursor::BufferCursor allocate(const math::Size_t<1>& size); + template + static void deallocate(const TCursor& cursor); + }; + + } // namespace allocator +} // namespace pmacc #include 
"DeviceMemEvenPitchAllocator.tpp" - diff --git a/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.tpp b/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.tpp index 49fd3d88a9..d3de542aef 100644 --- a/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.tpp +++ b/include/pmacc/cuSTL/container/allocator/DeviceMemEvenPitchAllocator.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -23,57 +23,54 @@ namespace pmacc { -namespace allocator -{ + namespace allocator + { + template + cursor::BufferCursor DeviceMemEvenPitch::allocate(const math::Size_t& size) + { + Type* dataPointer = nullptr; + math::Size_t pitch; -template -cursor::BufferCursor -DeviceMemEvenPitch::allocate(const math::Size_t& size) -{ - Type* dataPointer = nullptr; - math::Size_t pitch; + if(size.productOfComponents()) + CUDA_CHECK(cuplaMalloc((void**) &dataPointer, sizeof(Type) * size.productOfComponents())); - if(size.productOfComponents()) - CUDA_CHECK(cudaMalloc((void**)&dataPointer, sizeof(Type) * size.productOfComponents())); + if(dim == 2u) + { + pitch[0] = sizeof(Type) * size[0]; + } + else if(dim == 3u) + { + pitch[0] = sizeof(Type) * size[0]; + pitch[1] = pitch[0] * size[1]; + } - if (dim == 2u) - { - pitch[0] = sizeof(Type) * size[0]; - } - else if (dim == 3u) - { - pitch[0] = sizeof(Type) * size[0]; - pitch[1] = pitch[0] * size[1]; - } + return cursor::BufferCursor(dataPointer, pitch); + } - return cursor::BufferCursor(dataPointer, pitch); -} + template + cursor::BufferCursor DeviceMemEvenPitch::allocate(const math::Size_t<1>& size) + { + Type* dataPointer = nullptr; -template -cursor::BufferCursor -DeviceMemEvenPitch::allocate(const math::Size_t<1>& size) -{ - Type* dataPointer = nullptr; - - if(size.productOfComponents()) - CUDA_CHECK(cudaMalloc((void**)&dataPointer, size[0] * sizeof(Type))); + 
if(size.productOfComponents()) + CUDA_CHECK(cuplaMalloc((void**) &dataPointer, size[0] * sizeof(Type))); - return cursor::BufferCursor(dataPointer, math::Size_t<0>()); -} + return cursor::BufferCursor(dataPointer, math::Size_t<0>()); + } -template -template -void DeviceMemEvenPitch::deallocate(const TCursor& cursor) -{ - CUDA_CHECK(cudaFree(cursor.getMarker())); -} + template + template + void DeviceMemEvenPitch::deallocate(const TCursor& cursor) + { + CUDA_CHECK(cuplaFree(cursor.getMarker())); + } -template -template -void DeviceMemEvenPitch::deallocate(const TCursor& cursor) -{ - CUDA_CHECK(cudaFree(cursor.getMarker())); -} + template + template + void DeviceMemEvenPitch::deallocate(const TCursor& cursor) + { + CUDA_CHECK(cuplaFree(cursor.getMarker())); + } -} // allocator -} // pmacc + } // namespace allocator +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/allocator/EmptyAllocator.hpp b/include/pmacc/cuSTL/container/allocator/EmptyAllocator.hpp index 4bc0bfea95..4c0879201d 100644 --- a/include/pmacc/cuSTL/container/allocator/EmptyAllocator.hpp +++ b/include/pmacc/cuSTL/container/allocator/EmptyAllocator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,18 +28,17 @@ namespace pmacc { -namespace allocator -{ - -struct EmptyAllocator -{ - typedef allocator::tag::unspecified tag; - - template - HDINLINE - static void deallocate(const TCursor&) {} -}; - -} // allocator -} // pmacc - + namespace allocator + { + struct EmptyAllocator + { + typedef allocator::tag::unspecified tag; + + template + HDINLINE static void deallocate(const TCursor&) + { + } + }; + + } // namespace allocator +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/allocator/HostMemAllocator.hpp b/include/pmacc/cuSTL/container/allocator/HostMemAllocator.hpp index 6a19edff95..1d54069392 100644 --- a/include/pmacc/cuSTL/container/allocator/HostMemAllocator.hpp +++ b/include/pmacc/cuSTL/container/allocator/HostMemAllocator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -31,41 +31,37 @@ namespace pmacc { -namespace allocator -{ + namespace allocator + { + template + struct HostMemAllocator + { + typedef Type type; + static constexpr int dim = T_dim; + typedef cursor::BufferCursor Cursor; + typedef allocator::tag::host tag; -template -struct HostMemAllocator -{ - typedef Type type; - static constexpr int dim = T_dim; - typedef cursor::BufferCursor Cursor; - typedef allocator::tag::host tag; + HDINLINE + static cursor::BufferCursor allocate(const math::Size_t& size); + template + HDINLINE static void deallocate(const TCursor& cursor); + }; - HDINLINE - static cursor::BufferCursor allocate(const math::Size_t& size); - template - HDINLINE - static void deallocate(const TCursor& cursor); -}; + template + struct HostMemAllocator + { + typedef Type type; + static constexpr int dim = 1; + typedef cursor::BufferCursor Cursor; + typedef allocator::tag::host tag; -template -struct HostMemAllocator -{ - typedef Type type; - static constexpr int dim = 1; - typedef cursor::BufferCursor Cursor; - typedef allocator::tag::host tag; + HDINLINE + 
static cursor::BufferCursor allocate(const math::Size_t<1>& size); + template + HDINLINE static void deallocate(const TCursor& cursor); + }; - HDINLINE - static cursor::BufferCursor allocate(const math::Size_t<1>& size); - template - HDINLINE - static void deallocate(const TCursor& cursor); -}; - -} // allocator -} // pmacc + } // namespace allocator +} // namespace pmacc #include "HostMemAllocator.tpp" - diff --git a/include/pmacc/cuSTL/container/allocator/HostMemAllocator.tpp b/include/pmacc/cuSTL/container/allocator/HostMemAllocator.tpp index be90462acb..2a9fdbb7b8 100644 --- a/include/pmacc/cuSTL/container/allocator/HostMemAllocator.tpp +++ b/include/pmacc/cuSTL/container/allocator/HostMemAllocator.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -23,81 +23,75 @@ namespace pmacc { -namespace allocator -{ - -template -HDINLINE -cursor::BufferCursor -HostMemAllocator::allocate(const math::Size_t& size) -{ + namespace allocator + { + template + HDINLINE cursor::BufferCursor HostMemAllocator::allocate( + const math::Size_t& size) + { #ifndef __CUDA_ARCH__ - Type* dataPointer = nullptr; - math::Size_t pitch; + Type* dataPointer = nullptr; + math::Size_t pitch; - if(size.productOfComponents()) - CUDA_CHECK(cudaMallocHost((void**)&dataPointer, sizeof(Type) * size.productOfComponents())); - if(dim == 2u) - { - pitch[0] = size[0] * sizeof(Type); - } - else if(dim == 3u) - { - pitch[0] = size[0] * sizeof(Type); - pitch[1] = pitch[0] * size[1]; - } + if(size.productOfComponents()) + CUDA_CHECK(cuplaMallocHost((void**) &dataPointer, sizeof(Type) * size.productOfComponents())); + if(dim == 2u) + { + pitch[0] = size[0] * sizeof(Type); + } + else if(dim == 3u) + { + pitch[0] = size[0] * sizeof(Type); + pitch[1] = pitch[0] * size[1]; + } - return cursor::BufferCursor(dataPointer, pitch); + return cursor::BufferCursor(dataPointer, pitch); 
#endif #ifdef __CUDA_ARCH__ - Type* dataPointer = nullptr; - math::Size_t pitch; - return cursor::BufferCursor(dataPointer, pitch); + Type* dataPointer = nullptr; + math::Size_t pitch; + return cursor::BufferCursor(dataPointer, pitch); #endif -} + } -template -HDINLINE -cursor::BufferCursor -HostMemAllocator::allocate(const math::Size_t<1>& size) -{ + template + HDINLINE cursor::BufferCursor HostMemAllocator::allocate(const math::Size_t<1>& size) + { #ifndef __CUDA_ARCH__ - Type* dataPointer = nullptr; - math::Size_t<0> pitch; + Type* dataPointer = nullptr; + math::Size_t<0> pitch; - if(size.productOfComponents()) - CUDA_CHECK(cudaMallocHost((void**)&dataPointer, sizeof(Type) * size.productOfComponents())); + if(size.productOfComponents()) + CUDA_CHECK(cuplaMallocHost((void**) &dataPointer, sizeof(Type) * size.productOfComponents())); - return cursor::BufferCursor(dataPointer, pitch); + return cursor::BufferCursor(dataPointer, pitch); #endif #ifdef __CUDA_ARCH__ - Type* dataPointer = nullptr; - math::Size_t<0> pitch; - return cursor::BufferCursor(dataPointer, pitch); + Type* dataPointer = nullptr; + math::Size_t<0> pitch; + return cursor::BufferCursor(dataPointer, pitch); #endif -} + } -template -template -HDINLINE -void HostMemAllocator::deallocate(const TCursor& cursor) -{ + template + template + HDINLINE void HostMemAllocator::deallocate(const TCursor& cursor) + { #ifndef __CUDA_ARCH__ - CUDA_CHECK(cudaFreeHost(cursor.getMarker())); + CUDA_CHECK(cuplaFreeHost(cursor.getMarker())); #endif -} + } -template -template -HDINLINE -void HostMemAllocator::deallocate(const TCursor& cursor) -{ + template + template + HDINLINE void HostMemAllocator::deallocate(const TCursor& cursor) + { #ifndef __CUDA_ARCH__ - CUDA_CHECK(cudaFreeHost(cursor.getMarker())); + CUDA_CHECK(cuplaFreeHost(cursor.getMarker())); #endif -} + } -} // allocator -} // pmacc + } // namespace allocator +} // namespace pmacc diff --git 
a/include/pmacc/cuSTL/container/allocator/compile-time/SharedMemAllocator.hpp b/include/pmacc/cuSTL/container/allocator/compile-time/SharedMemAllocator.hpp index b408f37907..c42308973a 100644 --- a/include/pmacc/cuSTL/container/allocator/compile-time/SharedMemAllocator.hpp +++ b/include/pmacc/cuSTL/container/allocator/compile-time/SharedMemAllocator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -30,81 +30,65 @@ namespace pmacc { -namespace allocator -{ -namespace CT -{ -template -struct SharedMemAllocator; - -template -struct SharedMemAllocator -{ - typedef Type type; - typedef math::CT::UInt32<> Pitch; - static constexpr int dim = 1; - typedef cursor::CT::BufferCursor > Cursor; - - template< typename T_Acc > - DEVICEONLY static Cursor allocate( T_Acc const & acc ) + namespace allocator { - auto& shMem = pmacc::memory::shared::allocate< - uid, - memory::Array< - Type, - math::CT::volume< Size >::type::value - > - >( acc ); - return Cursor(shMem.data()); - } -}; + namespace CT + { + template + struct SharedMemAllocator; -template -struct SharedMemAllocator -{ - typedef Type type; - typedef math::CT::UInt32 Pitch; - static constexpr int dim = 2; - typedef cursor::CT::BufferCursor Cursor; + template + struct SharedMemAllocator + { + typedef Type type; + typedef math::CT::UInt32<> Pitch; + static constexpr int dim = 1; + typedef cursor::CT::BufferCursor> Cursor; - template< typename T_Acc > - DEVICEONLY static Cursor allocate( T_Acc const & acc ) - { - auto& shMem = pmacc::memory::shared::allocate< - uid, - memory::Array< - Type, - math::CT::volume< Size >::type::value - > - >( acc ); - return Cursor(shMem.data()); - } -}; + template + DINLINE static Cursor allocate(T_Acc const& acc) + { + auto& shMem = pmacc::memory::shared:: + allocate::type::value>>(acc); + return Cursor(shMem.data()); + } + }; -template -struct SharedMemAllocator -{ - typedef Type type; - 
typedef math::CT::UInt32 Pitch; - static constexpr int dim = 3; - typedef cursor::CT::BufferCursor Cursor; + template + struct SharedMemAllocator + { + typedef Type type; + typedef math::CT::UInt32 Pitch; + static constexpr int dim = 2; + typedef cursor::CT::BufferCursor Cursor; - template< typename T_Acc > - DEVICEONLY static Cursor allocate( T_Acc const & acc ) - { - auto& shMem = pmacc::memory::shared::allocate< - uid, - memory::Array< - Type, - math::CT::volume< Size >::type::value - > - >( acc ); - return Cursor(shMem.data()); - } -}; + template + DINLINE static Cursor allocate(T_Acc const& acc) + { + auto& shMem = pmacc::memory::shared:: + allocate::type::value>>(acc); + return Cursor(shMem.data()); + } + }; + + template + struct SharedMemAllocator + { + typedef Type type; + typedef math::CT::UInt32 + Pitch; + static constexpr int dim = 3; + typedef cursor::CT::BufferCursor Cursor; -} // CT -} // allocator -} // pmacc + template + DINLINE static Cursor allocate(T_Acc const& acc) + { + auto& shMem = pmacc::memory::shared:: + allocate::type::value>>(acc); + return Cursor(shMem.data()); + } + }; + } // namespace CT + } // namespace allocator +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/allocator/tag.hpp b/include/pmacc/cuSTL/container/allocator/tag.hpp index b5c3ce63a3..482adb9c01 100644 --- a/include/pmacc/cuSTL/container/allocator/tag.hpp +++ b/include/pmacc/cuSTL/container/allocator/tag.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -24,15 +24,15 @@ namespace pmacc { -namespace allocator -{ -namespace tag -{ -struct host; -struct device; -struct unspecified; -} // tag -} // allocator -} // pmacc + namespace allocator + { + namespace tag + { + struct host; + struct device; + struct unspecified; + } // namespace tag + } // namespace allocator +} // namespace pmacc #endif // ALLOCATOR_TAG_H diff --git a/include/pmacc/cuSTL/container/assigner/DeviceMemAssigner.hpp b/include/pmacc/cuSTL/container/assigner/DeviceMemAssigner.hpp index 23ab1aa4df..6440401328 100644 --- a/include/pmacc/cuSTL/container/assigner/DeviceMemAssigner.hpp +++ b/include/pmacc/cuSTL/container/assigner/DeviceMemAssigner.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -38,42 +38,42 @@ namespace pmacc { -namespace assigner -{ - -namespace bmpl = boost::mpl; + namespace assigner + { + namespace bmpl = boost::mpl; -template -struct DeviceMemAssigner -{ - static constexpr int dim = T_Dim::value; - typedef T_CartBuffer CartBuffer; + template + struct DeviceMemAssigner + { + static constexpr int dim = T_Dim::value; + typedef T_CartBuffer CartBuffer; - template - HINLINE void assign(const Type& value) - { - // "Curiously recurring template pattern" - CartBuffer* buffer = static_cast(this); + template + HINLINE void assign(const Type& value) + { + // "Curiously recurring template pattern" + CartBuffer* buffer = static_cast(this); - zone::SphericZone myZone(buffer->size()); - cursor::BufferCursor cursor(buffer->dataPointer, buffer->pitch); + zone::SphericZone myZone(buffer->size()); + cursor::BufferCursor cursor(buffer->dataPointer, buffer->pitch); - /* The greatest common divisor of each component of the volume size - * and a certain power of two value gives the best suitable block size */ - math::Size_t<3> blockSize(math::Size_t<3>::create(1)); - size_t maxValues[] = {16, 
16, 4}; // maximum values for each dimension - for(int i = 0; i < dim; i++) - { - blockSize[i] = boost::integer::gcd(buffer->size()[i], maxValues[dim-1]); - } - /* the maximum number of threads per block for devices with - * compute capability > 2.0 is 1024 */ - PMACC_VERIFY(blockSize.productOfComponents() <= 1024); + /* The greatest common divisor of each component of the volume size + * and a certain power of two value gives the best suitable block size */ + math::Size_t<3> blockSize(math::Size_t<3>::create(1)); + size_t maxValues[] = {16, 16, 4}; // maximum values for each dimension + for(int i = 0; i < dim; i++) + { + blockSize[i] = boost::integer::gcd(buffer->size()[i], maxValues[dim - 1]); + } + /* the maximum number of threads per block for devices with + * compute capability > 2.0 is 1024 */ + PMACC_VERIFY(blockSize.productOfComponents() <= 1024); - algorithm::kernel::RT::Foreach foreach(blockSize); - foreach(myZone, cursor, pmacc::algorithm::functor::AssignValue(value)); - } -}; + algorithm::kernel::RT::Foreach foreach(blockSize); + foreach(myZone, cursor, pmacc::algorithm::functor::AssignValue(value)) + ; + } + }; -} // assigner -} // pmacc + } // namespace assigner +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/assigner/HostMemAssigner.hpp b/include/pmacc/cuSTL/container/assigner/HostMemAssigner.hpp index a296247f76..c7a52f6845 100644 --- a/include/pmacc/cuSTL/container/assigner/HostMemAssigner.hpp +++ b/include/pmacc/cuSTL/container/assigner/HostMemAssigner.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -33,31 +33,30 @@ namespace pmacc { -namespace assigner -{ - -namespace bmpl = boost::mpl; - -template -struct HostMemAssigner -{ - static constexpr int dim = T_Dim::value; - typedef T_CartBuffer CartBuffer; - - template - HINLINE void assign(const Type& value) + namespace assigner { - // "Curiously recurring template pattern" - CartBuffer* buffer = static_cast(this); - - // get a host accelerator - auto hostDev = cupla::manager::Device< cupla::AccHost >::get().device( ); - - algorithm::host::Foreach foreach; - foreach(hostDev, buffer->zone(), buffer->origin(), pmacc::algorithm::functor::AssignValue(value)); - } -}; - -} // assigner -} // pmacc - + namespace bmpl = boost::mpl; + + template + struct HostMemAssigner + { + static constexpr int dim = T_Dim::value; + typedef T_CartBuffer CartBuffer; + + template + HINLINE void assign(const Type& value) + { + // "Curiously recurring template pattern" + CartBuffer* buffer = static_cast(this); + + // get a host accelerator + auto hostDev = cupla::manager::Device::get().device(); + + algorithm::host::Foreach foreach; + foreach(hostDev, buffer->zone(), buffer->origin(), pmacc::algorithm::functor::AssignValue(value)) + ; + } + }; + + } // namespace assigner +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/compile-time/CartBuffer.hpp b/include/pmacc/cuSTL/container/compile-time/CartBuffer.hpp index 8858722587..cab81514e9 100644 --- a/include/pmacc/cuSTL/container/compile-time/CartBuffer.hpp +++ b/include/pmacc/cuSTL/container/compile-time/CartBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -30,51 +30,59 @@ namespace pmacc { -namespace container -{ -namespace CT -{ + namespace container + { + namespace CT + { + /** compile-time version of container::CartBuffer + * \tparam _Size compile-time vector specifying the size of the container + */ + template + class CartBuffer + { + public: + typedef Type type; + typedef _Size Size; + typedef typename Allocator::Pitch Pitch; + typedef cursor::CT::BufferCursor Cursor; + static constexpr int dim = Size::dim; + typedef zone::CT::SphericZone<_Size, typename math::CT::make_Int::type> Zone; -/** compile-time version of container::CartBuffer - * \tparam _Size compile-time vector specifying the size of the container - */ -template -class CartBuffer -{ -public: - typedef Type type; - typedef _Size Size; - typedef typename Allocator::Pitch Pitch; - typedef cursor::CT::BufferCursor Cursor; - static constexpr int dim = Size::dim; - typedef zone::CT::SphericZone<_Size, typename math::CT::make_Int::type> Zone; -private: - Type* dataPointer; - //HDINLINE void init(); -public: - template< typename T_Acc > - DINLINE CartBuffer( T_Acc const & acc ); - DINLINE CartBuffer(const CT::CartBuffer& other); + private: + Type* dataPointer; + // HDINLINE void init(); + public: + template + DINLINE CartBuffer(T_Acc const& acc); + DINLINE CartBuffer(const CT::CartBuffer& other); - DINLINE CT::CartBuffer& - operator=(const CT::CartBuffer& rhs); + DINLINE CT::CartBuffer& operator=( + const CT::CartBuffer& rhs); - DINLINE void assign(const Type& value); - DINLINE Type* getDataPointer() const {return dataPointer;} + DINLINE void assign(const Type& value); + DINLINE Type* getDataPointer() const + { + return dataPointer; + } - DINLINE cursor::CT::BufferCursor origin() const; - /* - HDINLINE Cursor, CartNavigator, char*> - originCustomAxes(const math::UInt32& axes) const; - */ - DINLINE math::Size_t size() const {return math::Size_t(Size());} + DINLINE cursor::CT::BufferCursor origin() const; + /* + HDINLINE Cursor, CartNavigator, char*> + 
originCustomAxes(const math::UInt32& axes) const; + */ + DINLINE math::Size_t size() const + { + return math::Size_t(Size()); + } - DINLINE Zone zone() const { return Zone(); } -}; + DINLINE Zone zone() const + { + return Zone(); + } + }; -} // CT -} // container -} // pmacc + } // namespace CT + } // namespace container +} // namespace pmacc #include "CartBuffer.tpp" - diff --git a/include/pmacc/cuSTL/container/compile-time/CartBuffer.tpp b/include/pmacc/cuSTL/container/compile-time/CartBuffer.tpp index 8561570cbc..2007ffe994 100644 --- a/include/pmacc/cuSTL/container/compile-time/CartBuffer.tpp +++ b/include/pmacc/cuSTL/container/compile-time/CartBuffer.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -23,26 +23,24 @@ namespace pmacc { -namespace container -{ -namespace CT -{ + namespace container + { + namespace CT + { + template + template + DINLINE CartBuffer::CartBuffer(T_Acc const& acc) + { + this->dataPointer = Allocator::allocate(acc).getMarker(); + } -template -template< typename T_Acc > -DINLINE CartBuffer::CartBuffer( T_Acc const & acc ) -{ - this->dataPointer = Allocator::allocate( acc ).getMarker(); -} - -template -DINLINE -cursor::CT::BufferCursor -CartBuffer::origin() const -{ - return cursor::CT::BufferCursor(this->dataPointer); -} + template + DINLINE cursor::CT::BufferCursor + CartBuffer::origin() const + { + return cursor::CT::BufferCursor(this->dataPointer); + } -} // CT -} // container -} // pmacc + } // namespace CT + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/compile-time/SharedBuffer.hpp b/include/pmacc/cuSTL/container/compile-time/SharedBuffer.hpp index 80f43f321c..7263813343 100644 --- a/include/pmacc/cuSTL/container/compile-time/SharedBuffer.hpp +++ b/include/pmacc/cuSTL/container/compile-time/SharedBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin 
Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -26,19 +26,18 @@ namespace pmacc { -namespace container -{ -namespace CT -{ - -/* typedef version of container::CT::CartBuffer for shared mem on a GPU inside a cuda kernel. - * \param uid If two containers in one kernel have the same Type and Size, - * uid has to be different. This is due to a nvcc bug. - */ -template -using SharedBuffer = CT::CartBuffer, void, void>; + namespace container + { + namespace CT + { + /* typedef version of container::CT::CartBuffer for shared mem on a GPU inside a cupla kernel. + * \param uid If two containers in one kernel have the same Type and Size, + * uid has to be different. This is due to a nvcc bug. + */ + template + using SharedBuffer = CT:: + CartBuffer, void, void>; -} // CT -} // container -} // pmacc + } // namespace CT + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/copier/D2DCopier.hpp b/include/pmacc/cuSTL/container/copier/D2DCopier.hpp index d27191d5ff..f9714ff5e1 100644 --- a/include/pmacc/cuSTL/container/copier/D2DCopier.hpp +++ b/include/pmacc/cuSTL/container/copier/D2DCopier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. 
@@ -30,43 +30,46 @@ namespace pmacc { -namespace copier -{ - -template -struct D2DCopier -{ - static constexpr int dim = T_dim; - - PMACC_NO_NVCC_HDWARNING /* Handled via CUDA_ARCH */ - template - HDINLINE static void copy(Type* dest, const math::Size_t& pitchDest, - Type* source, const math::Size_t& pitchSource, - const math::Size_t& size) + namespace copier { + template + struct D2DCopier + { + static constexpr int dim = T_dim; + + PMACC_NO_NVCC_HDWARNING /* Handled via CUDA_ARCH */ + template + HDINLINE static void copy( + Type* dest, + const math::Size_t& pitchDest, + Type* source, + const math::Size_t& pitchSource, + const math::Size_t& size) + { #ifdef __CUDA_ARCH__ - typedef cursor::BufferCursor Cursor; - Cursor bufCursorDest(dest, pitchDest); - Cursor bufCursorSrc(source, pitchSource); - cursor::MapTo1DNavigator myNavi(size); + typedef cursor::BufferCursor Cursor; + Cursor bufCursorDest(dest, pitchDest); + Cursor bufCursorSrc(source, pitchSource); + cursor::MapTo1DNavigator myNavi(size); - auto srcCursor = cursor::make_Cursor(cursor::CursorAccessor(), - myNavi, - bufCursorSrc); - auto destCursor = cursor::make_Cursor(cursor::CursorAccessor(), - myNavi, - bufCursorDest); - size_t sizeProd = size.productOfComponents(); - for(size_t i = 0; i < sizeProd; i++) - { - destCursor[i] = srcCursor[i]; - } + auto srcCursor = cursor::make_Cursor(cursor::CursorAccessor(), myNavi, bufCursorSrc); + auto destCursor = cursor::make_Cursor(cursor::CursorAccessor(), myNavi, bufCursorDest); + size_t sizeProd = size.productOfComponents(); + for(size_t i = 0; i < sizeProd; i++) + { + destCursor[i] = srcCursor[i]; + } #else - cudaWrapper::Memcopy()(dest, pitchDest, source, pitchSource, - size, cudaWrapper::flags::Memcopy::deviceToDevice); + cuplaWrapper::Memcopy()( + dest, + pitchDest, + source, + pitchSource, + size, + cuplaWrapper::flags::Memcopy::deviceToDevice); #endif - } -}; + } + }; -} // copier -} // pmacc + } // namespace copier +} // namespace pmacc diff --git 
a/include/pmacc/cuSTL/container/copier/H2HCopier.hpp b/include/pmacc/cuSTL/container/copier/H2HCopier.hpp index 666c51fc2f..7ce5ef286e 100644 --- a/include/pmacc/cuSTL/container/copier/H2HCopier.hpp +++ b/include/pmacc/cuSTL/container/copier/H2HCopier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -27,24 +27,31 @@ namespace pmacc { -namespace copier -{ - -template -struct H2HCopier -{ - static constexpr int dim = T_dim; - - PMACC_NO_NVCC_HDWARNING /* Should never be called from device functions */ - template - HDINLINE static void copy(Type* dest, const math::Size_t& pitchDest, - Type* source, const math::Size_t& pitchSource, - const math::Size_t& size) + namespace copier { - cudaWrapper::Memcopy()(dest, pitchDest, source, pitchSource, - size, cudaWrapper::flags::Memcopy::hostToHost); - } -}; + template + struct H2HCopier + { + static constexpr int dim = T_dim; + + PMACC_NO_NVCC_HDWARNING /* Should never be called from device functions */ + template + HDINLINE static void copy( + Type* dest, + const math::Size_t& pitchDest, + Type* source, + const math::Size_t& pitchSource, + const math::Size_t& size) + { + cuplaWrapper::Memcopy()( + dest, + pitchDest, + source, + pitchSource, + size, + cuplaWrapper::flags::Memcopy::hostToHost); + } + }; -} // copier -} // pmacc + } // namespace copier +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/copier/Memcopy.hpp b/include/pmacc/cuSTL/container/copier/Memcopy.hpp index ebeb161230..6075b21586 100644 --- a/include/pmacc/cuSTL/container/copier/Memcopy.hpp +++ b/include/pmacc/cuSTL/container/copier/Memcopy.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -26,82 +26,116 @@ namespace pmacc { -namespace cudaWrapper -{ + namespace cuplaWrapper + { + namespace flags + { + struct Memcopy + { + enum Direction + { + hostToDevice = 0, + deviceToHost, + hostToHost, + deviceToDevice + }; + }; + } // namespace flags -namespace flags -{ -struct Memcopy -{ - enum Direction {hostToDevice = 0, deviceToHost, hostToHost, deviceToDevice}; -}; -} + template + struct Memcopy; -template -struct Memcopy; + template<> + struct Memcopy<1> + { + template + void operator()( + Type* dest, + const math::Size_t<0>, + const Type* source, + const math::Size_t<0>, + const math::Size_t<1>& size, + flags::Memcopy::Direction direction) + { + const cuplaMemcpyKind kind[] + = {cuplaMemcpyHostToDevice, + cuplaMemcpyDeviceToHost, + cuplaMemcpyHostToHost, + cuplaMemcpyDeviceToDevice}; + CUDA_CHECK(cuplaMemcpy(dest, source, sizeof(Type) * size.x(), kind[direction])); + } + }; -template<> -struct Memcopy<1> -{ - template - void operator()(Type* dest, const math::Size_t<0>, - const Type* source, const math::Size_t<0>, const math::Size_t<1>& size, - flags::Memcopy::Direction direction) - { - const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, - cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice}; - CUDA_CHECK(cudaMemcpy(dest, source, sizeof(Type) * size.x(), kind[direction])); - } -}; + template<> + struct Memcopy<2u> + { + template + void operator()( + Type* dest, + const math::Size_t<1> pitchDest, + const Type* source, + const math::Size_t<1> pitchSource, + const math::Size_t<2u>& size, + flags::Memcopy::Direction direction) + { + const cuplaMemcpyKind kind[] + = {cuplaMemcpyHostToDevice, + cuplaMemcpyDeviceToHost, + cuplaMemcpyHostToHost, + cuplaMemcpyDeviceToDevice}; -template<> -struct Memcopy<2u> -{ - template - void operator()(Type* dest, const math::Size_t<1> pitchDest, - const Type* source, const math::Size_t<1> pitchSource, const math::Size_t<2u>& size, - flags::Memcopy::Direction direction) - { - const cudaMemcpyKind 
kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, - cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice}; - - CUDA_CHECK(cudaMemcpy2D(dest, pitchDest.x(), source, pitchSource.x(), sizeof(Type) * size.x(), size.y(), - kind[direction])); - } -}; + CUDA_CHECK(cuplaMemcpy2D( + dest, + pitchDest.x(), + source, + pitchSource.x(), + sizeof(Type) * size.x(), + size.y(), + kind[direction])); + } + }; -template<> -struct Memcopy<3> -{ - template - void operator()(Type* dest, const math::Size_t<2u> pitchDest, - Type* source, const math::Size_t<2u> pitchSource, const math::Size_t<3>& size, - flags::Memcopy::Direction direction) - { - const cudaMemcpyKind kind[] = {cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, - cudaMemcpyHostToHost, cudaMemcpyDeviceToDevice}; + template<> + struct Memcopy<3> + { + template + void operator()( + Type* dest, + const math::Size_t<2u> pitchDest, + Type* source, + const math::Size_t<2u> pitchSource, + const math::Size_t<3>& size, + flags::Memcopy::Direction direction) + { + const cuplaMemcpyKind kind[] + = {cuplaMemcpyHostToDevice, + cuplaMemcpyDeviceToHost, + cuplaMemcpyHostToHost, + cuplaMemcpyDeviceToDevice}; - cudaPitchedPtr pitchedPtrDest; - pitchedPtrDest.pitch = pitchDest.x(); pitchedPtrDest.ptr = dest; - pitchedPtrDest.xsize = size.x() * sizeof (Type); - pitchedPtrDest.ysize = size.y(); - cudaPitchedPtr pitchedPtrSource; - pitchedPtrSource.pitch = pitchSource.x(); pitchedPtrSource.ptr = source; - pitchedPtrSource.xsize = size.x() * sizeof (Type); - pitchedPtrSource.ysize = size.y(); + cuplaPitchedPtr pitchedPtrDest; + pitchedPtrDest.pitch = pitchDest.x(); + pitchedPtrDest.ptr = dest; + pitchedPtrDest.xsize = size.x() * sizeof(Type); + pitchedPtrDest.ysize = size.y(); + cuplaPitchedPtr pitchedPtrSource; + pitchedPtrSource.pitch = pitchSource.x(); + pitchedPtrSource.ptr = source; + pitchedPtrSource.xsize = size.x() * sizeof(Type); + pitchedPtrSource.ysize = size.y(); - cudaMemcpy3DParms params; - params.srcArray = nullptr; - params.srcPos 
= make_cudaPos(0,0,0); - params.srcPtr = pitchedPtrSource; - params.dstArray = nullptr; - params.dstPos = make_cudaPos(0,0,0); - params.dstPtr = pitchedPtrDest; - params.extent = make_cudaExtent(size.x() * sizeof(Type), size.y(), size.z()); - params.kind = kind[direction]; - CUDA_CHECK(cudaMemcpy3D(¶ms)); - } -}; + cuplaMemcpy3DParms params; + params.srcArray = nullptr; + params.srcPos = make_cuplaPos(0, 0, 0); + params.srcPtr = pitchedPtrSource; + params.dstArray = nullptr; + params.dstPos = make_cuplaPos(0, 0, 0); + params.dstPtr = pitchedPtrDest; + params.extent = make_cuplaExtent(size.x() * sizeof(Type), size.y(), size.z()); + params.kind = kind[direction]; + CUDA_CHECK(cuplaMemcpy3D(¶ms)); + } + }; -} // cudaWrapper -} // pmacc + } // namespace cuplaWrapper +} // namespace pmacc diff --git a/include/pmacc/cuSTL/container/tag.hpp b/include/pmacc/cuSTL/container/tag.hpp index a1f493c831..6577f63093 100644 --- a/include/pmacc/cuSTL/container/tag.hpp +++ b/include/pmacc/cuSTL/container/tag.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -24,16 +24,15 @@ namespace pmacc { -namespace container -{ - -namespace tag -{ -struct HostBuffer; -struct DeviceBuffer; -} + namespace container + { + namespace tag + { + struct HostBuffer; + struct DeviceBuffer; + } // namespace tag -} // container -} // pmacc + } // namespace container +} // namespace pmacc #endif // CONTAINER_TAG_H diff --git a/include/pmacc/cuSTL/container/view/View.hpp b/include/pmacc/cuSTL/container/view/View.hpp index 79bfb7d308..5909e6f4de 100644 --- a/include/pmacc/cuSTL/container/view/View.hpp +++ b/include/pmacc/cuSTL/container/view/View.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -23,53 +23,53 @@ namespace pmacc { -namespace container -{ - -/** Represents a clipped area of its inherited container. - * - * View are not designed to do hard data copies. - * Views don't take care of reference counters. So if the corresponding - * container dies, all views become invalid. - * Usual way to contruct a view goes with container.view(...); - * \tparam Buffer Corresponding container type - */ -template -struct View : public Buffer -{ - HDINLINE View() {} - - template - HDINLINE View(const View& other) + namespace container { - *this = other; - } - - HDINLINE ~View() - { - /* increment the reference counter because the container's destructor decrements it. - * We want to compensate this. + /** Represents a clipped area of its inherited container. + * + * View are not designed to do hard data copies. + * Views don't take care of reference counters. So if the corresponding + * container dies, all views become invalid. + * Usual way to contruct a view goes with container.view(...); + * \tparam Buffer Corresponding container type */ - (*this->refCount)++; - } + template + struct View : public Buffer + { + HDINLINE View() + { + } - template - HDINLINE View& operator=(const View& other) - { - this->dataPointer = other.dataPointer; - this->_size = other._size; - this->pitch = other.pitch; - this->refCount = other.refCount; + template + HDINLINE View(const View& other) + { + *this = other; + } + + HDINLINE ~View() + { + /* increment the reference counter because the container's destructor decrements it. + * We want to compensate this. 
+ */ + (*this->refCount)++; + } + + template + HDINLINE View& operator=(const View& other) + { + this->dataPointer = other.dataPointer; + this->_size = other._size; + this->pitch = other.pitch; + this->refCount = other.refCount; - return *this; - } + return *this; + } -private: - // forbid view = container - HDINLINE Buffer& - operator=(const Buffer& rhs); -}; + private: + // forbid view = container + HDINLINE Buffer& operator=(const Buffer& rhs); + }; -} // container -} // pmacc + } // namespace container +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/BufferCursor.hpp b/include/pmacc/cuSTL/cursor/BufferCursor.hpp index 65dff84f08..4d06c04586 100644 --- a/include/pmacc/cuSTL/cursor/BufferCursor.hpp +++ b/include/pmacc/cuSTL/cursor/BufferCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -31,55 +31,51 @@ namespace pmacc { -namespace cursor -{ - -/** The most common cursor typedef - * - * BufferCursor does access and jumping on a cartesian memory buffer. - * - * \tparam T_Type type of a single datum - * \tparam T_dim dimension of the memory buffer - */ -template< - typename T_Type, - int T_dim -> -struct BufferCursor - : public Cursor< PointerAccessor< T_Type >, BufferNavigator< T_dim >, T_Type * > -{ - /* \param pointer data pointer - * \param pitch pitch of the memory buffer - * pitch is a Size_t vector with one dimension less than dim - * pitch[0] is the distance in bytes to the incremented y-coordinate - * pitch[1] is the distance in bytes to the incremented z-coordiante - */ - HDINLINE - BufferCursor( T_Type * pointer, math::Size_t< T_dim - 1 > pitch ) - : Cursor< PointerAccessor< T_Type >, BufferNavigator< T_dim >, T_Type * > - ( PointerAccessor< T_Type >(), BufferNavigator< T_dim >( pitch ), pointer ) {} + namespace cursor + { + /** The most common cursor typedef + * + * BufferCursor does access and jumping on a cartesian memory buffer. 
+ * + * \tparam T_Type type of a single datum + * \tparam T_dim dimension of the memory buffer + */ + template + struct BufferCursor : public Cursor, BufferNavigator, T_Type*> + { + /* \param pointer data pointer + * \param pitch pitch of the memory buffer + * pitch is a Size_t vector with one dimension less than dim + * pitch[0] is the distance in bytes to the incremented y-coordinate + * pitch[1] is the distance in bytes to the incremented z-coordiante + */ + HDINLINE + BufferCursor(T_Type* pointer, math::Size_t pitch) + : Cursor, BufferNavigator, T_Type*>( + PointerAccessor(), + BufferNavigator(pitch), + pointer) + { + } - HDINLINE - BufferCursor( const Cursor< PointerAccessor< T_Type >, BufferNavigator< T_dim >, T_Type * > & other ) - : Cursor, BufferNavigator< T_dim >, T_Type * >( other ) {} -}; + HDINLINE + BufferCursor(const Cursor, BufferNavigator, T_Type*>& other) + : Cursor, BufferNavigator, T_Type*>(other) + { + } + }; -namespace traits -{ + namespace traits + { + /* type trait to get the BufferCursor's dimension if it has one */ + template + struct dim> + { + static constexpr int value = pmacc::cursor::traits::dim< + Cursor, BufferNavigator, T_Type*>>::value; + }; -/* type trait to get the BufferCursor's dimension if it has one */ -template< - typename T_Type, - int T_dim -> -struct dim< BufferCursor< T_Type, T_dim > > -{ - static constexpr int value = pmacc::cursor::traits::dim< - Cursor< PointerAccessor< T_Type >, BufferNavigator< T_dim >, T_Type * > >::value; -}; + } // namespace traits -} // namespace traits - -} // namespace cursor + } // namespace cursor } // namespace pmacc - diff --git a/include/pmacc/cuSTL/cursor/Cursor.hpp b/include/pmacc/cuSTL/cursor/Cursor.hpp index 377689d590..8d69b755a1 100644 --- a/include/pmacc/cuSTL/cursor/Cursor.hpp +++ b/include/pmacc/cuSTL/cursor/Cursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -32,145 +32,169 @@ namespace mpl = boost::mpl; namespace pmacc { -namespace cursor -{ - -/** A cursor is used to access a single datum and to jump to another one. - * It is always located at a certain datum. Think of a generalized iterator. - * \tparam _Accessor Policy functor class that is called inside operator*(). - * It typically returns a reference to the current selected datum. - * \tparam _Navigator Policy functor class that is called inside operator()(). - * It jumps to another datum. - * \tparam _Marker Runtime data that is used by the accessor and the navigator. - * This is typically a data pointer. - */ -template -class Cursor : private _Accessor, _Navigator -{ -public: - typedef typename _Accessor::type type; - typedef typename boost::remove_reference::type ValueType; - typedef _Accessor Accessor; - typedef _Navigator Navigator; - typedef _Marker Marker; - typedef Cursor This; - typedef This result_type; -protected: - Marker marker; -public: - HDINLINE - Cursor(const Accessor& accessor, - const Navigator& navigator, - const Marker& marker) - : Accessor(accessor), Navigator(navigator), marker(marker) {} - - /** access - * \return Accessor's return type. - * Typically a reference to the current selected single datum. - */ - HDINLINE - type operator*() - { - return Accessor::operator()(this->marker); - } - - /* This is a const method which is called for a const cursor object. - * A const cursor object does *not* mean that the data it points to - * is neccessarily constant too. This is why here the return type is - * the same as for the non-const method above. - */ - HDINLINE - type operator*() const - { - return Accessor::operator()(this->marker); - } - - /** jumping - * \param jump Specifies a jump relative to the current selected datum. - * This is usually a int vector but may be any type that navigator accepts. - * \return A new cursor, which has jumped according to the jump param. 
- */ - template - HDINLINE This operator()(const Jump& jump) const - { - Navigator newNavigator(getNavigator()); - Marker newMarker = newNavigator(this->marker, jump); - return This(getAccessor(), newNavigator, newMarker); - } - - /* convenient method which is available if the navigator accepts a Int<1> */ - HDINLINE This operator()(int x) const - { - return (*this)(math::Int<1>(x)); - } - - /* convenient method which is available if the navigator accepts a Int<2> */ - HDINLINE This operator()(int x, int y) const - { - return (*this)(math::Int<2u>(x, y)); - } - - /* convenient method which is available if the navigator accepts a Int<3> */ - HDINLINE This operator()(int x, int y, int z) const + namespace cursor { - return (*this)(math::Int<3>(x, y, z)); - } - - /* convenient method which is available if the navigator implements operator++ */ - HDINLINE void operator++() {Navigator::operator++;} - /* convenient method which is available if the navigator implements operator-- */ - HDINLINE void operator--() {Navigator::operator--;} - - /* jump and access in one call */ - template - HDINLINE - type operator[](const Jump& jump) - { - return *((*this)(jump)); - } - - template - HDINLINE - type operator[](const Jump& jump) const - { - return *((*this)(jump)); - } - - /* This is a dirty workaround to enable and disable safe-cursor checks.*/ - /** \todo: Can be substituted by ordinary functions instead of methods.*/ - HDINLINE void enableChecking() {this->marker.enableChecking();} - HDINLINE void disableChecking() {this->marker.disableChecking();} - - /* getters */ - HDINLINE - const _Accessor& getAccessor() const {return *this;} - HDINLINE - const _Navigator& getNavigator() const {return *this;} - HDINLINE - const Marker& getMarker() const {return this->marker;} -}; - -/* convenient function to construct a cursor by passing its constructor arguments */ -template -HDINLINE Cursor make_Cursor -(const Accessor& accessor, const Navigator& navigator, const Marker& marker) -{ - 
return Cursor(accessor, navigator, marker); -} - -namespace traits -{ - -/* type trait to get the cursor's dimension if it has one */ -template -struct dim< pmacc::cursor::Cursor<_Accessor, _Navigator, _Marker> > -{ - static constexpr int value = pmacc::cursor::traits::dim::Navigator >::value; -}; - -} // traits - -} // cursor -} // pmacc - - + /** A cursor is used to access a single datum and to jump to another one. + * It is always located at a certain datum. Think of a generalized iterator. + * \tparam _Accessor Policy functor class that is called inside operator*(). + * It typically returns a reference to the current selected datum. + * \tparam _Navigator Policy functor class that is called inside operator()(). + * It jumps to another datum. + * \tparam _Marker Runtime data that is used by the accessor and the navigator. + * This is typically a data pointer. + */ + template + class Cursor + : private _Accessor + , _Navigator + { + public: + typedef typename _Accessor::type type; + typedef typename boost::remove_reference::type ValueType; + typedef _Accessor Accessor; + typedef _Navigator Navigator; + typedef _Marker Marker; + typedef Cursor This; + typedef This result_type; + + protected: + Marker marker; + + public: + HDINLINE + Cursor(const Accessor& accessor, const Navigator& navigator, const Marker& marker) + : Accessor(accessor) + , Navigator(navigator) + , marker(marker) + { + } + + /** access + * \return Accessor's return type. + * Typically a reference to the current selected single datum. + */ + HDINLINE + type operator*() + { + return Accessor::operator()(this->marker); + } + + /* This is a const method which is called for a const cursor object. + * A const cursor object does *not* mean that the data it points to + * is neccessarily constant too. This is why here the return type is + * the same as for the non-const method above. 
+ */ + HDINLINE + type operator*() const + { + return Accessor::operator()(this->marker); + } + + /** jumping + * \param jump Specifies a jump relative to the current selected datum. + * This is usually a int vector but may be any type that navigator accepts. + * \return A new cursor, which has jumped according to the jump param. + */ + template + HDINLINE This operator()(const Jump& jump) const + { + Navigator newNavigator(getNavigator()); + Marker newMarker = newNavigator(this->marker, jump); + return This(getAccessor(), newNavigator, newMarker); + } + + /* convenient method which is available if the navigator accepts a Int<1> */ + HDINLINE This operator()(int x) const + { + return (*this)(math::Int<1>(x)); + } + + /* convenient method which is available if the navigator accepts a Int<2> */ + HDINLINE This operator()(int x, int y) const + { + return (*this)(math::Int<2u>(x, y)); + } + + /* convenient method which is available if the navigator accepts a Int<3> */ + HDINLINE This operator()(int x, int y, int z) const + { + return (*this)(math::Int<3>(x, y, z)); + } + + /* convenient method which is available if the navigator implements operator++ */ + HDINLINE void operator++() + { + Navigator::operator++; + } + /* convenient method which is available if the navigator implements operator-- */ + HDINLINE void operator--() + { + Navigator::operator--; + } + + /* jump and access in one call */ + template + HDINLINE type operator[](const Jump& jump) + { + return *((*this)(jump)); + } + + template + HDINLINE type operator[](const Jump& jump) const + { + return *((*this)(jump)); + } + + /* This is a dirty workaround to enable and disable safe-cursor checks.*/ + /** \todo: Can be substituted by ordinary functions instead of methods.*/ + HDINLINE void enableChecking() + { + this->marker.enableChecking(); + } + HDINLINE void disableChecking() + { + this->marker.disableChecking(); + } + + /* getters */ + HDINLINE + const _Accessor& getAccessor() const + { + return *this; + } 
+ HDINLINE + const _Navigator& getNavigator() const + { + return *this; + } + HDINLINE + const Marker& getMarker() const + { + return this->marker; + } + }; + + /* convenient function to construct a cursor by passing its constructor arguments */ + template + HDINLINE Cursor make_Cursor( + const Accessor& accessor, + const Navigator& navigator, + const Marker& marker) + { + return Cursor(accessor, navigator, marker); + } + + namespace traits + { + /* type trait to get the cursor's dimension if it has one */ + template + struct dim> + { + static constexpr int value + = pmacc::cursor::traits::dim::Navigator>::value; + }; + + } // namespace traits + + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/FunctorCursor.hpp b/include/pmacc/cuSTL/cursor/FunctorCursor.hpp index b4575a91b7..f5d8d2fb76 100644 --- a/include/pmacc/cuSTL/cursor/FunctorCursor.hpp +++ b/include/pmacc/cuSTL/cursor/FunctorCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,33 +28,28 @@ namespace pmacc { -namespace cursor -{ - -/** wraps a cursor into a new cursor - * - * On each access of the new cursor the result of the nested cursor access - * is filtered through a user-defined functor. - * - * \param cursor Cursor to be wrapped - * \param functor User functor acting as a filter. - */ -template -HDINLINE -Cursor::type>, - CursorNavigator, TCursor> make_FunctorCursor(const TCursor& cursor, const Functor& functor) -{ - return make_Cursor( - FunctorAccessor< - Functor, - typename TCursor::ValueType - >(functor), - CursorNavigator(), - cursor - ); -} - -} // cursor -} // pmacc + namespace cursor + { + /** wraps a cursor into a new cursor + * + * On each access of the new cursor the result of the nested cursor access + * is filtered through a user-defined functor. + * + * \param cursor Cursor to be wrapped + * \param functor User functor acting as a filter. 
+ */ + template + HDINLINE Cursor< + FunctorAccessor::type>, + CursorNavigator, + TCursor> + make_FunctorCursor(const TCursor& cursor, const Functor& functor) + { + return make_Cursor( + FunctorAccessor(functor), + CursorNavigator(), + cursor); + } + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/MultiIndexCursor.hpp b/include/pmacc/cuSTL/cursor/MultiIndexCursor.hpp index f5fbd2914f..9af083c0b4 100644 --- a/include/pmacc/cuSTL/cursor/MultiIndexCursor.hpp +++ b/include/pmacc/cuSTL/cursor/MultiIndexCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,26 +28,20 @@ namespace pmacc { -namespace cursor -{ - -/** construct a cursor where accessing means getting the current position - * in terms of an 2D, 3D, ... index. Usefull to obtain for example the current cell index. - * \tparam dim Dimension of the index (say: int-vector) - * \param idx Initial index value - * \return cursor with the behavior mentioned above - */ -template -HDINLINE -cursor::Cursor >, MultiIndexNavigator, - math::Int > - make_MultiIndexCursor(const math::Int& idx = math::Int::create(0)) -{ - return make_Cursor(cursor::MarkerAccessor >(), - MultiIndexNavigator(), - idx); -} - -} // cursor -} // pmacc + namespace cursor + { + /** construct a cursor where accessing means getting the current position + * in terms of an 2D, 3D, ... index. Usefull to obtain for example the current cell index. 
+ * \tparam dim Dimension of the index (say: int-vector) + * \param idx Initial index value + * \return cursor with the behavior mentioned above + */ + template + HDINLINE cursor::Cursor>, MultiIndexNavigator, math::Int> + make_MultiIndexCursor(const math::Int& idx = math::Int::create(0)) + { + return make_Cursor(cursor::MarkerAccessor>(), MultiIndexNavigator(), idx); + } + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/NestedCursor.hpp b/include/pmacc/cuSTL/cursor/NestedCursor.hpp index 9c6e1e69de..971f29419b 100644 --- a/include/pmacc/cuSTL/cursor/NestedCursor.hpp +++ b/include/pmacc/cuSTL/cursor/NestedCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,21 +27,21 @@ namespace pmacc { -namespace cursor -{ - -/** wraps a cursor into a new cursor in a way that accessing on the new cursor - * means getting the nested cursor and jumping means jumping on the nested cursor. - * \param cursor Cursor to be wrapped - * \return A new cursor which wraps the given cursor - */ -template -HDINLINE -Cursor, CursorNavigator, TCursor> make_NestedCursor(const TCursor& cursor) -{ - return Cursor, CursorNavigator, TCursor>(MarkerAccessor(), CursorNavigator(), cursor); -} - -} // cursor -} // pmacc + namespace cursor + { + /** wraps a cursor into a new cursor in a way that accessing on the new cursor + * means getting the nested cursor and jumping means jumping on the nested cursor. 
+ * \param cursor Cursor to be wrapped + * \return A new cursor which wraps the given cursor + */ + template + HDINLINE Cursor, CursorNavigator, TCursor> make_NestedCursor(const TCursor& cursor) + { + return Cursor, CursorNavigator, TCursor>( + MarkerAccessor(), + CursorNavigator(), + cursor); + } + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/SafeCursor.hpp b/include/pmacc/cuSTL/cursor/SafeCursor.hpp index f510e20485..a52c8c28ee 100644 --- a/include/pmacc/cuSTL/cursor/SafeCursor.hpp +++ b/include/pmacc/cuSTL/cursor/SafeCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,138 +27,153 @@ namespace pmacc { -namespace cursor -{ - -/** A SafeCursor is like a cursor, except that it checks its validity before each access. - */ -template -class SafeCursor : public Cursor -{ -public: - static constexpr int dim = pmacc::cursor::traits::dim::value; -private: - /* \todo: Use a zone instead of lowerExtent and UpperExtent */ - const math::Int lowerExtent; - const math::Int upperExtent; - math::Int offset; - bool enabled; -public: - /** - * \param cursor Base cursor - * \param lowerExtent Top left corner of valid range, inside the range. - * \param upperExtent Bottom right corner of valid range, inside the range. 
- */ - HDINLINE SafeCursor(const Cursor& cursor, - const math::Int& lowerExtent, - const math::Int& upperExtent) - : Cursor(cursor), - lowerExtent(lowerExtent), - upperExtent(upperExtent), - offset(math::Int(0)), - enabled(true) - {} - - HDINLINE void enableChecking() {this->enabled = true;} - HDINLINE void disableChecking() {this->enabled = false;} - - HDINLINE - typename Cursor::type operator*() - { - checkValidity(); - return Cursor::operator*(); - } - - HDINLINE - typename boost::add_const::type operator*() const - { - checkValidity(); - return Cursor::operator*(); - } - - template - HDINLINE - SafeCursor operator()(const Jump& jump) const + namespace cursor { - SafeCursor result(Cursor::operator()(jump), - this->lowerExtent, - this->upperExtent); - result.offset = this->offset + jump; - result.enabled = this->enabled; - return result; - } - - HDINLINE - SafeCursor operator()(int x) const - { - return (*this)(math::Int<1>(x)); - } - - HDINLINE - SafeCursor operator()(int x, int y) const - { - return (*this)(math::Int<2>(x, y)); - } - - HDINLINE - SafeCursor operator()(int x, int y, int z) const - { - return (*this)(math::Int<3>(x, y, z)); - } - - HDINLINE void operator++() {this->jump[0]++; Cursor::operator++;} - HDINLINE void operator--() {this->jump[0]--; Cursor::operator--;} - - template - HDINLINE - typename Cursor::type operator[](const Jump& jump) - { - return *((*this)(jump)); - } - - template - HDINLINE - typename Cursor::type operator[](const Jump& jump) const - { - return *((*this)(jump)); - } -private: - HDINLINE void checkValidity() const - { - if(!this->enabled) return; - #pragma unroll - for(int i = 0; i < dim; i++) + /** A SafeCursor is like a cursor, except that it checks its validity before each access. 
+ */ + template + class SafeCursor : public Cursor { - if(this->offset[i] < this->lowerExtent[i] || - this->offset[i] > this->upperExtent[i]) - printf("error[cursor]: index %d out of range: %d is not within [%d, %d]\n", - i, this->offset[i], this->lowerExtent[i], this->upperExtent[i]); + public: + static constexpr int dim = pmacc::cursor::traits::dim::value; + + private: + /* \todo: Use a zone instead of lowerExtent and UpperExtent */ + const math::Int lowerExtent; + const math::Int upperExtent; + math::Int offset; + bool enabled; + + public: + /** + * \param cursor Base cursor + * \param lowerExtent Top left corner of valid range, inside the range. + * \param upperExtent Bottom right corner of valid range, inside the range. + */ + HDINLINE SafeCursor( + const Cursor& cursor, + const math::Int& lowerExtent, + const math::Int& upperExtent) + : Cursor(cursor) + , lowerExtent(lowerExtent) + , upperExtent(upperExtent) + , offset(math::Int(0)) + , enabled(true) + { + } + + HDINLINE void enableChecking() + { + this->enabled = true; + } + HDINLINE void disableChecking() + { + this->enabled = false; + } + + HDINLINE + typename Cursor::type operator*() + { + checkValidity(); + return Cursor::operator*(); + } + + HDINLINE + typename boost::add_const::type operator*() const + { + checkValidity(); + return Cursor::operator*(); + } + + template + HDINLINE SafeCursor operator()(const Jump& jump) const + { + SafeCursor result(Cursor::operator()(jump), this->lowerExtent, this->upperExtent); + result.offset = this->offset + jump; + result.enabled = this->enabled; + return result; + } + + HDINLINE + SafeCursor operator()(int x) const + { + return (*this)(math::Int<1>(x)); + } + + HDINLINE + SafeCursor operator()(int x, int y) const + { + return (*this)(math::Int<2>(x, y)); + } + + HDINLINE + SafeCursor operator()(int x, int y, int z) const + { + return (*this)(math::Int<3>(x, y, z)); + } + + HDINLINE void operator++() + { + this->jump[0]++; + Cursor::operator++; + } + HDINLINE void 
operator--() + { + this->jump[0]--; + Cursor::operator--; + } + + template + HDINLINE typename Cursor::type operator[](const Jump& jump) + { + return *((*this)(jump)); + } + + template + HDINLINE typename Cursor::type operator[](const Jump& jump) const + { + return *((*this)(jump)); + } + + private: + HDINLINE void checkValidity() const + { + if(!this->enabled) + return; +#pragma unroll + for(int i = 0; i < dim; i++) + { + if(this->offset[i] < this->lowerExtent[i] || this->offset[i] > this->upperExtent[i]) + printf( + "error[cursor]: index %d out of range: %d is not within [%d, %d]\n", + i, + this->offset[i], + this->lowerExtent[i], + this->upperExtent[i]); + } + } + }; + + namespace traits + { + /* type trait to get the safe-cursor's dimension if it has one */ + template + struct dim> + { + static constexpr int value = SafeCursor::dim; + }; + + } // namespace traits + + /* convenient function to construct a safe-cursor by passing its constructor arguments */ + template + HDINLINE SafeCursor make_SafeCursor( + const Cursor& cursor, + const math::Int>::value>& lowerExtent, + const math::Int>::value>& upperExtent) + { + return SafeCursor(cursor, lowerExtent, upperExtent); } - } -}; - -namespace traits -{ - -/* type trait to get the safe-cursor's dimension if it has one */ -template -struct dim > -{ - static constexpr int value = SafeCursor::dim; -}; - -} // traits - -/* convenient function to construct a safe-cursor by passing its constructor arguments */ -template -HDINLINE SafeCursor make_SafeCursor( - const Cursor& cursor, - const math::Int >::value>& lowerExtent, - const math::Int >::value>& upperExtent) -{ - return SafeCursor(cursor, lowerExtent, upperExtent); -} - -} // cursor -} // pmacc + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/accessor/CursorAccessor.hpp b/include/pmacc/cuSTL/cursor/accessor/CursorAccessor.hpp index 5075b464ed..b4a1d95102 100644 --- a/include/pmacc/cuSTL/cursor/accessor/CursorAccessor.hpp +++ 
b/include/pmacc/cuSTL/cursor/accessor/CursorAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -25,22 +25,20 @@ namespace pmacc { -namespace cursor -{ - -template -struct CursorAccessor -{ - typedef typename TCursor::type type; - - HDINLINE type operator()(TCursor& cursor) + namespace cursor { - return *cursor; - } + template + struct CursorAccessor + { + typedef typename TCursor::type type; - ///\todo: implement const method here with a const TCursor& argument and 'type' as return type. -}; + HDINLINE type operator()(TCursor& cursor) + { + return *cursor; + } -} // cursor -} // pmacc + ///\todo: implement const method here with a const TCursor& argument and 'type' as return type. + }; + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/accessor/FunctorAccessor.hpp b/include/pmacc/cuSTL/cursor/accessor/FunctorAccessor.hpp index 5a464d95d1..8925b27346 100644 --- a/include/pmacc/cuSTL/cursor/accessor/FunctorAccessor.hpp +++ b/include/pmacc/cuSTL/cursor/accessor/FunctorAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,27 +26,27 @@ namespace pmacc { -namespace cursor -{ - -template -struct FunctorAccessor -{ - _Functor functor; - - typedef typename ::pmacc::result_of::Functor<_Functor, ArgType>::type type; + namespace cursor + { + template + struct FunctorAccessor + { + _Functor functor; - HDINLINE FunctorAccessor(const _Functor& functor) : functor(functor) {} + typedef typename ::pmacc::result_of::Functor<_Functor, ArgType>::type type; - template - HDINLINE type operator()(TCursor& cursor) - { - return this->functor(*cursor); - } + HDINLINE FunctorAccessor(const _Functor& functor) : functor(functor) + { + } - ///\todo: implement const method here with a const TCursor& argument and 'type' as return type. 
-}; + template + HDINLINE type operator()(TCursor& cursor) + { + return this->functor(*cursor); + } -} // cursor -} // pmacc + ///\todo: implement const method here with a const TCursor& argument and 'type' as return type. + }; + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/accessor/LinearInterpAccessor.hpp b/include/pmacc/cuSTL/cursor/accessor/LinearInterpAccessor.hpp index 20953c2826..13868e9543 100644 --- a/include/pmacc/cuSTL/cursor/accessor/LinearInterpAccessor.hpp +++ b/include/pmacc/cuSTL/cursor/accessor/LinearInterpAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -29,144 +29,152 @@ namespace pmacc { -namespace cursor -{ + namespace cursor + { + /** Performs a 1D, 2D or 3D, linear interpolation on access. + * + * \tparam T_Cursor input data + */ + template::value> + struct LinearInterpAccessor; + + template + struct LinearInterpAccessor + { + typedef T_Cursor Cursor; + typedef typename Cursor::ValueType type; -/** Performs a 1D, 2D or 3D, linear interpolation on access. 
- * - * \tparam T_Cursor input data - */ -template::value> -struct LinearInterpAccessor; + Cursor cursor; -template -struct LinearInterpAccessor -{ - typedef T_Cursor Cursor; - typedef typename Cursor::ValueType type; + /** + * @param cursor input data + */ + HDINLINE LinearInterpAccessor(const Cursor& cursor) : cursor(cursor) + { + } - Cursor cursor; + template + HDINLINE type operator()(const T_Position pos) const + { + BOOST_STATIC_ASSERT(T_Position::dim == DIM1); - /** - * @param cursor input data - */ - HDINLINE LinearInterpAccessor(const Cursor& cursor) : cursor(cursor) {} + T_Position intPart; + T_Position fracPart; - template - HDINLINE type operator()(const T_Position pos) const - { - BOOST_STATIC_ASSERT(T_Position::dim == DIM1); + fracPart[0] = pmacc::math::modf(pos[0], &(intPart[0])); - T_Position intPart; - T_Position fracPart; + const math::Int idx1D(static_cast(intPart[0])); - fracPart[0] = pmacc::algorithms::math::modf(pos[0], &(intPart[0])); + type result = pmacc::traits::GetInitializedInstance()(0.0); + typedef typename T_Position::type PositionComp; + for(int i = 0; i < 2; i++) + { + const PositionComp weighting1D = (i == 0 ? (PositionComp(1.0) - fracPart[0]) : fracPart[0]); + result += static_cast(weighting1D * this->cursor[idx1D + math::Int(i)]); + } - const math::Int idx1D(static_cast(intPart[0])); + return result; + } + }; - type result = pmacc::traits::GetInitializedInstance()(0.0); - typedef typename T_Position::type PositionComp; - for(int i = 0; i < 2; i++) + template + struct LinearInterpAccessor { - const PositionComp weighting1D = (i == 0 ? 
(PositionComp(1.0) - fracPart[0]) : fracPart[0]); - result += static_cast(weighting1D * this->cursor[idx1D + math::Int(i)]); - } + typedef T_Cursor Cursor; + typedef typename T_Cursor::ValueType type; - return result; - } -}; + Cursor cursor; -template -struct LinearInterpAccessor -{ - typedef T_Cursor Cursor; - typedef typename T_Cursor::ValueType type; - - Cursor cursor; + /** + * @param cursor input data + */ + HDINLINE LinearInterpAccessor(const Cursor& cursor) : cursor(cursor) + { + } - /** - * @param cursor input data - */ - HDINLINE LinearInterpAccessor(const Cursor& cursor) : cursor(cursor) {} + template + HDINLINE type operator()(const T_Position pos) const + { + BOOST_STATIC_ASSERT(T_Position::dim == DIM2); - template - HDINLINE type operator()(const T_Position pos) const - { - BOOST_STATIC_ASSERT(T_Position::dim == DIM2); + T_Position intPart; + T_Position fracPart; - T_Position intPart; - T_Position fracPart; + fracPart[0] = pmacc::math::modf(pos[0], &(intPart[0])); + fracPart[1] = pmacc::math::modf(pos[1], &(intPart[1])); - fracPart[0] = pmacc::algorithms::math::modf(pos[0], &(intPart[0])); - fracPart[1] = pmacc::algorithms::math::modf(pos[1], &(intPart[1])); + const math::Int idx2D(static_cast(intPart[0]), static_cast(intPart[1])); - const math::Int idx2D(static_cast(intPart[0]), - static_cast(intPart[1])); + type result = pmacc::traits::GetInitializedInstance()(0.0); + typedef typename T_Position::type PositionComp; + for(int i = 0; i < 2; i++) + { + const PositionComp weighting1D = (i == 0 ? (PositionComp(1.0) - fracPart[0]) : fracPart[0]); + for(int j = 0; j < 2; j++) + { + const PositionComp weighting2D + = weighting1D * (j == 0 ? 
(PositionComp(1.0) - fracPart[1]) : fracPart[1]); + result += static_cast(weighting2D * this->cursor[idx2D + math::Int(i, j)]); + } + } - type result = pmacc::traits::GetInitializedInstance()(0.0); - typedef typename T_Position::type PositionComp; - for(int i = 0; i < 2; i++) - { - const PositionComp weighting1D = (i == 0 ? (PositionComp(1.0) - fracPart[0]) : fracPart[0]); - for(int j = 0; j < 2; j++) - { - const PositionComp weighting2D = weighting1D * (j == 0 ? (PositionComp(1.0) - fracPart[1]) : fracPart[1]); - result += static_cast(weighting2D * this->cursor[idx2D + math::Int(i, j)]); + return result; } - } - - return result; - } -}; + }; -template -struct LinearInterpAccessor -{ - typedef T_Cursor Cursor; - typedef typename T_Cursor::ValueType type; + template + struct LinearInterpAccessor + { + typedef T_Cursor Cursor; + typedef typename T_Cursor::ValueType type; - Cursor cursor; + Cursor cursor; - /** - * @param cursor input data - */ - HDINLINE LinearInterpAccessor(const Cursor& cursor) : cursor(cursor) {} + /** + * @param cursor input data + */ + HDINLINE LinearInterpAccessor(const Cursor& cursor) : cursor(cursor) + { + } - template - HDINLINE type operator()(const T_Position pos) const - { - BOOST_STATIC_ASSERT(T_Position::dim == DIM3); + template + HDINLINE type operator()(const T_Position pos) const + { + BOOST_STATIC_ASSERT(T_Position::dim == DIM3); - T_Position intPart; - T_Position fracPart; + T_Position intPart; + T_Position fracPart; - fracPart[0] = pmacc::algorithms::math::modf(pos[0], &(intPart[0])); - fracPart[1] = pmacc::algorithms::math::modf(pos[1], &(intPart[1])); - fracPart[2] = pmacc::algorithms::math::modf(pos[2], &(intPart[2])); + fracPart[0] = pmacc::math::modf(pos[0], &(intPart[0])); + fracPart[1] = pmacc::math::modf(pos[1], &(intPart[1])); + fracPart[2] = pmacc::math::modf(pos[2], &(intPart[2])); - const math::Int idx3D(static_cast(intPart[0]), - static_cast(intPart[1]), - static_cast(intPart[2])); + const math::Int idx3D( + 
static_cast(intPart[0]), + static_cast(intPart[1]), + static_cast(intPart[2])); - type result = pmacc::traits::GetInitializedInstance()(0.0); - typedef typename T_Position::type PositionComp; - for(int i = 0; i < 2; i++) - { - const PositionComp weighting1D = (i == 0 ? (PositionComp(1.0) - fracPart[0]) : fracPart[0]); - for(int j = 0; j < 2; j++) - { - const PositionComp weighting2D = weighting1D * (j == 0 ? (PositionComp(1.0) - fracPart[1]) : fracPart[1]); - for(int k = 0; k < 2; k++) + type result = pmacc::traits::GetInitializedInstance()(0.0); + typedef typename T_Position::type PositionComp; + for(int i = 0; i < 2; i++) { - const PositionComp weighting3D = weighting2D * (k == 0 ? (PositionComp(1.0) - fracPart[2]) : fracPart[2]); - result += static_cast(weighting3D * this->cursor[idx3D + math::Int(i, j, k)]); + const PositionComp weighting1D = (i == 0 ? (PositionComp(1.0) - fracPart[0]) : fracPart[0]); + for(int j = 0; j < 2; j++) + { + const PositionComp weighting2D + = weighting1D * (j == 0 ? (PositionComp(1.0) - fracPart[1]) : fracPart[1]); + for(int k = 0; k < 2; k++) + { + const PositionComp weighting3D + = weighting2D * (k == 0 ? (PositionComp(1.0) - fracPart[2]) : fracPart[2]); + result += static_cast(weighting3D * this->cursor[idx3D + math::Int(i, j, k)]); + } + } } - } - } - return result; - } -}; + return result; + } + }; -} // namespace cursor + } // namespace cursor } // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/accessor/MarkerAccessor.hpp b/include/pmacc/cuSTL/cursor/accessor/MarkerAccessor.hpp index f70b95fe1c..06d497f80e 100644 --- a/include/pmacc/cuSTL/cursor/accessor/MarkerAccessor.hpp +++ b/include/pmacc/cuSTL/cursor/accessor/MarkerAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -23,27 +23,25 @@ namespace pmacc { -namespace cursor -{ - -template -struct MarkerAccessor -{ - typedef const Marker type; - /** returns the cursor's marker. - * - * Here a copy of marker is returned because the cursor object - * could be a temporary object. Therefore any reference or const-reference - * of marker is dangerous. If you want to have a reference to marker use e.g. - * FunctorAccessor or Cursor::getMarker(). - */ - HDINLINE - type operator()(const Marker& marker) const + namespace cursor { - return marker; - } -}; - -} // cursor -} // pmacc + template + struct MarkerAccessor + { + typedef const Marker type; + /** returns the cursor's marker. + * + * Here a copy of marker is returned because the cursor object + * could be a temporary object. Therefore any reference or const-reference + * of marker is dangerous. If you want to have a reference to marker use e.g. + * FunctorAccessor or Cursor::getMarker(). + */ + HDINLINE + type operator()(const Marker& marker) const + { + return marker; + } + }; + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/accessor/PointerAccessor.hpp b/include/pmacc/cuSTL/cursor/accessor/PointerAccessor.hpp index aaa57cd0cb..f0b59b2cc3 100644 --- a/include/pmacc/cuSTL/cursor/accessor/PointerAccessor.hpp +++ b/include/pmacc/cuSTL/cursor/accessor/PointerAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -23,28 +23,25 @@ namespace pmacc { -namespace cursor -{ - -template -struct PointerAccessor -{ - typedef Type& type; - - /** Returns the dereferenced pointer of type 'Type' - * - * Here a reference is returned because one expects a reference - * if an ordinary c++ pointer is dereferenced too. - * There is no danger if the cursor object is temporary. 
- */ - template - HDINLINE - type operator()(Data& data) const + namespace cursor { - return *((Type*)data); - } -}; + template + struct PointerAccessor + { + typedef Type& type; -} // cursor -} // pmacc + /** Returns the dereferenced pointer of type 'Type' + * + * Here a reference is returned because one expects a reference + * if an ordinary c++ pointer is dereferenced too. + * There is no danger if the cursor object is temporary. + */ + template + HDINLINE type operator()(Data& data) const + { + return *((Type*) data); + } + }; + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/accessor/TwistAxesAccessor.hpp b/include/pmacc/cuSTL/cursor/accessor/TwistAxesAccessor.hpp index b28ee6f28c..674b2d48a0 100644 --- a/include/pmacc/cuSTL/cursor/accessor/TwistAxesAccessor.hpp +++ b/include/pmacc/cuSTL/cursor/accessor/TwistAxesAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,27 +26,25 @@ namespace pmacc { -namespace cursor -{ - -template -struct TwistAxesAccessor -{ - typedef typename math::result_of::TwistComponents< - Axes, typename TCursor::ValueType>::type type; - - /** Returns a reference to the result of '*cursor' (with twisted axes). - * - * Be aware that the underlying cursor must not be a temporary object if '*cursor' - * refers to something inside the cursor. - */ - HDINLINE type operator()(TCursor& cursor) + namespace cursor { - return math::twistComponents(*cursor); - } + template + struct TwistAxesAccessor + { + typedef typename math::result_of::TwistComponents::type type; + + /** Returns a reference to the result of '*cursor' (with twisted axes). + * + * Be aware that the underlying cursor must not be a temporary object if '*cursor' + * refers to something inside the cursor. 
+ */ + HDINLINE type operator()(TCursor& cursor) + { + return math::twistComponents(*cursor); + } - ///\todo: implement const method here with a const TCursor& argument and 'type' as return type. -}; + ///\todo: implement const method here with a const TCursor& argument and 'type' as return type. + }; -} // cursor -} // pmacc + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/compile-time/BufferCursor.hpp b/include/pmacc/cuSTL/cursor/compile-time/BufferCursor.hpp index 8f02f29913..4983b56b22 100644 --- a/include/pmacc/cuSTL/cursor/compile-time/BufferCursor.hpp +++ b/include/pmacc/cuSTL/cursor/compile-time/BufferCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,39 +28,40 @@ namespace pmacc { -namespace cursor -{ -namespace CT -{ - -/** Compile-time version of cursor::BufferCursor where pitch is a compile-time vector - */ -template -struct BufferCursor : public Cursor, - CT::BufferNavigator, Type*> -{ - HDINLINE BufferCursor(Type* pointer) - : Cursor, CT::BufferNavigator, Type*> - (PointerAccessor(), CT::BufferNavigator(), pointer) {} + namespace cursor + { + namespace CT + { + /** Compile-time version of cursor::BufferCursor where pitch is a compile-time vector + */ + template + struct BufferCursor : public Cursor, CT::BufferNavigator, Type*> + { + HDINLINE BufferCursor(Type* pointer) + : Cursor, CT::BufferNavigator, Type*>( + PointerAccessor(), + CT::BufferNavigator(), + pointer) + { + } - HDINLINE BufferCursor(const Cursor, - CT::BufferNavigator, Type*>& cur) - : Cursor, CT::BufferNavigator, Type*>(cur) {} -}; + HDINLINE BufferCursor(const Cursor, CT::BufferNavigator, Type*>& cur) + : Cursor, CT::BufferNavigator, Type*>(cur) + { + } + }; -} // CT - -namespace traits -{ - -template -struct dim > -{ - const static int value = Pitch::dim + 1; -}; + } // namespace CT -} // traits + namespace traits + { + template + 
struct dim> + { + const static int value = Pitch::dim + 1; + }; -} // cursor -} // pmacc + } // namespace traits + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/compile-time/SafeCursor.hpp b/include/pmacc/cuSTL/cursor/compile-time/SafeCursor.hpp index a01145f72d..2a88860c0d 100644 --- a/include/pmacc/cuSTL/cursor/compile-time/SafeCursor.hpp +++ b/include/pmacc/cuSTL/cursor/compile-time/SafeCursor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,105 +26,115 @@ namespace pmacc { -namespace cursor -{ -namespace CT -{ - -/** Compile-time version of cursor::SafeCursor where LowerExtent and UpperExtent are - * compile-time vectors. - */ -template -class SafeCursor : public Cursor -{ -private: - typedef SafeCursor This; - static constexpr int dim = pmacc::cursor::traits::dim::value; - math::Int offset; -public: - HDINLINE SafeCursor(const Cursor& cursor) - : Cursor(cursor), offset(math::Int(0)) - {} - - HDINLINE - typename Cursor::type operator*() - { - checkValidity(); - return Cursor::operator*(); - } - - HDINLINE - typename boost::add_const::type operator*() const - { - checkValidity(); - return Cursor::operator*(); - } - - template - HDINLINE - This operator()(const Jump& jump) const - { - This result(Cursor::operator()(jump)); - result.offset = this->offset + jump; - return result; - } - - HDINLINE - This operator()(int x) const - { - return (*this)(math::Int<1>(x)); - } - - HDINLINE - This operator()(int x, int y) const - { - return (*this)(math::Int<2>(x, y)); - } - - HDINLINE - This operator()(int x, int y, int z) const - { - return (*this)(math::Int<3>(x, y, z)); - } - - HDINLINE void operator++() {this->jump[0]++; Cursor::operator++;} - HDINLINE void operator--() {this->jump[0]--; Cursor::operator--;} - - template - HDINLINE - typename Cursor::type operator[](const Jump& jump) + namespace cursor { - return 
*((*this)(jump)); - } - - template - HDINLINE - typename Cursor::type operator[](const Jump& jump) const - { - return *((*this)(jump)); - } -private: - HDINLINE void checkValidity() const - { - #pragma unroll - for(int i = 0; i < dim; i++) + namespace CT { - if(this->offset[i] < LowerExtent().toRT()[i] || - this->offset[i] > UpperExtent().toRT()[i]) - printf("error[cursor]: index %d out of range: %d is not within [%d, %d]\n", - i, this->offset[i], LowerExtent().toRT()[i], UpperExtent().toRT()[i]); - } - } -}; - -template -HDINLINE SafeCursor -make_SafeCursor(const Cursor& cursor, LowerExtent, UpperExtent) -{ - return SafeCursor(cursor); -} - -} // CT -} // cursor -} // pmacc - + /** Compile-time version of cursor::SafeCursor where LowerExtent and UpperExtent are + * compile-time vectors. + */ + template + class SafeCursor : public Cursor + { + private: + typedef SafeCursor This; + static constexpr int dim = pmacc::cursor::traits::dim::value; + math::Int offset; + + public: + HDINLINE SafeCursor(const Cursor& cursor) : Cursor(cursor), offset(math::Int(0)) + { + } + + HDINLINE + typename Cursor::type operator*() + { + checkValidity(); + return Cursor::operator*(); + } + + HDINLINE + typename boost::add_const::type operator*() const + { + checkValidity(); + return Cursor::operator*(); + } + + template + HDINLINE This operator()(const Jump& jump) const + { + This result(Cursor::operator()(jump)); + result.offset = this->offset + jump; + return result; + } + + HDINLINE + This operator()(int x) const + { + return (*this)(math::Int<1>(x)); + } + + HDINLINE + This operator()(int x, int y) const + { + return (*this)(math::Int<2>(x, y)); + } + + HDINLINE + This operator()(int x, int y, int z) const + { + return (*this)(math::Int<3>(x, y, z)); + } + + HDINLINE void operator++() + { + this->jump[0]++; + Cursor::operator++; + } + HDINLINE void operator--() + { + this->jump[0]--; + Cursor::operator--; + } + + template + HDINLINE typename Cursor::type operator[](const Jump& jump) 
+ { + return *((*this)(jump)); + } + + template + HDINLINE typename Cursor::type operator[](const Jump& jump) const + { + return *((*this)(jump)); + } + + private: + HDINLINE void checkValidity() const + { +#pragma unroll + for(int i = 0; i < dim; i++) + { + if(this->offset[i] < LowerExtent().toRT()[i] || this->offset[i] > UpperExtent().toRT()[i]) + printf( + "error[cursor]: index %d out of range: %d is not within [%d, %d]\n", + i, + this->offset[i], + LowerExtent().toRT()[i], + UpperExtent().toRT()[i]); + } + } + }; + + template + HDINLINE SafeCursor make_SafeCursor( + const Cursor& cursor, + LowerExtent, + UpperExtent) + { + return SafeCursor(cursor); + } + + } // namespace CT + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/BufferNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/BufferNavigator.hpp index 3446eeea56..ba4cb74de8 100644 --- a/include/pmacc/cuSTL/cursor/navigator/BufferNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/BufferNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -32,68 +32,72 @@ namespace pmacc { -namespace cursor -{ - -template -class BufferNavigator -{ -public: - typedef tag::BufferNavigator tag; - static constexpr int dim = T_dim; -private: - math::Size_t pitch; -public: - HDINLINE - BufferNavigator(math::Size_t pitch) : pitch(pitch) {} - - template - HDINLINE Data - operator()(const Data& data, const math::Int& jump) const - { - char* result = (char*)data; - result += jump.x() * sizeof(typename boost::remove_pointer::type); - for(int i = 1; i < dim; i++) - result += jump[i] * this->pitch[i-1]; - return (Data)result; - } - - HDINLINE - const math::Size_t& getPitch() const {return pitch;} -}; - -template<> -class BufferNavigator<1> -{ -public: - typedef tag::BufferNavigator tag; - static constexpr int dim = 1; - -public: - HDINLINE - BufferNavigator(math::Size_t) {} - - template - HDINLINE Data - operator()(const Data& data, const math::Int& jump) const + namespace cursor { - char* result = (char*)data; - result += jump.x() * sizeof(typename boost::remove_pointer::type); - return (Data)result; - } -}; - -namespace traits -{ - -template -struct dim > -{ - static constexpr int value = T_dim; -}; - -} // traits - -} //cursor -} // pmacc - + template + class BufferNavigator + { + public: + typedef tag::BufferNavigator tag; + static constexpr int dim = T_dim; + + private: + math::Size_t pitch; + + public: + HDINLINE + BufferNavigator(math::Size_t pitch) : pitch(pitch) + { + } + + template + HDINLINE Data operator()(const Data& data, const math::Int& jump) const + { + char* result = (char*) data; + result += jump.x() * sizeof(typename boost::remove_pointer::type); + for(int i = 1; i < dim; i++) + result += jump[i] * this->pitch[i - 1]; + return (Data) result; + } + + HDINLINE + const math::Size_t& getPitch() const + { + return pitch; + } + }; + + template<> + class BufferNavigator<1> + { + public: + typedef tag::BufferNavigator tag; + static constexpr int dim = 1; + + public: + HDINLINE + BufferNavigator(math::Size_t) + { 
+ } + + template + HDINLINE Data operator()(const Data& data, const math::Int& jump) const + { + char* result = (char*) data; + result += jump.x() * sizeof(typename boost::remove_pointer::type); + return (Data) result; + } + }; + + namespace traits + { + template + struct dim> + { + static constexpr int value = T_dim; + }; + + } // namespace traits + + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/CartNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/CartNavigator.hpp index 41f93ea8ce..560efdf124 100644 --- a/include/pmacc/cuSTL/cursor/navigator/CartNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/CartNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -29,47 +29,50 @@ namespace pmacc { -namespace cursor -{ - -template -class CartNavigator -{ -public: - typedef tag::CartNavigator tag; - static constexpr int dim = T_dim; -private: - math::Int factor; -public: - HDINLINE - CartNavigator(math::Int factor) : factor(factor) {} - - template - HDINLINE - Data operator()(const Data& data, const math::Int& jump) const + namespace cursor { - char* result = (char*)data; - result += algorithms::math::dot( - static_cast::BaseType>(jump), - static_cast::BaseType>(this->factor)); - return (Data)result; - } + template + class CartNavigator + { + public: + typedef tag::CartNavigator tag; + static constexpr int dim = T_dim; - HDINLINE - const math::Int& getFactor() const {return factor;} -}; + private: + math::Int factor; -namespace traits -{ + public: + HDINLINE + CartNavigator(math::Int factor) : factor(factor) + { + } -template -struct dim > -{ - static constexpr int value = T_dim; -}; + template + HDINLINE Data operator()(const Data& data, const math::Int& jump) const + { + char* result = (char*) data; + result += pmacc::math::dot( + static_cast::BaseType>(jump), + static_cast::BaseType>(this->factor)); + 
return (Data) result; + } + + HDINLINE + const math::Int& getFactor() const + { + return factor; + } + }; -} // traits + namespace traits + { + template + struct dim> + { + static constexpr int value = T_dim; + }; -} // cursor -} // pmacc + } // namespace traits + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/CursorNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/CursorNavigator.hpp index ecae903673..6371033a07 100644 --- a/include/pmacc/cuSTL/cursor/navigator/CursorNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/CursorNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -25,19 +25,16 @@ namespace pmacc { -namespace cursor -{ - -struct CursorNavigator -{ - template - HDINLINE - Cursor operator()(const Cursor& cursor, const Jump& jump) const + namespace cursor { - return cursor(jump); - } -}; - -} // cursor -} // pmacc + struct CursorNavigator + { + template + HDINLINE Cursor operator()(const Cursor& cursor, const Jump& jump) const + { + return cursor(jump); + } + }; + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/EmptyNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/EmptyNavigator.hpp index 85127b480b..9006251cdc 100644 --- a/include/pmacc/cuSTL/cursor/navigator/EmptyNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/EmptyNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -23,19 +23,16 @@ namespace pmacc { -namespace cursor -{ - -struct EmptyNavigator -{ - template - HDINLINE - Marker operator()(const Marker& marker, Jump) const + namespace cursor { - return marker; - } -}; - -} // cursor -} // pmacc + struct EmptyNavigator + { + template + HDINLINE Marker operator()(const Marker& marker, Jump) const + { + return marker; + } + }; + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/MapTo1DNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/MapTo1DNavigator.hpp index 7b2b676449..8116f39003 100644 --- a/include/pmacc/cuSTL/cursor/navigator/MapTo1DNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/MapTo1DNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -23,55 +23,55 @@ namespace pmacc { -namespace cursor -{ - -/** - * Use this navigator to wrap a ndim-cursor into a 1D cursor. - */ -template -class MapTo1DNavigator -{ -public: - static constexpr int dim = T_dim; -private: - math::Size_t shape; - int pos; - - HDINLINE - math::Int toNdim(int idx) const + namespace cursor { - math::Int result; - int volume = 1; - for(int i = 0; i < dim; i++) + /** + * Use this navigator to wrap a ndim-cursor into a 1D cursor. + */ + template + class MapTo1DNavigator { - result[i] = (idx / volume) % this->shape[i]; - volume *= this->shape[i]; - } - return result; - } -public: - /** - * @param shape area to map the 1D index to. 
- */ - HDINLINE - MapTo1DNavigator(math::Size_t shape) - : shape(shape), pos(0) {} + public: + static constexpr int dim = T_dim; - template - HDINLINE - Cursor operator()(const Cursor& cursor, math::Int<1> jump) - { - math::Int ndstart = toNdim(this->pos); - this->pos += jump.x(); - math::Int ndend = toNdim(this->pos); + private: + math::Size_t shape; + int pos; + + HDINLINE + math::Int toNdim(int idx) const + { + math::Int result; + int volume = 1; + for(int i = 0; i < dim; i++) + { + result[i] = (idx / volume) % this->shape[i]; + volume *= this->shape[i]; + } + return result; + } + + public: + /** + * @param shape area to map the 1D index to. + */ + HDINLINE + MapTo1DNavigator(math::Size_t shape) : shape(shape), pos(0) + { + } - math::Int ndjump = ndend - ndstart; + template + HDINLINE Cursor operator()(const Cursor& cursor, math::Int<1> jump) + { + math::Int ndstart = toNdim(this->pos); + this->pos += jump.x(); + math::Int ndend = toNdim(this->pos); - return cursor(ndjump); - } + math::Int ndjump = ndend - ndstart; -}; + return cursor(ndjump); + } + }; -} // namespace cursor + } // namespace cursor } // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/MultiIndexNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/MultiIndexNavigator.hpp index a2607f5f24..a32438321a 100644 --- a/include/pmacc/cuSTL/cursor/navigator/MultiIndexNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/MultiIndexNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,34 +28,30 @@ namespace pmacc { -namespace cursor -{ - -template -struct MultiIndexNavigator -{ - typedef tag::MultiIndexNavigator tag; - static constexpr int dim = T_dim; - - template - HDINLINE - MultiIndex operator()(const MultiIndex& index, const math::Int& jump) const + namespace cursor { - return index + jump; - } -}; - -namespace traits -{ - -template -struct dim > -{ - static constexpr int value = T_dim; -}; - -} - -} // cursor -} // pmacc - + template + struct MultiIndexNavigator + { + typedef tag::MultiIndexNavigator tag; + static constexpr int dim = T_dim; + + template + HDINLINE MultiIndex operator()(const MultiIndex& index, const math::Int& jump) const + { + return index + jump; + } + }; + + namespace traits + { + template + struct dim> + { + static constexpr int value = T_dim; + }; + + } // namespace traits + + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/PlusNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/PlusNavigator.hpp index cf3d33b6cc..8211169792 100644 --- a/include/pmacc/cuSTL/cursor/navigator/PlusNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/PlusNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -23,21 +23,18 @@ namespace pmacc { -namespace cursor -{ - -/** Navigator which combines jump and marker of any type by using the +operator. - */ -struct PlusNavigator -{ - template - HDINLINE Marker - operator()(const Marker& marker, const Jump& jump) const + namespace cursor { - return marker + jump; - } -}; + /** Navigator which combines jump and marker of any type by using the +operator. 
+ */ + struct PlusNavigator + { + template + HDINLINE Marker operator()(const Marker& marker, const Jump& jump) const + { + return marker + jump; + } + }; -} // namespace cursor + } // namespace cursor } // namespace pmacc - diff --git a/include/pmacc/cuSTL/cursor/navigator/compile-time/BufferNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/compile-time/BufferNavigator.hpp index b4d5b05a11..8ea1855233 100644 --- a/include/pmacc/cuSTL/cursor/navigator/compile-time/BufferNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/compile-time/BufferNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,63 +26,57 @@ namespace pmacc { -namespace cursor -{ -namespace CT -{ - -template -struct BufferNavigator; - -template -struct BufferNavigator -{ - static constexpr int dim = 1; - - template - HDINLINE - Data operator()(const Data& data, const math::Int& jump) const + namespace cursor { - char* result = (char*)data; - result += jump.x() * sizeof(typename boost::remove_pointer::type); - return (Data)result; - } -}; + namespace CT + { + template + struct BufferNavigator; -template -struct BufferNavigator -{ - static constexpr int dim = 2; + template + struct BufferNavigator + { + static constexpr int dim = 1; - template - HDINLINE - Data operator()(const Data& data, const math::Int& jump) const - { - char* result = (char*)data; - result += jump.x() * sizeof(typename boost::remove_pointer::type) - + jump.y() * Pitch::x::value; - return (Data)result; - } -}; + template + HDINLINE Data operator()(const Data& data, const math::Int& jump) const + { + char* result = (char*) data; + result += jump.x() * sizeof(typename boost::remove_pointer::type); + return (Data) result; + } + }; -template -struct BufferNavigator -{ - static constexpr int dim = 3; + template + struct BufferNavigator + { + static constexpr int dim = 2; - template - HDINLINE - Data operator()(const 
Data& data, const math::Int& jump) const - { - char* result = (char*)data; - result += jump.x() * sizeof(typename boost::remove_pointer::type) - + jump.y() * Pitch::x::value - + jump.z() * Pitch::y::value; - return (Data)result; - } -}; + template + HDINLINE Data operator()(const Data& data, const math::Int& jump) const + { + char* result = (char*) data; + result + += jump.x() * sizeof(typename boost::remove_pointer::type) + jump.y() * Pitch::x::value; + return (Data) result; + } + }; + + template + struct BufferNavigator + { + static constexpr int dim = 3; -} // CT -} // cursor -} // pmacc + template + HDINLINE Data operator()(const Data& data, const math::Int& jump) const + { + char* result = (char*) data; + result += jump.x() * sizeof(typename boost::remove_pointer::type) + + jump.y() * Pitch::x::value + jump.z() * Pitch::y::value; + return (Data) result; + } + }; + } // namespace CT + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistAxesNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistAxesNavigator.hpp index bdf91f3da4..aa5c3b0be9 100644 --- a/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistAxesNavigator.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistAxesNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -25,48 +25,44 @@ namespace pmacc { -namespace cursor -{ -namespace CT -{ - -template -struct TwistAxesNavigator; - -template -struct TwistAxesNavigator -{ - static constexpr int dim = 2; - - template - HDINLINE - TCursor operator()(const TCursor& cursor, const math::Int<2>& jump) const + namespace cursor { - math::Int<2> twistedJump; - twistedJump[Axes::x::value] = jump.x(); - twistedJump[Axes::y::value] = jump.y(); - return cursor(twistedJump); - } -}; + namespace CT + { + template + struct TwistAxesNavigator; -template -struct TwistAxesNavigator -{ - static constexpr int dim = 3; + template + struct TwistAxesNavigator + { + static constexpr int dim = 2; - template - HDINLINE - TCursor operator()(const TCursor& cursor, const math::Int<3>& jump) const - { - math::Int<3> twistedJump; - twistedJump[Axes::x::value] = jump.x(); - twistedJump[Axes::y::value] = jump.y(); - twistedJump[Axes::z::value] = jump.z(); - return cursor(twistedJump); - } -}; + template + HDINLINE TCursor operator()(const TCursor& cursor, const math::Int<2>& jump) const + { + math::Int<2> twistedJump; + twistedJump[Axes::x::value] = jump.x(); + twistedJump[Axes::y::value] = jump.y(); + return cursor(twistedJump); + } + }; + + template + struct TwistAxesNavigator + { + static constexpr int dim = 3; -} // CT -} // cursor -} // pmacc + template + HDINLINE TCursor operator()(const TCursor& cursor, const math::Int<3>& jump) const + { + math::Int<3> twistedJump; + twistedJump[Axes::x::value] = jump.x(); + twistedJump[Axes::y::value] = jump.y(); + twistedJump[Axes::z::value] = jump.z(); + return cursor(twistedJump); + } + }; + } // namespace CT + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistedAxesNavigator.hpp b/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistedAxesNavigator.hpp index a94c93ae93..3537c421ca 100644 --- a/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistedAxesNavigator.hpp +++ 
b/include/pmacc/cuSTL/cursor/navigator/compile-time/TwistedAxesNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -25,48 +25,44 @@ namespace pmacc { -namespace cursor -{ -namespace CT -{ - -template -struct TwistedAxesNavigator; - -template -struct TwistedAxesNavigator -{ - static constexpr int dim = 2; - - template - HDINLINE - TCursor operator()(const TCursor& cursor, const math::Int<2>& jump) const + namespace cursor { - math::Int<2> twistedJump; - twistedJump[Axes::x::value] = jump.x(); - twistedJump[Axes::y::value] = jump.y(); - return cursor(twistedJump); - } -}; + namespace CT + { + template + struct TwistedAxesNavigator; -template -struct TwistedAxesNavigator -{ - static constexpr int dim = 3; + template + struct TwistedAxesNavigator + { + static constexpr int dim = 2; - template - HDINLINE - TCursor operator()(const TCursor& cursor, const math::Int<3>& jump) const - { - math::Int<3> twistedJump; - twistedJump[Axes::x::value] = jump.x(); - twistedJump[Axes::y::value] = jump.y(); - twistedJump[Axes::z::value] = jump.z(); - return cursor(twistedJump); - } -}; + template + HDINLINE TCursor operator()(const TCursor& cursor, const math::Int<2>& jump) const + { + math::Int<2> twistedJump; + twistedJump[Axes::x::value] = jump.x(); + twistedJump[Axes::y::value] = jump.y(); + return cursor(twistedJump); + } + }; + + template + struct TwistedAxesNavigator + { + static constexpr int dim = 3; -} // CT -} // cursor -} // pmacc + template + HDINLINE TCursor operator()(const TCursor& cursor, const math::Int<3>& jump) const + { + math::Int<3> twistedJump; + twistedJump[Axes::x::value] = jump.x(); + twistedJump[Axes::y::value] = jump.y(); + twistedJump[Axes::z::value] = jump.z(); + return cursor(twistedJump); + } + }; + } // namespace CT + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/navigator/tag.hpp 
b/include/pmacc/cuSTL/cursor/navigator/tag.hpp index b20b6310a7..a29605afe1 100644 --- a/include/pmacc/cuSTL/cursor/navigator/tag.hpp +++ b/include/pmacc/cuSTL/cursor/navigator/tag.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -24,15 +24,15 @@ namespace pmacc { -namespace cursor -{ -namespace tag -{ -struct BufferNavigator; -struct CartNavigator; -struct MultiIndexNavigator; -} // tag -} // cursor -} // pmacc + namespace cursor + { + namespace tag + { + struct BufferNavigator; + struct CartNavigator; + struct MultiIndexNavigator; + } // namespace tag + } // namespace cursor +} // namespace pmacc #endif // CURSOR_NAVIGATOR_TAG_H diff --git a/include/pmacc/cuSTL/cursor/tools/LinearInterp.hpp b/include/pmacc/cuSTL/cursor/tools/LinearInterp.hpp index 4c4739eb5f..4409514bb5 100644 --- a/include/pmacc/cuSTL/cursor/tools/LinearInterp.hpp +++ b/include/pmacc/cuSTL/cursor/tools/LinearInterp.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -31,62 +31,46 @@ namespace pmacc { -namespace cursor -{ -namespace tools -{ - -/** Return a cursor that does 1D, 2D or 3D, linear interpolation on input data. - * - * \tparam T_PositionComp integral type of the weighting factor - */ -template -struct LinearInterp -{ - template - Cursor< - LinearInterpAccessor, - PlusNavigator, - pmacc::math::Vector< - T_PositionComp, - pmacc::cursor::traits::dim< - T_Cursor>::value - > - > - HDINLINE - operator()(const T_Cursor& cur) + namespace cursor { - return make_Cursor( - LinearInterpAccessor(cur), - PlusNavigator(), - pmacc::math::Vector< - T_PositionComp, - pmacc::cursor::traits::dim::value>::create(0.0)); - } -}; + namespace tools + { + /** Return a cursor that does 1D, 2D or 3D, linear interpolation on input data. 
+ * + * \tparam T_PositionComp integral type of the weighting factor + */ + template + struct LinearInterp + { + template + Cursor< + LinearInterpAccessor, + PlusNavigator, + pmacc::math::Vector::value>> + HDINLINE operator()(const T_Cursor& cur) + { + return make_Cursor( + LinearInterpAccessor(cur), + PlusNavigator(), + pmacc::math::Vector::value>::create(0.0)); + } + }; -} // namespace tools -} // namespace cursor + } // namespace tools + } // namespace cursor -namespace result_of -{ - -template -struct Functor, T_Cursor> -{ - typedef pmacc::cursor::Cursor< - cursor::LinearInterpAccessor, - cursor::PlusNavigator, - pmacc::math::Vector< - T_PositionComp, - pmacc::cursor::traits::dim< - T_Cursor - >::value - > - > type; -}; + namespace result_of + { + template + struct Functor, T_Cursor> + { + typedef pmacc::cursor::Cursor< + cursor::LinearInterpAccessor, + cursor::PlusNavigator, + pmacc::math::Vector::value>> + type; + }; -} // namespace result_of + } // namespace result_of } // namespace pmacc - diff --git a/include/pmacc/cuSTL/cursor/tools/slice.hpp b/include/pmacc/cuSTL/cursor/tools/slice.hpp index d566af14e1..275427b816 100644 --- a/include/pmacc/cuSTL/cursor/tools/slice.hpp +++ b/include/pmacc/cuSTL/cursor/tools/slice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,79 +28,79 @@ namespace pmacc { -namespace cursor -{ -namespace tools -{ -namespace detail -{ -template -struct SliceResult; + namespace cursor + { + namespace tools + { + namespace detail + { + template + struct SliceResult; -template -struct SliceResult -{ - typedef Cursor< - typename TCursor::Accessor, - BufferNavigator, - typename TCursor::Marker> type; -}; + template + struct SliceResult + { + typedef Cursor< + typename TCursor::Accessor, + BufferNavigator, + typename TCursor::Marker> + type; + }; -template -struct SliceResult -{ - typedef Cursor< - typename TCursor::Accessor, - CartNavigator, - typename TCursor::Marker> type; -}; + template + struct SliceResult + { + typedef Cursor< + typename TCursor::Accessor, + CartNavigator, + typename TCursor::Marker> + type; + }; -template -struct Slice_helper; + template + struct Slice_helper; -template -struct Slice_helper -{ - HDINLINE - BufferNavigator operator()(const Navi& navi) - { - math::Size_t pitch; - for(int i = 0; i < Navi::dim-2; i++) - pitch[i] = navi.getPitch()[i]; - return BufferNavigator(pitch); - } -}; - -template -struct Slice_helper -{ - HDINLINE - CartNavigator operator()(const Navi& navi) - { - math::Int factor; - for(uint32_t i = 0; i < Navi::dim-1; i++) - factor[i] = navi.getFactor()[i]; - return CartNavigator(factor); - } -}; + template + struct Slice_helper + { + HDINLINE + BufferNavigator operator()(const Navi& navi) + { + math::Size_t pitch; + for(int i = 0; i < Navi::dim - 2; i++) + pitch[i] = navi.getPitch()[i]; + return BufferNavigator(pitch); + } + }; -} // detail + template + struct Slice_helper + { + HDINLINE + CartNavigator operator()(const Navi& navi) + { + math::Int factor; + for(uint32_t i = 0; i < Navi::dim - 1; i++) + factor[i] = navi.getFactor()[i]; + return CartNavigator(factor); + } + }; -/** makes a 2D cursor of a 3D vector by dropping the z-component - */ -template -HDINLINE -typename detail::SliceResult::type -slice(const TCursor& cur) -{ - detail::Slice_helper 
slice_helper; - return typename detail::SliceResult::type - (cur.getAccessor(), - slice_helper(cur.getNavigator()), - cur.getMarker()); -} + } // namespace detail -} // tools -} // cursor -} // pmacc + /** makes a 2D cursor of a 3D vector by dropping the z-component + */ + template + HDINLINE typename detail::SliceResult::type slice( + const TCursor& cur) + { + detail::Slice_helper slice_helper; + return typename detail::SliceResult::type( + cur.getAccessor(), + slice_helper(cur.getNavigator()), + cur.getMarker()); + } + } // namespace tools + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/tools/twistAxes.hpp b/include/pmacc/cuSTL/cursor/tools/twistAxes.hpp index cd5ddbfcb4..bb14bac837 100644 --- a/include/pmacc/cuSTL/cursor/tools/twistAxes.hpp +++ b/include/pmacc/cuSTL/cursor/tools/twistAxes.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,30 +27,29 @@ namespace pmacc { -namespace cursor -{ -namespace tools -{ - -/** Returns a new cursor which looks like a rotated version of the one passed. - * - * The new cursor wraps the one that is passed. In the new cursor's navigator - * the components of the passed int-vector are reordered according to the Axes - * parameter and then passed to the nested cursor. - * - * \tparam Axes compile-time vector (pmacc::math::CT::Int) that descripes the mapping. - * x-axis -> Axes::at<0>, y-axis -> Axes::at<1>, ... - */ -template -HDINLINE -Cursor, CT::TwistAxesNavigator, TCursor> -twistAxes(const TCursor& cursor) -{ - return Cursor, CT::TwistAxesNavigator, TCursor> - (CursorAccessor(), CT::TwistAxesNavigator(), cursor); -} - -} // tools -} // cursor -} // pmacc + namespace cursor + { + namespace tools + { + /** Returns a new cursor which looks like a rotated version of the one passed. + * + * The new cursor wraps the one that is passed. 
In the new cursor's navigator + * the components of the passed int-vector are reordered according to the Axes + * parameter and then passed to the nested cursor. + * + * \tparam Axes compile-time vector (pmacc::math::CT::Int) that describes the mapping. + * x-axis -> Axes::at<0>, y-axis -> Axes::at<1>, ... + */ + template + HDINLINE Cursor, CT::TwistAxesNavigator, TCursor> twistAxes( + const TCursor& cursor) + { + return Cursor, CT::TwistAxesNavigator, TCursor>( + CursorAccessor(), + CT::TwistAxesNavigator(), + cursor); + } + } // namespace tools + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/tools/twistVectorFieldAxes.hpp b/include/pmacc/cuSTL/cursor/tools/twistVectorFieldAxes.hpp index 917560c609..75bb4a5e5e 100644 --- a/include/pmacc/cuSTL/cursor/tools/twistVectorFieldAxes.hpp +++ b/include/pmacc/cuSTL/cursor/tools/twistVectorFieldAxes.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc.
* @@ -27,95 +27,93 @@ namespace pmacc { -namespace cursor -{ -namespace tools -{ + namespace cursor + { + namespace tools + { + namespace result_of + { + /** result for TwistVectorFieldAxes + * + * \tparam T_NavigatorPerm permutation vector for navigator + * \tparam T_AccessorPerm permutation vector for the accessor + * \tparam T_Cursor cursor to permute + */ + template + struct TwistVectorFieldAxes + { + typedef Cursor< + TwistAxesAccessor, + pmacc::cursor::CT::TwistAxesNavigator, + T_Cursor> + type; + }; -namespace result_of -{ + } // namespace result_of -/** result for TwistVectorFieldAxes - * - * \tparam T_NavigatorPerm permutation vector for navigator - * \tparam T_AccessorPerm permutation vector for the accessor - * \tparam T_Cursor cursor to permute - */ -template -struct TwistVectorFieldAxes -{ - typedef Cursor, - pmacc::cursor::CT::TwistAxesNavigator, - T_Cursor> type; -}; - -} // result_of - -/** Returns a new cursor which looks like a vector field rotated version of the one passed - * - * When rotating a vector field in physics the coordinate system and the vectors themselves - * have to be rotated. This is the idea behind this function. It is assuming that the cursor - * which is passed returns in its access call a vector type of the same dimension as in - * the jumping call. In other words, the field and the vector have the same dimension. - * - * e.g.: new_cur = twistVectorFieldAxes >(cur); // x -> y, y -> z, z -> x - * - * \tparam T_Permutation compile-time vector (pmacc::math::CT::Int) that describes the mapping. - * x-axis -> T_Permutation::at<0>, y-axis -> T_Permutation::at<1>, ... 
- * - */ -template -HDINLINE -typename result_of::TwistVectorFieldAxes::type -twistVectorFieldAxes(const T_Cursor& cursor) -{ - return typename result_of::TwistVectorFieldAxes::type - (TwistAxesAccessor(), - pmacc::cursor::CT::TwistAxesNavigator(), - cursor); -} + /** Returns a new cursor which looks like a vector field rotated version of the one passed + * + * When rotating a vector field in physics the coordinate system and the vectors themselves + * have to be rotated. This is the idea behind this function. It is assuming that the cursor + * which is passed returns in its access call a vector type of the same dimension as in + * the jumping call. In other words, the field and the vector have the same dimension. + * + * e.g.: new_cur = twistVectorFieldAxes >(cur); // x -> y, y -> z, z -> x + * + * \tparam T_Permutation compile-time vector (pmacc::math::CT::Int) that describes the mapping. + * x-axis -> T_Permutation::at<0>, y-axis -> T_Permutation::at<1>, ... + * + */ + template + HDINLINE typename result_of::TwistVectorFieldAxes::type + twistVectorFieldAxes(const T_Cursor& cursor) + { + return typename result_of::TwistVectorFieldAxes::type( + TwistAxesAccessor(), + pmacc::cursor::CT::TwistAxesNavigator(), + cursor); + } -/** permute navigation and access of a cursor - * - * use same permutation for accessor and navigator - * - * \tparam T_Permutation permutation vector - * \tparam T_Cursor permutation vector - * \param cursor cursor to permute - * \param permutation cursor to permute - */ -template -HDINLINE -typename result_of::TwistVectorFieldAxes::type -twistVectorFieldAxes(const T_Cursor& cursor, const T_Permutation& /*permutation*/) -{ - return typename result_of::TwistVectorFieldAxes::type - (TwistAxesAccessor(), - pmacc::cursor::CT::TwistAxesNavigator(), - cursor); -} + /** permute navigation and access of a cursor + * + * use same permutation for accessor and navigator + * + * \tparam T_Permutation permutation vector + * \tparam T_Cursor permutation 
vector + * \param cursor cursor to permute + * \param permutation cursor to permute + */ + template + HDINLINE typename result_of::TwistVectorFieldAxes::type + twistVectorFieldAxes(const T_Cursor& cursor, const T_Permutation& /*permutation*/) + { + return typename result_of::TwistVectorFieldAxes::type( + TwistAxesAccessor(), + pmacc::cursor::CT::TwistAxesNavigator(), + cursor); + } -/** permute navigation and access of a cursor - * - * different dimensions for the accessor and navigator permutation vector are allowed - * - * \param cursor cursor to permute - * \param navigatorPermutation compile time permutation vector for the navigator - * \param accessorPermutation compile time permutation vector for the accessor - */ -template -HDINLINE -typename result_of::TwistVectorFieldAxes::type -twistVectorFieldAxes(const T_Cursor& cursor, - const T_NavigatorPerm& /*navigatorPermutation*/, - const T_AccessorPerm& /*accessorPermutation*/) -{ - return typename result_of::TwistVectorFieldAxes::type - (TwistAxesAccessor(), - pmacc::cursor::CT::TwistAxesNavigator(), - cursor); -} + /** permute navigation and access of a cursor + * + * different dimensions for the accessor and navigator permutation vector are allowed + * + * \param cursor cursor to permute + * \param navigatorPermutation compile time permutation vector for the navigator + * \param accessorPermutation compile time permutation vector for the accessor + */ + template + HDINLINE typename result_of::TwistVectorFieldAxes::type + twistVectorFieldAxes( + const T_Cursor& cursor, + const T_NavigatorPerm& /*navigatorPermutation*/, + const T_AccessorPerm& /*accessorPermutation*/) + { + return typename result_of::TwistVectorFieldAxes::type( + TwistAxesAccessor(), + pmacc::cursor::CT::TwistAxesNavigator(), + cursor); + } -} // tools -} // cursor -} // pmacc + } // namespace tools + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/cursor/traits.hpp b/include/pmacc/cuSTL/cursor/traits.hpp index 
8855965023..3ee70eeb5a 100644 --- a/include/pmacc/cuSTL/cursor/traits.hpp +++ b/include/pmacc/cuSTL/cursor/traits.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -23,15 +23,13 @@ namespace pmacc { -namespace cursor -{ -namespace traits -{ - -template -struct dim; - -} // traits -} // cursor -} // pmacc + namespace cursor + { + namespace traits + { + template + struct dim; + } // namespace traits + } // namespace cursor +} // namespace pmacc diff --git a/include/pmacc/cuSTL/zone/SphericZone.hpp b/include/pmacc/cuSTL/zone/SphericZone.hpp index 6ad4166fa7..37d68bd520 100644 --- a/include/pmacc/cuSTL/zone/SphericZone.hpp +++ b/include/pmacc/cuSTL/zone/SphericZone.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,44 +27,52 @@ namespace pmacc { -namespace zone -{ - -namespace tag -{ -struct SphericZone {}; -} - -/* spheric (no holes), cartesian zone - * - * \tparam T_dim dimension of the zone - * - * This is a zone which is simply described by a size and a offset. - * - */ -template -struct SphericZone -{ - typedef tag::SphericZone tag; - static constexpr int dim = T_dim; - math::Size_t size; - math::Int offset; + namespace zone + { + namespace tag + { + struct SphericZone + { + }; + } // namespace tag - HDINLINE SphericZone() {} - HDINLINE SphericZone(const math::Size_t& size) : size(size), offset(math::Int::create(0)) {} - HDINLINE SphericZone(const math::Size_t& size, - const math::Int& offset) : size(size), offset(offset) {} + /* spheric (no holes), cartesian zone + * + * \tparam T_dim dimension of the zone + * + * This is a zone which is simply described by a size and a offset. 
+ * + */ + template + struct SphericZone + { + typedef tag::SphericZone tag; + static constexpr int dim = T_dim; + math::Size_t size; + math::Int offset; - /* Returns whether pos is within the zone */ - HDINLINE bool within(const pmacc::math::Int& pos) const - { - bool result = true; - for(int i = 0; i < T_dim; i++) - if((pos[i] < offset[i]) || (pos[i] >= offset[i] + (int)size[i])) result = false; - return result; - } -}; + HDINLINE SphericZone() + { + } + HDINLINE SphericZone(const math::Size_t& size) : size(size), offset(math::Int::create(0)) + { + } + HDINLINE SphericZone(const math::Size_t& size, const math::Int& offset) + : size(size) + , offset(offset) + { + } -} // zone -} // pmacc + /* Returns whether pos is within the zone */ + HDINLINE bool within(const pmacc::math::Int& pos) const + { + bool result = true; + for(int i = 0; i < T_dim; i++) + if((pos[i] < offset[i]) || (pos[i] >= offset[i] + (int) size[i])) + result = false; + return result; + } + }; + } // namespace zone +} // namespace pmacc diff --git a/include/pmacc/cuSTL/zone/StaggeredZone.hpp b/include/pmacc/cuSTL/zone/StaggeredZone.hpp index 755c4d4843..40255e67f8 100644 --- a/include/pmacc/cuSTL/zone/StaggeredZone.hpp +++ b/include/pmacc/cuSTL/zone/StaggeredZone.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -27,21 +27,22 @@ namespace pmacc { -namespace zone -{ -namespace tag -{ -struct StaggeredZone {}; -} - -template -struct StaggeredZone : public SphericZone -{ - typedef tag::StaggeredZone tag; - math::UInt32 staggered; - math::UInt32 staggeredOffset; -}; + namespace zone + { + namespace tag + { + struct StaggeredZone + { + }; + } // namespace tag -} // zone -} // pmacc + template + struct StaggeredZone : public SphericZone + { + typedef tag::StaggeredZone tag; + math::UInt32 staggered; + math::UInt32 staggeredOffset; + }; + } // namespace zone +} // namespace pmacc diff --git a/include/pmacc/cuSTL/zone/ToricZone.hpp b/include/pmacc/cuSTL/zone/ToricZone.hpp index fae3acdd90..8d84a7dca4 100644 --- a/include/pmacc/cuSTL/zone/ToricZone.hpp +++ b/include/pmacc/cuSTL/zone/ToricZone.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,23 +26,24 @@ namespace pmacc { -namespace zone -{ -namespace tag -{ -struct ToricZone {}; -} - -template -struct ToricZone -{ - typedef tag::ToricZone tag; - static constexpr int dim = T_dim; - math::Size_t offset; - math::Size_t size; - uint32_t thickness; -}; + namespace zone + { + namespace tag + { + struct ToricZone + { + }; + } // namespace tag -} // zone -} // pmacc + template + struct ToricZone + { + typedef tag::ToricZone tag; + static constexpr int dim = T_dim; + math::Size_t offset; + math::Size_t size; + uint32_t thickness; + }; + } // namespace zone +} // namespace pmacc diff --git a/include/pmacc/cuSTL/zone/compile-time/SphericZone.hpp b/include/pmacc/cuSTL/zone/compile-time/SphericZone.hpp index 862b9f728f..30a148832e 100644 --- a/include/pmacc/cuSTL/zone/compile-time/SphericZone.hpp +++ b/include/pmacc/cuSTL/zone/compile-time/SphericZone.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl * * This file is part of PMacc. 
* @@ -25,29 +25,29 @@ namespace pmacc { -namespace zone -{ -namespace CT -{ - -/* spheric (no holes), cartesian, compile-time zone - * - * \tparam _Size compile-time vector (pmacc::math::CT::Size_t) of the zone's size. - * \tparam _Offset compile-time vector (pmacc::math::CT::Size_t) of the zone's offset. default is a zero vector. - * - * This is a zone which is simply described by a size and a offset. - * - * Compile-time version of zone::SphericZone - * - */ -template::type> -struct SphericZone -{ - typedef _Size Size; - typedef _Offset Offset; - static constexpr int dim = Size::dim; -}; + namespace zone + { + namespace CT + { + /* spheric (no holes), cartesian, compile-time zone + * + * \tparam _Size compile-time vector (pmacc::math::CT::Size_t) of the zone's size. + * \tparam _Offset compile-time vector (pmacc::math::CT::Size_t) of the zone's offset. default is a zero + * vector. + * + * This is a zone which is simply described by a size and a offset. + * + * Compile-time version of zone::SphericZone + * + */ + template::type> + struct SphericZone + { + typedef _Size Size; + typedef _Offset Offset; + static constexpr int dim = Size::dim; + }; -} // CT -} // zone -} // pmacc + } // namespace CT + } // namespace zone +} // namespace pmacc diff --git a/include/pmacc/cudaSpecs.hpp b/include/pmacc/cudaSpecs.hpp index d8e7d507af..a72e719682 100644 --- a/include/pmacc/cudaSpecs.hpp +++ b/include/pmacc/cudaSpecs.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -26,20 +26,19 @@ namespace pmacc { -namespace cudaSpecs -{ - -/* Various hardware specific numerical limits taken from the - * *CUDA C Programming Guide* Section: G.1. Features and Technical Specifications. - * - * Valid for sm_2.x - sm_5.3 - */ + namespace cudaSpecs + { + /* Various hardware specific numerical limits taken from the + * *CUDA C Programming Guide* Section: G.1. Features and Technical Specifications. 
+ * + * Valid for sm_2.x - sm_5.3 + */ -/** maximum number of threads per block */ -constexpr uint32_t maxNumThreadsPerBlock = 1024; + /** maximum number of threads per block */ + constexpr uint32_t maxNumThreadsPerBlock = 1024; -/** maximum number of threads per axis of a block */ -typedef math::CT::Size_t<1024, 1024, 64> MaxNumThreadsPerBlockDim; + /** maximum number of threads per axis of a block */ + typedef math::CT::Size_t<1024, 1024, 64> MaxNumThreadsPerBlockDim; -} // namespace cudaSpecs + } // namespace cudaSpecs } // namespace pmacc diff --git a/include/pmacc/cuplaHelper/ValidateCall.hpp b/include/pmacc/cuplaHelper/ValidateCall.hpp index 18f283ca19..383c651095 100644 --- a/include/pmacc/cuplaHelper/ValidateCall.hpp +++ b/include/pmacc/cuplaHelper/ValidateCall.hpp @@ -1,6 +1,6 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, - * Alexander Grund + * Alexander Grund, Sergei Bastrakov * * This file is part of PMacc. 
* @@ -23,35 +23,70 @@ #pragma once -#include +#include + #include #include -namespace pmacc -{ /** - * Print a cuda error message including file/line info to stderr + * Print a cupla error message including file/line info to stderr */ -#define PMACC_PRINT_CUDA_ERROR(msg) \ - std::cerr << "[CUDA] Error: <" << __FILE__ << ">:" << __LINE__ << " " << msg << std::endl +#define PMACC_PRINT_CUPLA_ERROR(msg) \ + std::cerr << "[cupla] Error: <" << __FILE__ << ">:" << __LINE__ << " " << msg << std::endl /** - * Print a cuda error message including file/line info to stderr and raises an exception + * Print a cupla error message including file/line info to stderr and raises an exception */ -#define PMACC_PRINT_CUDA_ERROR_AND_THROW(cudaError, msg) \ - PMACC_PRINT_CUDA_ERROR(msg); \ - throw std::runtime_error(std::string("[CUDA] Error: ") + std::string(cudaGetErrorString(cudaError))) +#define PMACC_PRINT_CUPLA_ERROR_AND_THROW(cuplaError, msg) \ + PMACC_PRINT_CUPLA_ERROR(msg); \ + throw std::runtime_error(std::string("[cupla] Error: ") + std::string(cuplaGetErrorString(cuplaError))) /** * Captures CUDA errors and prints messages to stdout, including line number and file. * - * @param cmd command with cudaError_t return value to check + * @param cmd command with cuplaError_t return value to check */ -#define CUDA_CHECK(cmd) {cudaError_t error = cmd; if(error!=cudaSuccess){ PMACC_PRINT_CUDA_ERROR_AND_THROW(error, ""); }} - -#define CUDA_CHECK_MSG(cmd,msg) {cudaError_t error = cmd; if(error!=cudaSuccess){ PMACC_PRINT_CUDA_ERROR_AND_THROW(error, msg); }} +#define CUDA_CHECK(cmd) \ + { \ + cuplaError_t error = cmd; \ + if(error != cuplaSuccess) \ + { \ + PMACC_PRINT_CUPLA_ERROR_AND_THROW(error, ""); \ + } \ + } -#define CUDA_CHECK_NO_EXCEPT(cmd) {cudaError_t error = cmd; if(error!=cudaSuccess){ PMACC_PRINT_CUDA_ERROR(""); }} +/** Capture error, report and throw + * + * This macro is only used when PMACC_SYNC_KERNEL == 1 to wrap all + * kernel calls. 
Since alpaka may throw inside cmd, everything is + * wrapped up in another try-catch level. + * + * This macro will always throw in case of an error, either by + * producing a new exception or propagating an existing one + */ +#define CUDA_CHECK_MSG(cmd, msg) \ + { \ + try \ + { \ + cuplaError_t error = cmd; \ + if(error != cuplaSuccess) \ + { \ + PMACC_PRINT_CUPLA_ERROR_AND_THROW(error, msg); \ + } \ + } \ + catch(...) \ + { \ + PMACC_PRINT_CUPLA_ERROR(msg); \ + throw; \ + } \ + } -} // namespace pmacc +#define CUDA_CHECK_NO_EXCEPT(cmd) \ + { \ + cuplaError_t error = cmd; \ + if(error != cuplaSuccess) \ + { \ + PMACC_PRINT_CUPLA_ERROR(""); \ + } \ + } diff --git a/include/pmacc/dataManagement/AbstractInitialiser.hpp b/include/pmacc/dataManagement/AbstractInitialiser.hpp index 5da2e3009e..519bd3d1f5 100644 --- a/include/pmacc/dataManagement/AbstractInitialiser.hpp +++ b/include/pmacc/dataManagement/AbstractInitialiser.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Benjamin Worpitz * * This file is part of PMacc. * @@ -26,7 +26,6 @@ namespace pmacc { - /** * Abstract base class for initialising simulation data (ISimulationData). */ @@ -39,13 +38,16 @@ namespace pmacc * * @return the next timestep */ - virtual uint32_t setup() { return 0;}; + virtual uint32_t setup() + { + return 0; + }; /** * Tears down this initialiser. * Called after any init. */ - virtual void teardown() {}; + virtual void teardown(){}; /** * Initialises simulation data (concrete type of data is described by id). 
@@ -56,4 +58,4 @@ namespace pmacc virtual void init(ISimulationData& data, uint32_t currentStep) = 0; }; -} +} // namespace pmacc diff --git a/include/pmacc/dataManagement/DataConnector.hpp b/include/pmacc/dataManagement/DataConnector.hpp index e0c2ba065f..737fc019e1 100644 --- a/include/pmacc/dataManagement/DataConnector.hpp +++ b/include/pmacc/dataManagement/DataConnector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl, Sergei Bastrakov +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl, Sergei Bastrakov * * This file is part of PMacc. * @@ -44,17 +44,12 @@ namespace pmacc class DataConnector { private: - std::list< std::shared_ptr< ISimulationData > >::iterator - findId( SimulationDataId id ) + std::list>::iterator findId(SimulationDataId id) { return std::find_if( datasets.begin(), datasets.end(), - [ & id ]( std::shared_ptr< ISimulationData > data ) -> bool - { - return data->getUniqueId() == id; - } - ); + [&id](std::shared_ptr data) -> bool { return data->getUniqueId() == id; }); } public: @@ -63,10 +58,9 @@ namespace pmacc * @param id id of the Dataset to query * @return if dataset with id is registered */ - bool - hasId( SimulationDataId id ) + bool hasId(SimulationDataId id) { - return findId( id ) != datasets.end(); + return findId(id) != datasets.end(); } /** @@ -76,20 +70,13 @@ namespace pmacc * @param initialiser class used for initialising Datasets * @param currentStep current simulation step */ - void - initialise( - AbstractInitialiser& initialiser, - uint32_t currentStep - ) + void initialise(AbstractInitialiser& initialiser, uint32_t currentStep) { currentStep = initialiser.setup(); - for( auto & data : datasets ) + for(auto& data : datasets) { - initialiser.init( - *data, - currentStep - ); + initialiser.init(*data, currentStep); } initialiser.teardown(); @@ -102,24 +89,18 @@ namespace pmacc * * @param data simulation data to share ownership */ - void - share( const std::shared_ptr< ISimulationData 
> & data ) + void share(const std::shared_ptr& data) { - PMACC_ASSERT( data != nullptr ); + PMACC_ASSERT(data != nullptr); SimulationDataId id = data->getUniqueId(); - log< ggLog::MEMORY >( "DataConnector: data shared '%1%'" ) % id; + log("DataConnector: data shared '%1%'") % id; - if( hasId( id ) ) - throw std::runtime_error( - getExceptionStringForID( - "dataset ID already exists", - id - ) - ); + if(hasId(id)) + throw std::runtime_error(getExceptionStringForID("dataset ID already exists", id)); - datasets.push_back( data ); + datasets.push_back(data); } /** Register a new Dataset and transfer its ownership. @@ -130,50 +111,40 @@ namespace pmacc * * @param data simulation data to transfer ownership */ - void - consume( std::unique_ptr< ISimulationData > data ) + void consume(std::unique_ptr data) { - std::shared_ptr< ISimulationData > newOwner( std::move( data ) ); - share( newOwner ); + std::shared_ptr newOwner(std::move(data)); + share(newOwner); } /** End sharing a dataset with identifier id * * @param id id of the dataset to remove */ - void - deregister( SimulationDataId id ) + void deregister(SimulationDataId id) { - const auto it = findId( id ); + const auto it = findId(id); - if( it == datasets.end() ) - throw std::runtime_error( - getExceptionStringForID( - "dataset not found", - id - ) - ); + if(it == datasets.end()) + throw std::runtime_error(getExceptionStringForID("dataset not found", id)); - log< ggLog::MEMORY >( "DataConnector: unshared '%1%' (%2% uses left)" ) % - id % ( it->use_count() - 1 ); + log("DataConnector: unshared '%1%' (%2% uses left)") % id % (it->use_count() - 1); - datasets.erase( it ); + datasets.erase(it); } /** Unshare all associated datasets */ - void - clean() + void clean() { - log< ggLog::MEMORY >( "DataConnector: being cleaned (%1% datasets left to unshare)" ) % - datasets.size(); + log("DataConnector: being cleaned (%1% datasets left to unshare)") % datasets.size(); // verbose version of: datasets.clear(); - while( ! 
datasets.empty() ) + while(!datasets.empty()) { auto it = datasets.rbegin(); - log< ggLog::MEMORY >( "DataConnector: unshared '%1%' (%2% uses left)" ) % - (*it)->getUniqueId() % ( it->use_count() - 1 ); + log("DataConnector: unshared '%1%' (%2% uses left)") % (*it)->getUniqueId() + % (it->use_count() - 1); datasets.pop_back(); } } @@ -191,32 +162,25 @@ namespace pmacc * @param noSync indicates that no synchronization should be performed, regardless of dataset status * @return returns a reference to the data of type TYPE */ - template< class TYPE > - std::shared_ptr< TYPE > - get( + template + std::shared_ptr get( SimulationDataId id, bool noSync = false // @todo invert! ) { - auto it = findId( id ); + auto it = findId(id); - if( it == datasets.end() ) - throw std::runtime_error( - getExceptionStringForID( - "Invalid dataset ID", - id - ) - ); + if(it == datasets.end()) + throw std::runtime_error(getExceptionStringForID("Invalid dataset ID", id)); - log< ggLog::MEMORY >( "DataConnector: sharing access to '%1%' (%2% uses)" ) % - id % ( it->use_count() ); + log("DataConnector: sharing access to '%1%' (%2% uses)") % id % (it->use_count()); - if( !noSync ) + if(!noSync) { (*it)->synchronize(); } - return std::static_pointer_cast< TYPE >( *it ); + return std::static_pointer_cast(*it); } /** Indicate a data set gotten temporarily via @see getData is not used anymore @@ -225,41 +189,30 @@ namespace pmacc * * @param id id for the dataset previously acquired using getData() */ - void - releaseData( SimulationDataId ) + void releaseData(SimulationDataId) { } private: - friend struct detail::Environment; - static DataConnector& - getInstance() + static DataConnector& getInstance() { static DataConnector instance; return instance; } - std::list< std::shared_ptr< ISimulationData > > datasets; + std::list> datasets; - DataConnector() - { - }; + DataConnector(){}; - virtual - ~DataConnector() + virtual ~DataConnector() { - log< ggLog::MEMORY >( "DataConnector: being destroyed (%1% 
datasets left to destroy)" ) % - datasets.size(); + log("DataConnector: being destroyed (%1% datasets left to destroy)") % datasets.size(); clean(); } - std::string - getExceptionStringForID( - const char *msg, - SimulationDataId id - ) + std::string getExceptionStringForID(const char* msg, SimulationDataId id) { std::stringstream stream; stream << "DataConnector: " << msg << " (" << id << ")"; @@ -267,5 +220,4 @@ namespace pmacc } }; -} - +} // namespace pmacc diff --git a/include/pmacc/dataManagement/ISimulationData.hpp b/include/pmacc/dataManagement/ISimulationData.hpp index a5ed822a94..82c335ee68 100644 --- a/include/pmacc/dataManagement/ISimulationData.hpp +++ b/include/pmacc/dataManagement/ISimulationData.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Benjamin Worpitz, +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -35,7 +35,9 @@ namespace pmacc class ISimulationData { public: - virtual ~ISimulationData(){} + virtual ~ISimulationData() + { + } /** * Synchronizes simulation data, meaning accessing (host side) data * will return up-to-date values. @@ -48,6 +50,5 @@ namespace pmacc * @return globally unique identifier */ virtual SimulationDataId getUniqueId() = 0; - }; -} +} // namespace pmacc diff --git a/include/pmacc/debug/DebugBuffers.hpp b/include/pmacc/debug/DebugBuffers.hpp index e9bacc6a09..0ae0ec7e8b 100644 --- a/include/pmacc/debug/DebugBuffers.hpp +++ b/include/pmacc/debug/DebugBuffers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -29,13 +29,12 @@ namespace pmacc { - /** * Helper class for debugging buffers * * @tparam DIM dimension of the buffer to debug. 
*/ - template + template class DebugBuffers { public: @@ -46,25 +45,24 @@ namespace pmacc * @param hostBuffer the HostBuffer to convert to a string * @return a string representing the buffer */ - template + template static std::string bufferToStr(HostBuffer& hostBuffer); }; - template <> + template<> class DebugBuffers { public: - - template + template static std::string bufferToStr(HostBuffer& hostBuffer) { std::stringstream stream; typename HostBuffer::DataBoxType db = hostBuffer.getDataBox(); - for (size_t y = 0; y < hostBuffer.getDataSpace().y(); y++) + for(size_t y = 0; y < hostBuffer.getDataSpace().y(); y++) { - for (size_t x = 0; x < hostBuffer.getDataSpace().x(); x++) + for(size_t x = 0; x < hostBuffer.getDataSpace().x(); x++) stream << db[y][x] << " "; stream << std::endl; @@ -74,33 +72,31 @@ namespace pmacc } }; - template <> + template<> class DebugBuffers { public: - - template + template static std::string bufferToStr(HostBuffer& hostBuffer) { std::stringstream stream; typename HostBuffer::DataBoxType db = hostBuffer.getDataBox(); - for (size_t z = 0; z < hostBuffer.getDataSpace().z(); z++) + for(size_t z = 0; z < hostBuffer.getDataSpace().z(); z++) { stream << "z = " << z << std::endl; - for (size_t y = 0; y < hostBuffer.getDataSpace().y(); y++) + for(size_t y = 0; y < hostBuffer.getDataSpace().y(); y++) { - for (size_t x = 0; x < hostBuffer.getDataSpace().x(); x++) + for(size_t x = 0; x < hostBuffer.getDataSpace().x(); x++) stream << db[z][y][x] << " "; stream << std::endl; } - } return stream.str(); } }; -} +} // namespace pmacc diff --git a/include/pmacc/debug/DebugDataSpace.hpp b/include/pmacc/debug/DebugDataSpace.hpp index c10b3b6a96..c657e9bcb1 100644 --- a/include/pmacc/debug/DebugDataSpace.hpp +++ b/include/pmacc/debug/DebugDataSpace.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -29,20 +29,19 @@ namespace pmacc { - /** * Helper class for debugging DataSpaces * * @tparam DIM dimension of the DataSpace to debug. */ - template + template class DebugDataSpace { public: static std::string dspToStr(DataSpace& dsp); }; - template <> + template<> class DebugDataSpace { public: @@ -56,7 +55,7 @@ namespace pmacc } }; - template <> + template<> class DebugDataSpace { public: @@ -70,4 +69,4 @@ namespace pmacc } }; -} +} // namespace pmacc diff --git a/include/pmacc/debug/DebugExchangeTypes.hpp b/include/pmacc/debug/DebugExchangeTypes.hpp index 806e2cb243..692011f910 100644 --- a/include/pmacc/debug/DebugExchangeTypes.hpp +++ b/include/pmacc/debug/DebugExchangeTypes.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -29,7 +29,6 @@ namespace pmacc { - /** * Helper class for debugging exchange types. * @@ -37,7 +36,6 @@ namespace pmacc class DebugExchangeTypes { public: - /** * Converts an exchange type to a string for debugging. 
* @@ -51,22 +49,22 @@ namespace pmacc std::stringstream stream; stream << "["; - if (mask.containsExchangeType(LEFT)) + if(mask.containsExchangeType(LEFT)) stream << "LEFT "; - if (mask.containsExchangeType(RIGHT)) + if(mask.containsExchangeType(RIGHT)) stream << "RIGHT "; - if (mask.containsExchangeType(TOP)) + if(mask.containsExchangeType(TOP)) stream << "TOP "; - if (mask.containsExchangeType(BOTTOM)) + if(mask.containsExchangeType(BOTTOM)) stream << "BOTTOM "; - if (mask.containsExchangeType(FRONT)) + if(mask.containsExchangeType(FRONT)) stream << "FRONT "; - if (mask.containsExchangeType(BACK)) + if(mask.containsExchangeType(BACK)) stream << "BACK "; stream << "]"; @@ -75,4 +73,4 @@ namespace pmacc } }; -} +} // namespace pmacc diff --git a/include/pmacc/debug/PMaccVerbose.hpp b/include/pmacc/debug/PMaccVerbose.hpp index f4a9013306..cd598db554 100644 --- a/include/pmacc/debug/PMaccVerbose.hpp +++ b/include/pmacc/debug/PMaccVerbose.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -26,33 +26,25 @@ #include #ifndef PMACC_VERBOSE_LVL -#define PMACC_VERBOSE_LVL 0 +# define PMACC_VERBOSE_LVL 0 #endif namespace pmacc { - /*create verbose class*/ DEFINE_VERBOSE_CLASS(PMaccVerbose) ( /* define log lvl for later use * e.g. 
log("TEXT");*/ - DEFINE_LOGLVL(0,NOTHING); - DEFINE_LOGLVL(1,MEMORY); - DEFINE_LOGLVL(2,INFO); - DEFINE_LOGLVL(4,CRITICAL); - DEFINE_LOGLVL(8,MPI); - DEFINE_LOGLVL(16,CUDA_RT); - DEFINE_LOGLVL(32,COMMUNICATION); - DEFINE_LOGLVL(64,EVENT); - ) - /*set default verbose lvl (integer number)*/ - (NOTHING::lvl|PMACC_VERBOSE_LVL); - - //short name for access verbose types of PMacc + DEFINE_LOGLVL(0, NOTHING); DEFINE_LOGLVL(1, MEMORY); DEFINE_LOGLVL(2, INFO); DEFINE_LOGLVL(4, CRITICAL); + DEFINE_LOGLVL(8, MPI); + DEFINE_LOGLVL(16, CUDA_RT); + DEFINE_LOGLVL(32, COMMUNICATION); + DEFINE_LOGLVL(64, EVENT);) + /*set default verbose lvl (integer number)*/ + (NOTHING::lvl | PMACC_VERBOSE_LVL); + + // short name for access verbose types of PMacc using ggLog = PMaccVerbose; -} - - - +} // namespace pmacc diff --git a/include/pmacc/debug/VerboseLog.hpp b/include/pmacc/debug/VerboseLog.hpp index 7b3c8148bd..690191ccc4 100644 --- a/include/pmacc/debug/VerboseLog.hpp +++ b/include/pmacc/debug/VerboseLog.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. * @@ -32,123 +32,115 @@ namespace pmacc { - - -/** get the name of a verbose lvl - * - * this function is defined as friend function for every log lvl - * @param dummy instance of LogClass to find name - * @return name of LogClass - */ -template -std::string getLogName(const LogClass& dummy) -{ - return std::string("UNDEFINED_LVL"); -} - - -namespace verboseLog_detail -{ - -template -struct IsSameClassType -{ - static constexpr bool result = false; -}; - -template -struct IsSameClassType -{ - static constexpr bool result = true; -}; - -} //namespace verboseLog_detail - -template -struct LogLvl -{ - typedef membership_ Parent; - static constexpr uint64_t lvl = lvl_; - - /* This operation is only allowed for LogLvl with the same Parent type. - * Create a LogLvl that contains two levels. 
At least one lvl has to be true + /** get the name of a verbose lvl + * + * this function is defined as friend function for every log lvl + * @param dummy instance of LogClass to find name + * @return name of LogClass */ - template - LogLvl < (OtherLogLvl::lvl | lvl), membership_> operator+(const OtherLogLvl&) + template + std::string getLogName(const LogClass& dummy) { - return LogLvl < (OtherLogLvl::lvl | lvl), membership_ > (); + return std::string("UNDEFINED_LVL"); } -}; -namespace verboseLog_detail -{ + namespace verboseLog_detail + { + template + struct IsSameClassType + { + static constexpr bool result = false; + }; -template -class VerboseLog -{ -private: - typedef typename LogLevel::Parent LogParent; - static constexpr uint64_t logLvl = LogLevel::lvl; -public: + template + struct IsSameClassType + { + static constexpr bool result = true; + }; - VerboseLog(const char* msg) : fmt(msg) - { - } + } // namespace verboseLog_detail - ~VerboseLog() + template + struct LogLvl { - typedef LogLvl<(logLvl & LogParent::log_level), LogParent> LogClass; - /* check if a bit in the mask is set - * If you get an linker error in the next two lines you have not used - * DEFINE_LOGLVL makro to define a named logLvl + typedef membership_ Parent; + static constexpr uint64_t lvl = lvl_; + + /* This operation is only allowed for LogLvl with the same Parent type. + * Create a LogLvl that contains two levels. 
At least one lvl has to be true */ - if (logLvl & LogParent::log_level) /*compile-time check*/ + template + LogLvl<(OtherLogLvl::lvl | lvl), membership_> operator+(const OtherLogLvl&) { - std::cout << LogParent::getName() << " " << getLogName(LogClass()) << - "(" << (logLvl & LogParent::log_level) << ")" << " | " << fmt << std::endl; + return LogLvl<(OtherLogLvl::lvl | lvl), membership_>(); } - } + }; - template - VerboseLog& operator %(T value) + namespace verboseLog_detail + { + template + class VerboseLog + { + private: + typedef typename LogLevel::Parent LogParent; + static constexpr uint64_t logLvl = LogLevel::lvl; + + public: + VerboseLog(const char* msg) : fmt(msg) + { + } + + ~VerboseLog() + { + typedef LogLvl<(logLvl & LogParent::log_level), LogParent> LogClass; + /* check if a bit in the mask is set + * If you get an linker error in the next two lines you have not used + * DEFINE_LOGLVL makro to define a named logLvl + */ + if(logLvl & LogParent::log_level) /*compile-time check*/ + { + std::cout << LogParent::getName() << " " << getLogName(LogClass()) << "(" + << (logLvl & LogParent::log_level) << ")" + << " | " << fmt << std::endl; + } + } + + template + VerboseLog& operator%(T value) + { + if(logLvl & LogParent::log_level) /*compile-time check*/ + fmt % value; + return *this; + } + + protected: + boost::format fmt; + }; + + } // namespace verboseLog_detail + + /* + * example call: + * log("printf %2% stream %1%, number example %3%.") % "messages" % "style" % 5; + * output of example: 4 | printf style stream messages, number example 5 + */ + template + verboseLog_detail::VerboseLog log(const char* msg) { - if (logLvl & LogParent::log_level) /*compile-time check*/ - fmt % value; - return *this; + return verboseLog_detail::VerboseLog(msg); } -protected: - boost::format fmt; -}; - -}//namespace verboseLog_detail - -/* - * example call: - * log("printf %2% stream %1%, number example %3%.") % "messages" % "style" % 5; - * output of example: 4 | printf style 
stream messages, number example 5 - */ -template -verboseLog_detail::VerboseLog -log(const char* msg) -{ - return verboseLog_detail::VerboseLog (msg); -} - -/* version that allows to combine error levels - * example call: - * log(MYLOGLEVELS::CRITICAL+MYLOGLEVELS::MEMORY,"printf %2% stream %1%, number example %3%.") % "messages" % "style" % 5 - */ -template -verboseLog_detail::VerboseLog -log(const LogLvl, const char* msg) -{ - return verboseLog_detail::VerboseLog (msg); -} - - - -} //namespace pmacc + /* version that allows to combine error levels + * example call: + * log(MYLOGLEVELS::CRITICAL+MYLOGLEVELS::MEMORY,"printf %2% stream %1%, number example %3%.") % "messages" % + * "style" % 5 + */ + template + verboseLog_detail::VerboseLog log(const LogLvl, const char* msg) + { + return verboseLog_detail::VerboseLog(msg); + } +} // namespace pmacc diff --git a/include/pmacc/debug/VerboseLogMakros.hpp b/include/pmacc/debug/VerboseLogMakros.hpp index c329153149..4a914d2bb2 100644 --- a/include/pmacc/debug/VerboseLogMakros.hpp +++ b/include/pmacc/debug/VerboseLogMakros.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -28,37 +28,39 @@ * @param code integer which represent a bit in a 64bit bitmask * @param name name of the log lvl, name is needet later to call log(...) 
*/ -#define DEFINE_LOGLVL(code,name) \ - typedef pmacc::LogLvl < code, thisClass > name; \ - friend inline std::string getLogName(const name) \ - { \ - return std::string(#name); \ +#define DEFINE_LOGLVL(code, name) \ + typedef pmacc::LogLvl name; \ + friend inline std::string getLogName(const name) \ + { \ + return std::string(#name); \ } /** set a default value for a verbose class * @param default_lvl must be a integer which represent a defined log lvl */ -#define __DEFINE_VERBOSE_CLASS_DEFAULT_LVL(default_lvl) \ - static constexpr uint64_t log_level = default_lvl; \ +#define __DEFINE_VERBOSE_CLASS_DEFAULT_LVL(default_lvl) \ + static constexpr uint64_t log_level = default_lvl; \ } /** helper for define log lvl inside of DEFINE_VERBOSE_CLASS */ -#define __DEFINE_VERBOSE_CLASS_LVLS(...) \ - __VA_ARGS__ \ +#define __DEFINE_VERBOSE_CLASS_LVLS(...) \ + __VA_ARGS__ \ __DEFINE_VERBOSE_CLASS_DEFAULT_LVL /** create a struct which represent a verbose container * @param structName name of the container(struct) */ -#define DEFINE_VERBOSE_CLASS(structName) \ - struct structName \ - { \ - static std::string getName() \ - { \ - return std::string(#structName); \ - } \ - private: \ - typedef structName thisClass; \ - public: \ - __DEFINE_VERBOSE_CLASS_LVLS +#define DEFINE_VERBOSE_CLASS(structName) \ + struct structName \ + { \ + static std::string getName() \ + { \ + return std::string(#structName); \ + } \ + \ + private: \ + typedef structName thisClass; \ + \ + public: \ + __DEFINE_VERBOSE_CLASS_LVLS diff --git a/include/pmacc/debug/abortWithError.hpp b/include/pmacc/debug/abortWithError.hpp index 140ee49e71..c8714d9788 100644 --- a/include/pmacc/debug/abortWithError.hpp +++ b/include/pmacc/debug/abortWithError.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. * @@ -28,33 +28,28 @@ namespace pmacc { -namespace{ - /** abort program with an exception - * - * This function always throws a `runtime_error`. 
- * - * @param exp evaluated expression - * @param filename name of the broken file - * @param lineNumber line in file - * @param msg user defined error message - */ - void abortWithError( - const std::string exp, - const std::string filename, - const uint32_t lineNumber, - const std::string msg = std::string() - ) + namespace { - std::stringstream line; - line << lineNumber; + /** abort program with an exception + * + * This function always throws a `runtime_error`. + * + * @param exp evaluated expression + * @param filename name of the broken file + * @param lineNumber line in file + * @param msg user defined error message + */ + void abortWithError( + const std::string exp, + const std::string filename, + const uint32_t lineNumber, + const std::string msg = std::string()) + { + std::stringstream line; + line << lineNumber; - throw std::runtime_error( - "expression (" + - exp + - ") failed in file (" + - filename + ":" + line.str() + ") : " + - msg - ); - } -} -} + throw std::runtime_error( + "expression (" + exp + ") failed in file (" + filename + ":" + line.str() + ") : " + msg); + } + } // namespace +} // namespace pmacc diff --git a/include/pmacc/dimensions/DataSpace.hpp b/include/pmacc/dimensions/DataSpace.hpp index a46a87dc81..8c37ac02af 100644 --- a/include/pmacc/dimensions/DataSpace.hpp +++ b/include/pmacc/dimensions/DataSpace.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. @@ -27,7 +27,6 @@ namespace pmacc { - /** * A T_Dim-dimensional data space. 
* @@ -36,12 +35,11 @@ namespace pmacc * * @tparam T_Dim dimension (1-3) of the dataspace */ - template - class DataSpace : public math::Vector + template + class DataSpace : public math::Vector { public: - - static constexpr int Dim=T_Dim; + static constexpr int Dim = T_Dim; using BaseType = math::Vector; /** @@ -50,19 +48,21 @@ namespace pmacc */ HDINLINE DataSpace() { - for (uint32_t i = 0; i < T_Dim; ++i) + for(uint32_t i = 0; i < T_Dim; ++i) { (*this)[i] = 0; } } + constexpr HDINLINE DataSpace& operator=(const DataSpace&) = default; + /** * constructor. * Sets size of all dimensions from cuda dim3. */ - HDINLINE explicit DataSpace(dim3 value) + HDINLINE explicit DataSpace(cupla::dim3 value) { - for (uint32_t i = 0; i < T_Dim; ++i) + for(uint32_t i = 0; i < T_Dim; ++i) { (*this)[i] = *(&(value.x) + i); } @@ -70,11 +70,11 @@ namespace pmacc /** * constructor. - * Sets size of all dimensions from cuda uint3 (e.g. threadIdx/blockIdx) + * Sets size of all dimensions from cupla uint3 (e.g. 
cupla::threadIdx(acc)/cupla::blockIdx(acc)) */ - HDINLINE explicit DataSpace(uint3 value) + HDINLINE DataSpace(cupla::uint3 value) { - for (uint32_t i = 0; i < T_Dim; ++i) + for(uint32_t i = 0; i < T_Dim; ++i) { (*this)[i] = *(&(value.x) + i); } @@ -120,7 +120,7 @@ namespace pmacc HDINLINE DataSpace(const math::Size_t& vec) { - for (uint32_t i = 0; i < T_Dim; ++i) + for(uint32_t i = 0; i < T_Dim; ++i) { (*this)[i] = vec[i]; } @@ -135,7 +135,7 @@ namespace pmacc HDINLINE static DataSpace create(int value = 1) { DataSpace tmp; - for (uint32_t i = 0; i < T_Dim; ++i) + for(uint32_t i = 0; i < T_Dim; ++i) { tmp[i] = value; } @@ -160,9 +160,9 @@ namespace pmacc */ HINLINE bool isOneDimensionGreaterThan(const DataSpace& other) const { - for (uint32_t i = 0; i < T_Dim; ++i) + for(uint32_t i = 0; i < T_Dim; ++i) { - if ((*this)[i] > other[i]) + if((*this)[i] > other[i]) return true; } return false; @@ -171,18 +171,17 @@ namespace pmacc HDINLINE operator math::Size_t() const { math::Size_t result; - for (uint32_t i = 0; i < T_Dim; i++) + for(uint32_t i = 0; i < T_Dim; i++) result[i] = static_cast((*this)[i]); return result; } - HDINLINE explicit operator dim3() const + HDINLINE operator cupla::dim3() const { return this->toDim3(); } - }; -} //namespace pmacc +} // namespace pmacc #include "pmacc/dimensions/DataSpace.tpp" diff --git a/include/pmacc/dimensions/DataSpace.tpp b/include/pmacc/dimensions/DataSpace.tpp index cf8793aec3..3c9ca4d588 100644 --- a/include/pmacc/dimensions/DataSpace.tpp +++ b/include/pmacc/dimensions/DataSpace.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -32,53 +32,50 @@ namespace pmacc { - -namespace traits -{ - -template -struct GetComponentsType, false > -{ - typedef typename DataSpace::type type; -}; - -/** Trait for float_X */ -template -struct GetNComponents,false > -{ - static constexpr uint32_t value=DIM; -}; - -}// namespace traits - -namespace algorithms -{ -namespace precisionCast -{ - -template -struct TypeCast > -{ - typedef const pmacc::DataSpace& result; - - HDINLINE result operator( )(const pmacc::DataSpace& vector ) const + namespace traits { - return vector; - } -}; - -template -struct TypeCast > -{ - typedef ::pmacc::math::Vector result; - - HDINLINE result operator( )(const pmacc::DataSpace& vector ) const + template + struct GetComponentsType, false> + { + typedef typename DataSpace::type type; + }; + + /** Trait for float_X */ + template + struct GetNComponents, false> + { + static constexpr uint32_t value = DIM; + }; + + } // namespace traits + + namespace algorithms { - return result( vector ); - } -}; - -} //namespace typecast -} //namespace algorithms - -} //namespace pmacc + namespace precisionCast + { + template + struct TypeCast> + { + typedef const pmacc::DataSpace& result; + + HDINLINE result operator()(const pmacc::DataSpace& vector) const + { + return vector; + } + }; + + template + struct TypeCast> + { + typedef ::pmacc::math::Vector result; + + HDINLINE result operator()(const pmacc::DataSpace& vector) const + { + return result(vector); + } + }; + + } // namespace precisionCast + } // namespace algorithms + +} // namespace pmacc diff --git a/include/pmacc/dimensions/DataSpaceOperations.hpp b/include/pmacc/dimensions/DataSpaceOperations.hpp index 6e57113910..57176142df 100644 --- a/include/pmacc/dimensions/DataSpaceOperations.hpp +++ b/include/pmacc/dimensions/DataSpaceOperations.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -29,7 +29,6 @@ namespace pmacc { - /** * Implements operations on DataSpace objects such as reduce and extend. * @@ -62,7 +61,7 @@ namespace pmacc * @param ex exchange direction for reduction * @return reduced DataSpace with dimension DIM-1 */ - static HDINLINE DataSpace reduce(DataSpace ds, uint32_t ex); + static HDINLINE DataSpace reduce(DataSpace ds, uint32_t ex); /** * Extends the DataSpace object ds of dimension DIM to a DataSpace object of dimension DIM+1. @@ -78,19 +77,21 @@ namespace pmacc * @param offset DataSpace describing size of target grid's offset * @return extended DataSpace with dimension DIM+1 */ - static HDINLINE DataSpace extend(DataSpace ds, uint32_t ex, - DataSpace target, DataSpace offset); + static HDINLINE DataSpace extend( + DataSpace ds, + uint32_t ex, + DataSpace target, + DataSpace offset); }; template<> class DataSpaceOperations { public: - template static HDINLINE DataSpace map(uint32_t pos) { - return DataSpace (pos); + return DataSpace(pos); } template @@ -101,66 +102,64 @@ namespace pmacc static HDINLINE DataSpace map(const DataSpace& size, uint32_t pos) { - return DataSpace (pos); + return DataSpace(pos); } - static HDINLINE DataSpace extend(DataSpace ds, uint32_t ex, - DataSpace target, DataSpace offset) + static HDINLINE DataSpace extend( + DataSpace ds, + uint32_t ex, + DataSpace target, + DataSpace offset) { - DataSpace directions = Mask::getRelativeDirections (ex); + DataSpace directions = Mask::getRelativeDirections(ex); DataSpace result(ds[0], ds[0]); // RIGHT - if (directions.x() == 1) + if(directions.x() == 1) { result.x() = target.x() - offset.x() - 1; } // LEFT - if (directions.x() == -1) + if(directions.x() == -1) { result.x() = offset.x(); } // TOP - if (directions.y() == 1) + if(directions.y() == 1) { result.y() = target.y() - offset.y() - 1; } // BOTTOM - if (directions.y() == -1) + if(directions.y() == -1) { result.y() = offset.y(); } return result; - } - }; template<> class DataSpaceOperations { public: - 
template static HDINLINE DataSpace map(uint32_t pos) { auto const y = pos / TVEC::x::value; auto const x = pos - y * TVEC::x::value; - return DataSpace< DIM2 >( x , y ); + return DataSpace(x, y); } template static HDINLINE uint32_t map(const DataSpace& pos) { - return - pos.y() * TVEC::x::value + - pos.x(); + return pos.y() * TVEC::x::value + pos.x(); } static HDINLINE DataSpace map(const DataSpace& size, uint32_t pos) @@ -168,33 +167,34 @@ namespace pmacc auto const y = pos / size.x(); auto const x = pos - y * size.x(); - return DataSpace< DIM2 >( x , y ); + return DataSpace(x, y); } static HDINLINE uint32_t map(const DataSpace& size, const DataSpace& pos) { - return - pos.y() * size.x() + - pos.x(); + return pos.y() * size.x() + pos.x(); } static HDINLINE DataSpace reduce(DataSpace ds, uint32_t ex) { - DataSpace directions = Mask::getRelativeDirections (ex); + DataSpace directions = Mask::getRelativeDirections(ex); - if (directions.x() != 0) - return DataSpace (ds.y()); + if(directions.x() != 0) + return DataSpace(ds.y()); - if (directions.y() != 0) - return DataSpace (ds.x()); + if(directions.y() != 0) + return DataSpace(ds.x()); - return DataSpace (0); + return DataSpace(0); } - static HDINLINE DataSpace extend(DataSpace ds, uint32_t ex, - DataSpace target, DataSpace offset) + static HDINLINE DataSpace extend( + DataSpace ds, + uint32_t ex, + DataSpace target, + DataSpace offset) { - DataSpace directions = Mask::getRelativeDirections (ex); + DataSpace directions = Mask::getRelativeDirections(ex); DataSpace result; @@ -202,7 +202,7 @@ namespace pmacc const uint32_t z_entry(1); uint32_t y_entry(1); - switch (directions.x()) + switch(directions.x()) { // RIGHT case 1: @@ -219,7 +219,7 @@ namespace pmacc break; } - switch (directions.z()) + switch(directions.z()) { // BACK case 1: @@ -234,7 +234,7 @@ namespace pmacc break; } - switch (directions.y()) + switch(directions.y()) { // BOTTOM case 1: @@ -245,8 +245,8 @@ namespace pmacc result.y() = offset.y(); break; 
case 0: - //thsi if fiy lmem usage (old wars result.y()=ds[y_entry] ) - if (y_entry == 0) + // thsi if fiy lmem usage (old wars result.y()=ds[y_entry] ) + if(y_entry == 0) result.y() = ds.x(); else result.y() = ds.y(); @@ -255,14 +255,12 @@ namespace pmacc return result; } - }; template<> class DataSpaceOperations { public: - template static HDINLINE DataSpace map(uint32_t pos) { @@ -272,7 +270,7 @@ namespace pmacc auto const y = pos / TVEC::x::value; auto const x = pos - y * TVEC::x::value; - return DataSpace< DIM3 >( x , y, z ); + return DataSpace(x, y, z); } static HDINLINE DataSpace map(const DataSpace& size, uint32_t pos) @@ -283,41 +281,35 @@ namespace pmacc auto const y = pos / size.x(); auto const x = pos - y * size.x(); - return DataSpace< DIM3 >( x , y, z ); + return DataSpace(x, y, z); } template static HDINLINE uint32_t map(const DataSpace& pos) { - return - pos.z() * ( TVEC::x::value * TVEC::y::value ) + - pos.y() * TVEC::x::value + - pos.x(); + return pos.z() * (TVEC::x::value * TVEC::y::value) + pos.y() * TVEC::x::value + pos.x(); } static HDINLINE uint32_t map(const DataSpace& size, const DataSpace& pos) { - return - pos.z() * size.x() * size.y() + - pos.y() * size.x() + - pos.x(); + return pos.z() * size.x() * size.y() + pos.y() * size.x() + pos.x(); } static HDINLINE DataSpace reduce(DataSpace ds, uint32_t ex) { - DataSpace directions = Mask::getRelativeDirections (ex); + DataSpace directions = Mask::getRelativeDirections(ex); - if (directions.x() != 0) - return DataSpace (ds.y(), ds.z()); + if(directions.x() != 0) + return DataSpace(ds.y(), ds.z()); - if (directions.z() != 0) - return DataSpace (ds.x(), ds.y()); + if(directions.z() != 0) + return DataSpace(ds.x(), ds.y()); - if (directions.y() != 0) - return DataSpace (ds.x(), ds.z()); + if(directions.y() != 0) + return DataSpace(ds.x(), ds.z()); - return DataSpace (0, 0); + return DataSpace(0, 0); } }; -} +} // namespace pmacc diff --git a/include/pmacc/dimensions/Definition.hpp 
b/include/pmacc/dimensions/Definition.hpp index 0236567bf9..9c6ef1071f 100644 --- a/include/pmacc/dimensions/Definition.hpp +++ b/include/pmacc/dimensions/Definition.hpp @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 Rene Widera +/* Copyright 2019-2021 Rene Widera * * This file is part of PMacc. * diff --git a/include/pmacc/dimensions/GridLayout.hpp b/include/pmacc/dimensions/GridLayout.hpp index 2b2e390a8b..9a525c76d4 100644 --- a/include/pmacc/dimensions/GridLayout.hpp +++ b/include/pmacc/dimensions/GridLayout.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -26,33 +26,29 @@ namespace pmacc { - /** * Describes layout of a DIM-dimensional data grid including the actual grid and optional guards. * * @tparam DIM dimension of the grid */ - template + template class GridLayout { public: - - HDINLINE GridLayout() : - dataSpace(DataSpace::create(1)), - guard(DataSpace::create(0)) + HDINLINE GridLayout() : dataSpace(DataSpace::create(1)), guard(DataSpace::create(0)) { } /** * constructor * @param dataSpace DataSpace defining size of the layout (native loacal simulation area whithout any guarding) - * @param guard DataSpace defining size of the guard cells. Guard is added to actual grid (dataSpace). Will be initialized to 0. + * @param guard DataSpace defining size of the guard cells. Guard is added to actual grid (dataSpace). Will be + * initialized to 0. 
*/ - HDINLINE GridLayout(const DataSpace &dataSpace, DataSpace guard = DataSpace()) : - dataSpace(dataSpace), - guard(guard) + HDINLINE GridLayout(const DataSpace& dataSpace, DataSpace guard = DataSpace()) + : dataSpace(dataSpace) + , guard(guard) { - } /** @@ -82,7 +78,6 @@ namespace pmacc private: DataSpace dataSpace; DataSpace guard; - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/dimensions/SuperCellDescription.hpp b/include/pmacc/dimensions/SuperCellDescription.hpp index e4f23557f8..5f0799b250 100644 --- a/include/pmacc/dimensions/SuperCellDescription.hpp +++ b/include/pmacc/dimensions/SuperCellDescription.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -37,12 +37,12 @@ namespace pmacc * @tparam T_OffsetOrigin compile time size of the guard relative to origin (positive value) * @tparam T_OffsetEnd compile time size of the guard relative to end of SuperCell (positive value) */ - template< class T_SuperCellSize, - class T_OffsetOrigin = typename math::CT::make_Int::type, - class T_OffsetEnd = typename math::CT::make_Int::type > + template< + class T_SuperCellSize, + class T_OffsetOrigin = typename math::CT::make_Int::type, + class T_OffsetEnd = typename math::CT::make_Int::type> struct SuperCellDescription { - enum { Dim = T_SuperCellSize::dim @@ -52,8 +52,8 @@ namespace pmacc typedef T_OffsetEnd OffsetEnd; typedef SuperCellDescription Type; - typedef typename ct::add::type AddFirst; - typedef typename ct::add::type FullSuperCellSize; + typedef typename ct::add::type AddFirst; + typedef typename ct::add::type FullSuperCellSize; }; -}//namespace +} // namespace pmacc diff --git a/include/pmacc/eventSystem/EventSystem.hpp b/include/pmacc/eventSystem/EventSystem.hpp index 90bba8b606..9f2d4397e6 100644 --- a/include/pmacc/eventSystem/EventSystem.hpp +++ b/include/pmacc/eventSystem/EventSystem.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene 
Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * diff --git a/include/pmacc/eventSystem/EventSystem.tpp b/include/pmacc/eventSystem/EventSystem.tpp index 7e53b8c5d6..dd01952790 100644 --- a/include/pmacc/eventSystem/EventSystem.tpp +++ b/include/pmacc/eventSystem/EventSystem.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * diff --git a/include/pmacc/eventSystem/EventType.hpp b/include/pmacc/eventSystem/EventType.hpp index 3de4257d14..0fa9dc717c 100644 --- a/include/pmacc/eventSystem/EventType.hpp +++ b/include/pmacc/eventSystem/EventType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -28,27 +28,26 @@ namespace pmacc { -namespace eventSystem -{ - - /** - * Internal event/task type used for notifications in the event system. - */ - enum EventType + namespace eventSystem { - FINISHED, - COPYHOST2DEVICE, - COPYDEVICE2HOST, - COPYDEVICE2DEVICE, - SENDFINISHED, - RECVFINISHED, - LOGICALAND, - SETVALUE, - GETVALUE, - KERNEL - }; + /** + * Internal event/task type used for notifications in the event system. 
+ */ + enum EventType + { + FINISHED, + COPYHOST2DEVICE, + COPYDEVICE2HOST, + COPYDEVICE2DEVICE, + SENDFINISHED, + RECVFINISHED, + LOGICALAND, + SETVALUE, + GETVALUE, + KERNEL + }; -} // namespace type + } // namespace eventSystem // for backward compatibility pull all definitions into the pmacc namespace using namespace eventSystem; diff --git a/include/pmacc/eventSystem/Manager.hpp b/include/pmacc/eventSystem/Manager.hpp index 09d67ea434..3ead7f8c87 100644 --- a/include/pmacc/eventSystem/Manager.hpp +++ b/include/pmacc/eventSystem/Manager.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. @@ -67,15 +67,14 @@ namespace pmacc * adds an ITask to the manager and returns an EventTask for it * @param task task to add to the manager */ - void addTask(ITask *task); + void addTask(ITask* task); - void addPassiveTask(ITask *task); + void addPassiveTask(ITask* task); std::size_t getCount(); private: - friend struct detail::Environment; inline ITask* getPassiveITaskIfNotFinished(id_t taskId) const; @@ -98,4 +97,4 @@ namespace pmacc TaskMap passiveTasks; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/Manager.tpp b/include/pmacc/eventSystem/Manager.tpp index 1e11df0154..cddff3666a 100644 --- a/include/pmacc/eventSystem/Manager.tpp +++ b/include/pmacc/eventSystem/Manager.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -35,179 +35,176 @@ namespace pmacc { + inline Manager::~Manager() + { + CUDA_CHECK_NO_EXCEPT(cuplaGetLastError()); + waitForAllTasks(); + CUDA_CHECK_NO_EXCEPT(cuplaGetLastError()); + } -inline Manager::~Manager( ) -{ - CUDA_CHECK_NO_EXCEPT(cudaGetLastError( )); - waitForAllTasks( ); - CUDA_CHECK_NO_EXCEPT(cudaGetLastError( )); -} - -inline bool Manager::execute( id_t taskToWait ) -{ + inline bool Manager::execute(id_t taskToWait) + { #ifdef DEBUG_EVENTS - static int old_max = 0; - static int deep = -1; - static int counter = 0; - ++counter; + static int old_max = 0; + static int deep = -1; + static int counter = 0; + ++counter; - deep++; - if ( deep > old_max ) - { - old_max = deep; - } + deep++; + if(deep > old_max) + { + old_max = deep; + } #endif - static TaskMap::iterator iter = tasks.begin( ); + static TaskMap::iterator iter = tasks.begin(); - if ( iter == tasks.end( ) ) - iter = tasks.begin( ); + if(iter == tasks.end()) + iter = tasks.begin(); - // this is the slow but very save variant to delete tasks in a map - while ( iter != tasks.end( ) ) - { - id_t id = iter->first; - ITask* taskPtr = iter->second; - PMACC_ASSERT( taskPtr != nullptr ); - ++iter; + // this is the slow but very save variant to delete tasks in a map + while(iter != tasks.end()) + { + id_t id = iter->first; + ITask* taskPtr = iter->second; + PMACC_ASSERT(taskPtr != nullptr); + ++iter; #ifdef DEBUG_EVENTS - if ( counter == 500000 ) - std::cout << taskPtr->toString( ) << " " << passiveTasks.size( ) << std::endl; + if(counter == 500000) + std::cout << taskPtr->toString() << " " << passiveTasks.size() << std::endl; #endif - if ( taskPtr->execute( ) ) - { - /*test if task is deleted by other stackdeep*/ - if ( getActiveITaskIfNotFinished( id ) == taskPtr ) + if(taskPtr->execute()) { - tasks.erase( id ); - __delete(taskPtr); - } + /*test if task is deleted by other stackdeep*/ + if(getActiveITaskIfNotFinished(id) == taskPtr) + { + tasks.erase(id); + __delete(taskPtr); + } #ifdef DEBUG_EVENTS 
- counter = 0; + counter = 0; #endif - if ( taskToWait == id ) - { - iter = tasks.end( ); + if(taskToWait == id) + { + iter = tasks.end(); #ifdef DEBUG_EVENTS - --deep; + --deep; #endif - return true; //jump out because searched task is finished + return true; // jump out because searched task is finished + } } } - } #ifdef DEBUG_EVENTS - --deep; + --deep; #endif - return false; -} + return false; + } -inline void Manager::event( id_t eventId, EventType, IEventData* ) -{ - passiveTasks.erase( eventId ); -} + inline void Manager::event(id_t eventId, EventType, IEventData*) + { + passiveTasks.erase(eventId); + } -inline ITask* Manager::getITaskIfNotFinished( id_t taskId ) const -{ - if( taskId == 0 ) - return nullptr; - ITask* passiveTask = getPassiveITaskIfNotFinished( taskId ); - if ( passiveTask != nullptr ) - return passiveTask; + inline ITask* Manager::getITaskIfNotFinished(id_t taskId) const + { + if(taskId == 0) + return nullptr; + ITask* passiveTask = getPassiveITaskIfNotFinished(taskId); + if(passiveTask != nullptr) + return passiveTask; - return getActiveITaskIfNotFinished( taskId ); -} + return getActiveITaskIfNotFinished(taskId); + } -inline ITask* Manager::getPassiveITaskIfNotFinished( id_t taskId ) const -{ - TaskMap::const_iterator itPassive = passiveTasks.find( taskId ); - if ( itPassive != passiveTasks.end( ) ) - return itPassive->second; - return nullptr; -} + inline ITask* Manager::getPassiveITaskIfNotFinished(id_t taskId) const + { + TaskMap::const_iterator itPassive = passiveTasks.find(taskId); + if(itPassive != passiveTasks.end()) + return itPassive->second; + return nullptr; + } -inline ITask* Manager::getActiveITaskIfNotFinished( id_t taskId ) const -{ - TaskMap::const_iterator it = tasks.find( taskId ); - if ( it != tasks.end( ) ) - return it->second; - return nullptr; -} + inline ITask* Manager::getActiveITaskIfNotFinished(id_t taskId) const + { + TaskMap::const_iterator it = tasks.find(taskId); + if(it != tasks.end()) + return it->second; + 
return nullptr; + } -inline void Manager::waitForFinished( id_t taskId ) -{ - if( taskId == 0 ) - return; - //check if task is passive and wait on it - ITask* task = getPassiveITaskIfNotFinished( taskId ); - if ( task != nullptr ) + inline void Manager::waitForFinished(id_t taskId) { - do + if(taskId == 0) + return; + // check if task is passive and wait on it + ITask* task = getPassiveITaskIfNotFinished(taskId); + if(task != nullptr) { - this->execute( ); + do + { + this->execute(); + } while(getPassiveITaskIfNotFinished(taskId) != nullptr); + + return; // we can jump out because task is passive task } - while ( getPassiveITaskIfNotFinished( taskId ) != nullptr ); - return; //we can jump out because task is passive task + // check if task is active and wait on it + task = getActiveITaskIfNotFinished(taskId); + if(task != nullptr) + { + do + { + if(this->execute(taskId)) + return; // jump out because task is finished + } while(getActiveITaskIfNotFinished(taskId) != nullptr); + } } - //check if task is active and wait on it - task = getActiveITaskIfNotFinished( taskId ); - if ( task != nullptr ) + inline void Manager::waitForAllTasks() { - do + while(tasks.size() != 0 || passiveTasks.size() != 0) { - if ( this->execute( taskId ) ) - return; //jump out because task is finished + this->execute(); } - while ( getActiveITaskIfNotFinished( taskId ) != nullptr ); + PMACC_ASSERT(tasks.size() == 0); } -} -inline void Manager::waitForAllTasks( ) -{ - while ( tasks.size( ) != 0 || passiveTasks.size( ) != 0 ) + inline void Manager::addTask(ITask* task) { - this->execute( ); + PMACC_ASSERT(task != nullptr); + tasks[task->getId()] = task; } - PMACC_ASSERT( tasks.size( ) == 0 ); -} - -inline void Manager::addTask( ITask *task ) -{ - PMACC_ASSERT( task != nullptr ); - tasks[task->getId( )] = task; -} -inline void Manager::addPassiveTask( ITask *task ) -{ - PMACC_ASSERT( task != nullptr ); + inline void Manager::addPassiveTask(ITask* task) + { + PMACC_ASSERT(task != nullptr); - 
task->addObserver( this ); - passiveTasks[task->getId( )] = task; -} + task->addObserver(this); + passiveTasks[task->getId()] = task; + } -inline Manager::Manager( ) -{ -} + inline Manager::Manager() + { + } -inline Manager::Manager( const Manager& ) -{ -} + inline Manager::Manager(const Manager&) + { + } -inline std::size_t Manager::getCount( ) -{ - for ( TaskMap::iterator iter = tasks.begin( ); iter != tasks.end( ); ++iter ) + inline std::size_t Manager::getCount() { - if ( iter->second != nullptr ) + for(TaskMap::iterator iter = tasks.begin(); iter != tasks.end(); ++iter) { - std::cout << iter->first << " = " << iter->second->toString( ) << std::endl; + if(iter->second != nullptr) + { + std::cout << iter->first << " = " << iter->second->toString() << std::endl; + } } + return tasks.size(); } - return tasks.size( ); -} -} +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/CudaEvent.def b/include/pmacc/eventSystem/events/CudaEvent.def index f56fca9c6f..d046812c77 100644 --- a/include/pmacc/eventSystem/events/CudaEvent.def +++ b/include/pmacc/eventSystem/events/CudaEvent.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. 
* @@ -25,84 +25,79 @@ #include "pmacc/assert.hpp" - namespace pmacc { - -/** Wrapper for cudaEvent_t - * - * This class follows the RAII rules - */ -class CudaEvent -{ -private: - - /** native cuda event */ - cudaEvent_t event; - /** native cuda stream where the event is recorded - * - * only valid if isRecorded is true - */ - cudaStream_t stream; - /** state if event is recorded */ - bool isRecorded; - /** state if a recorded event is finished + /** Wrapper for cuplaEvent_t * - * avoid cuda driver calls after `isFinished()` returns the first time true + * This class follows the RAII rules */ - bool finished; - - /** number of CudaEventHandle's to the instance */ - uint32_t refCounter; - - -public: - - /** Constructor - * - * if called before the cuda device is initialized the behavior is undefined - */ - HINLINE CudaEvent( ); - - /** Destructor */ - HINLINE ~CudaEvent( ); - - /** register a existing handle to a event instance */ - HINLINE void registerHandle( ); - - /** free a registered handle */ - HINLINE void releaseHandle( ); - - /** get native cudaEvent_t object - * - * @return native cuda event - */ - cudaEvent_t operator*( ) const - { - return event; - } - - /** get stream in which this event is recorded - * - * @return native cuda stream - */ - cudaStream_t getStream( ) const + class CudaEvent { - assert( isRecorded ); - return stream; - } - - /** check whether the event is finished - * - * @return true if event is finished else false - */ - HINLINE bool isFinished( ); - - /** record event in a device stream - * - * @param stream native cuda stream - */ - HINLINE void recordEvent( cudaStream_t stream ); - -}; -} + private: + /** native cupla event */ + cuplaEvent_t event; + /** native cupla stream where the event is recorded + * + * only valid if isRecorded is true + */ + cuplaStream_t stream; + /** state if event is recorded */ + bool isRecorded; + /** state if a recorded event is finished + * + * avoid cupla driver calls after `isFinished()` returns the 
first time true + */ + bool finished; + + /** number of CudaEventHandle's to the instance */ + uint32_t refCounter; + + + public: + /** Constructor + * + * if called before the cupla device is initialized the behavior is undefined + */ + HINLINE CudaEvent(); + + /** Destructor */ + HINLINE ~CudaEvent(); + + /** register a existing handle to a event instance */ + HINLINE void registerHandle(); + + /** free a registered handle */ + HINLINE void releaseHandle(); + + /** get native cuplaEvent_t object + * + * @return native cupla event + */ + cuplaEvent_t operator*() const + { + return event; + } + + /** get stream in which this event is recorded + * + * @return native cupla stream + */ + cuplaStream_t getStream() const + { + assert(isRecorded); + return stream; + } + + /** check whether the event is finished + * + * @return true if event is finished else false + */ + HINLINE bool isFinished(); + + /** record event in a device stream + * + * @param stream native cupla stream + */ + HINLINE void recordEvent(cuplaStream_t stream); + }; +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/CudaEvent.hpp b/include/pmacc/eventSystem/events/CudaEvent.hpp index e205962508..6446315f37 100644 --- a/include/pmacc/eventSystem/events/CudaEvent.hpp +++ b/include/pmacc/eventSystem/events/CudaEvent.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,25 +26,22 @@ #include "pmacc/types.hpp" - - namespace pmacc { - CudaEvent::CudaEvent( ) : isRecorded( false ), finished( true ), refCounter( 0u ) + CudaEvent::CudaEvent() : isRecorded(false), finished(true), refCounter(0u) { - log( ggLog::CUDA_RT()+ggLog::EVENT(), "create event" ); - CUDA_CHECK( cudaEventCreateWithFlags( &event, cudaEventDisableTiming ) ); + log(ggLog::CUDA_RT() + ggLog::EVENT(), "create event"); + CUDA_CHECK(cuplaEventCreateWithFlags(&event, cuplaEventDisableTiming)); } - CudaEvent::~CudaEvent( ) + CudaEvent::~CudaEvent() { - PMACC_ASSERT( refCounter == 0u ); - log( ggLog::CUDA_RT()+ggLog::EVENT(), "sync and delete event" ); - // free cuda event - CUDA_CHECK_NO_EXCEPT(cudaEventSynchronize( event )); - CUDA_CHECK_NO_EXCEPT(cudaEventDestroy( event )); - + PMACC_ASSERT(refCounter == 0u); + log(ggLog::CUDA_RT() + ggLog::EVENT(), "sync and delete event"); + // free cupla event + CUDA_CHECK_NO_EXCEPT(cuplaEventSynchronize(event)); + CUDA_CHECK_NO_EXCEPT(cuplaEventDestroy(event)); } void CudaEvent::registerHandle() @@ -54,49 +51,49 @@ namespace pmacc void CudaEvent::releaseHandle() { - assert( refCounter != 0u ); + assert(refCounter != 0u); // get old value and decrement uint32_t oldCounter = refCounter--; - if( oldCounter == 1u ) + if(oldCounter == 1u) { // reset event meta data isRecorded = false; finished = true; - Environment<>::get().EventPool( ).push( this ); + Environment<>::get().EventPool().push(this); } } bool CudaEvent::isFinished() { - // avoid cuda driver calls if event is already finished - if( finished ) + // avoid cupla driver calls if event is already finished + if(finished) return true; - assert( isRecorded ); + assert(isRecorded); - cudaError_t rc = cudaEventQuery(event); + cuplaError_t rc = cuplaEventQuery(event); - if(rc == cudaSuccess) + if(rc == cuplaSuccess) { finished = true; return true; } - else if(rc == cudaErrorNotReady) + else if(rc == cuplaErrorNotReady) return false; else - PMACC_PRINT_CUDA_ERROR_AND_THROW(rc, 
"Event query failed"); + PMACC_PRINT_CUPLA_ERROR_AND_THROW(rc, "Event query failed"); } - void CudaEvent::recordEvent(cudaStream_t stream) + void CudaEvent::recordEvent(cuplaStream_t stream) { /* disallow double recording */ assert(isRecorded == false); isRecorded = true; finished = false; this->stream = stream; - CUDA_CHECK(cudaEventRecord(event, stream)); + CUDA_CHECK(cuplaEventRecord(event, stream)); } -} // namepsace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/CudaEventHandle.hpp b/include/pmacc/eventSystem/events/CudaEventHandle.hpp index c105240a42..34fd1854c3 100644 --- a/include/pmacc/eventSystem/events/CudaEventHandle.hpp +++ b/include/pmacc/eventSystem/events/CudaEventHandle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -29,106 +29,101 @@ namespace pmacc { - -/** handle to CudaEvent */ -class CudaEventHandle -{ -private: - - /** pointer to the CudaEvent */ - CudaEvent* event; - -public: - - /** create invalid handle */ - CudaEventHandle( ) : event( nullptr ) - { - - } - - /** create a handle to a valid CudaEvent - * - * @param evPointer pointer to a CudaEvent - */ - CudaEventHandle( CudaEvent* const evPointer ) : event( evPointer ) - { - event->registerHandle(); - } - - CudaEventHandle( const CudaEventHandle& other ) : event( nullptr ) - { - /* register and release handle is done by the assign operator */ - *this = other; - } - - /** assign an event handle - * - * undefined behavior if the other event handle is equal to this instance - * - * @param other event handle - * @return this handle - */ - CudaEventHandle& - operator=( const CudaEventHandle& other ) - { - /* check if an old event is overwritten */ - if( event ) - event->releaseHandle( ); - event = other.event; - /* check that new event pointer is not nullptr */ - if( event ) - event->registerHandle( ); - return *this; - } - - /** Destructor */ - ~CudaEventHandle( ) - { - if( event ) - 
event->releaseHandle( ); - event = nullptr; - } - - /** - * get native cuda event - * - * @return native cuda event - */ - cudaEvent_t operator*( ) const - { - assert( event ); - return **event; - } - - /** check whether the event is finished - * - * @return true if event is finished else false - */ - bool isFinished( ) - { - PMACC_ASSERT( event ); - return event->isFinished( ); - } - - - /** get stream in which this event is recorded - * - * @return native cuda stream - */ - cudaStream_t getStream( ) const - { - PMACC_ASSERT( event ); - return event->getStream( ); - } - - /** record event in a device stream - * - * @param stream native cuda stream - */ - void recordEvent( cudaStream_t stream ) + /** handle to CudaEvent */ + class CudaEventHandle { - PMACC_ASSERT( event ); - event->recordEvent( stream ); - } -}; -} + private: + /** pointer to the CudaEvent */ + CudaEvent* event; + + public: + /** create invalid handle */ + CudaEventHandle() : event(nullptr) + { + } + + /** create a handle to a valid CudaEvent + * + * @param evPointer pointer to a CudaEvent + */ + CudaEventHandle(CudaEvent* const evPointer) : event(evPointer) + { + event->registerHandle(); + } + + CudaEventHandle(const CudaEventHandle& other) : event(nullptr) + { + /* register and release handle is done by the assign operator */ + *this = other; + } + + /** assign an event handle + * + * undefined behavior if the other event handle is equal to this instance + * + * @param other event handle + * @return this handle + */ + CudaEventHandle& operator=(const CudaEventHandle& other) + { + /* check if an old event is overwritten */ + if(event) + event->releaseHandle(); + event = other.event; + /* check that new event pointer is not nullptr */ + if(event) + event->registerHandle(); + return *this; + } + + /** Destructor */ + ~CudaEventHandle() + { + if(event) + event->releaseHandle(); + event = nullptr; + } + + /** + * get native cupla event + * + * @return native cupla event + */ + cuplaEvent_t operator*() 
const + { + assert(event); + return **event; + } + + /** check whether the event is finished + * + * @return true if event is finished else false + */ + bool isFinished() + { + PMACC_ASSERT(event); + return event->isFinished(); + } + + + /** get stream in which this event is recorded + * + * @return native cupla stream + */ + cuplaStream_t getStream() const + { + PMACC_ASSERT(event); + return event->getStream(); + } + + /** record event in a device stream + * + * @param stream native cupla stream + */ + void recordEvent(cuplaStream_t stream) + { + PMACC_ASSERT(event); + event->recordEvent(stream); + } + }; +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/EventDataReceive.hpp b/include/pmacc/eventSystem/events/EventDataReceive.hpp index d6cb1bbd96..91381a9577 100644 --- a/include/pmacc/eventSystem/events/EventDataReceive.hpp +++ b/include/pmacc/eventSystem/events/EventDataReceive.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -26,14 +26,12 @@ namespace pmacc { - class EventDataReceive : public IEventData { public: - EventDataReceive(EventNotify *task, size_t recv_count) : - IEventData(task), - recv_count(recv_count) - {} + EventDataReceive(EventNotify* task, size_t recv_count) : IEventData(task), recv_count(recv_count) + { + } size_t getReceivedCount() const { @@ -42,7 +40,6 @@ namespace pmacc private: size_t recv_count; - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/EventNotify.hpp b/include/pmacc/eventSystem/events/EventNotify.hpp index 84b5236d53..efeabaa71b 100644 --- a/include/pmacc/eventSystem/events/EventNotify.hpp +++ b/include/pmacc/eventSystem/events/EventNotify.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -28,7 +28,6 @@ namespace pmacc { - class IEventData; class IEvent; @@ -38,7 +37,6 @@ namespace pmacc class EventNotify { public: - virtual ~EventNotify() { } @@ -67,12 +65,10 @@ namespace pmacc * @param type the type of this notification * @param data data passed to observers */ - void notify(id_t eventId, EventType type, IEventData *data); + void notify(id_t eventId, EventType type, IEventData* data); private: std::set observers; - }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/EventNotify.tpp b/include/pmacc/eventSystem/events/EventNotify.tpp index b8bb0f4517..3da67a684e 100644 --- a/include/pmacc/eventSystem/events/EventNotify.tpp +++ b/include/pmacc/eventSystem/events/EventNotify.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -30,27 +30,25 @@ namespace pmacc { - - inline void EventNotify::notify( id_t eventId, EventType type, IEventData *data ) + inline void EventNotify::notify(id_t eventId, EventType type, IEventData* data) + { + std::set::iterator iter = observers.begin(); + for(; iter != observers.end(); iter++) { - std::set::iterator iter = observers.begin( ); - for (; iter != observers.end( ); iter++ ) - { - if ( *iter != nullptr ) - ( *iter )->event( eventId, type, data ); - } - /* if notify is not called from destructor - * other tasks can register after this call. - * But any ITask must call this function in destrctor again" - */ - observers.clear( ); - - /** - * \TODO are we sure that data won't be deleted anywhere else? - * if (data != nullptr) - * delete data; - **/ - + if(*iter != nullptr) + (*iter)->event(eventId, type, data); } - -} //namespace pmacc + /* if notify is not called from destructor + * other tasks can register after this call. + * But any ITask must call this function in destrctor again" + */ + observers.clear(); + + /** + * \TODO are we sure that data won't be deleted anywhere else? + * if (data != nullptr) + * delete data; + **/ + } + +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/EventPool.hpp b/include/pmacc/eventSystem/events/EventPool.hpp index 87c03ffd9f..3340d552cf 100644 --- a/include/pmacc/eventSystem/events/EventPool.hpp +++ b/include/pmacc/eventSystem/events/EventPool.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -33,26 +33,24 @@ namespace pmacc { - - /** Manages a pool of cudaEvent_t objects and gives access to them. */ + /** Manages a pool of cuplaEvent_t objects and gives access to them. 
*/ class EventPool { public: - - /** Returns a free cuda event + /** Returns a free cupla event * - * @return free cuda event + * @return free cupla event */ - CudaEventHandle pop( ) + CudaEventHandle pop() { - if( freeEvents.size( ) != 0 ) + if(freeEvents.size() != 0) { - CudaEventHandle result = freeEvents.front( ); - freeEvents.pop_front( ); + CudaEventHandle result = freeEvents.front(); + freeEvents.pop_front(); return result; } - createEvents( ); - return pop( ); + createEvents(); + return pop(); } @@ -62,67 +60,66 @@ namespace pmacc * * @param ev pointer to CudaEvent */ - void push( CudaEvent* const ev ) + void push(CudaEvent* const ev) { /* Guard that no event is added during the pool is closed (shutdown phase). * This method is also called during the evaluation of the destructor. */ - if( !isClosed ) - freeEvents.push_back( CudaEventHandle(ev) ); + if(!isClosed) + freeEvents.push_back(CudaEventHandle(ev)); } - /** create and add cuda events to the pool + /** create and add cupla events to the pool * - * @param count number of cuda events to add + * @param count number of cupla events to add */ - void createEvents( size_t count = 1u ) + void createEvents(size_t count = 1u) { - for( size_t i = 0u; i < count; i++ ) + for(size_t i = 0u; i < count; i++) { - CudaEvent* nativeEvent = new CudaEvent( ); - events.push_back( nativeEvent ); - push( nativeEvent ); + CudaEvent* nativeEvent = new CudaEvent(); + events.push_back(nativeEvent); + push(nativeEvent); } } - /** Returns the number of cuda events in the pool. + /** Returns the number of cupla events in the pool. 
* - * @return number of cuda events + * @return number of cupla events */ - size_t getEventsCount( ) + size_t getEventsCount() { - return events.size( ); + return events.size(); } private: - friend struct detail::Environment; - static EventPool& getInstance( ) + static EventPool& getInstance() { static EventPool instance; return instance; } /** Constructor */ - EventPool( ) : isClosed( false ) + EventPool() : isClosed(false) { } /** Destructor * - * destroys all cuda events in the pool + * destroys all cupla events in the pool */ ~EventPool() { - log( ggLog::CUDA_RT( )+ggLog::EVENT( ), "shutdown EventPool with %1% events" ) % getEventsCount( ); + log(ggLog::CUDA_RT() + ggLog::EVENT(), "shutdown EventPool with %1% events") % getEventsCount(); isClosed = true; - freeEvents.clear( ); - for( std::vector::const_iterator iter = events.begin(); iter != events.end(); ++iter ) + freeEvents.clear(); + for(std::vector::const_iterator iter = events.begin(); iter != events.end(); ++iter) { delete *iter; } - events.clear( ); + events.clear(); } //! hold all CudaEvents @@ -137,4 +134,4 @@ namespace pmacc */ bool isClosed; }; -} +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/EventTask.hpp b/include/pmacc/eventSystem/events/EventTask.hpp index 20987b15e5..bc4c6abaa6 100644 --- a/include/pmacc/eventSystem/events/EventTask.hpp +++ b/include/pmacc/eventSystem/events/EventTask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -27,7 +27,6 @@ namespace pmacc { - /** * EventTask is used for task-synchronization in the event system. * @@ -37,7 +36,6 @@ namespace pmacc class EventTask { public: - /** * Constructor. * @@ -45,6 +43,8 @@ namespace pmacc */ EventTask(id_t taskId); + constexpr EventTask(const pmacc::EventTask&) = default; + /** * Constructor. 
*/ @@ -78,7 +78,7 @@ namespace pmacc * * @param other EventTask to add to this task */ - EventTask operator+(const EventTask & other); + EventTask operator+(const EventTask& other); /** * Adds two tasks (this task and other) and creates @@ -86,22 +86,19 @@ namespace pmacc * * @param other EventTask to add to this task */ - EventTask& operator+=(const EventTask & other); + EventTask& operator+=(const EventTask& other); /** * Copies attributes from other to this task. * * This task effectively becomes other. */ - EventTask & operator=(const EventTask & other); + EventTask& operator=(const EventTask& other); std::string toString(); private: - id_t taskId; }; -} //namespace pmacc - - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/EventTask.tpp b/include/pmacc/eventSystem/events/EventTask.tpp index 01793454f7..55c0cea8ba 100644 --- a/include/pmacc/eventSystem/events/EventTask.tpp +++ b/include/pmacc/eventSystem/events/EventTask.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -27,19 +27,18 @@ namespace pmacc { + inline EventTask::EventTask(id_t taskId) : taskId(taskId) + { + } - inline EventTask::EventTask(id_t taskId) : - taskId(taskId) - {} - - inline EventTask::EventTask() : - taskId(0) - {} + inline EventTask::EventTask() : taskId(0) + { + } inline std::string EventTask::toString() { - ITask* task=Environment<>::get().Manager().getITaskIfNotFinished(taskId); - if(task!=nullptr) + ITask* task = Environment<>::get().Manager().getITaskIfNotFinished(taskId); + if(task != nullptr) return task->toString(); return std::string(); @@ -60,48 +59,46 @@ namespace pmacc Environment<>::get().Manager().waitForFinished(taskId); } - inline EventTask EventTask::operator+(const EventTask & other) + inline EventTask EventTask::operator+(const EventTask& other) { - EventTask tmp=*this; - return tmp+=other; + EventTask tmp = *this; + return tmp += other; } - inline EventTask& EventTask::operator+=(const EventTask & other) + inline EventTask& EventTask::operator+=(const EventTask& other) { // If one of the two tasks is already finished, the other task is returned. // Otherwise, a TaskLogicalAnd is created and added to the Manager's queue. 
Manager& manager = Environment<>::get().Manager(); - if(this->taskId==other.taskId) + if(this->taskId == other.taskId) return *this; ITask* myTask = manager.getITaskIfNotFinished(this->taskId); - if(myTask==nullptr) + if(myTask == nullptr) { - this->taskId=other.taskId; + this->taskId = other.taskId; return *this; } ITask* otherTask = manager.getITaskIfNotFinished(other.taskId); - if(otherTask==nullptr) + if(otherTask == nullptr) { return *this; } - TaskLogicalAnd *taskAnd = new TaskLogicalAnd(myTask, - otherTask); - this->taskId=taskAnd->getId(); + TaskLogicalAnd* taskAnd = new TaskLogicalAnd(myTask, otherTask); + this->taskId = taskAnd->getId(); manager.addPassiveTask(taskAnd); return *this; } - inline EventTask& EventTask::operator=(const EventTask & other) + inline EventTask& EventTask::operator=(const EventTask& other) { - //this is faster than a copy constructor + // this is faster than a copy constructor taskId = other.taskId; return *this; } -} - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/IEvent.hpp b/include/pmacc/eventSystem/events/IEvent.hpp index 8c6d703a7a..6885ff01f6 100644 --- a/include/pmacc/eventSystem/events/IEvent.hpp +++ b/include/pmacc/eventSystem/events/IEvent.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -26,7 +26,6 @@ namespace pmacc { - class IEventData; /** @@ -35,7 +34,6 @@ namespace pmacc class IEvent { public: - /** * Destructor. 
*/ @@ -53,4 +51,4 @@ namespace pmacc virtual void event(id_t eventId, EventType type, IEventData* data) = 0; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/IEventData.hpp b/include/pmacc/eventSystem/events/IEventData.hpp index 773c820745..0accf82b3f 100644 --- a/include/pmacc/eventSystem/events/IEventData.hpp +++ b/include/pmacc/eventSystem/events/IEventData.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -35,13 +35,13 @@ namespace pmacc class IEventData { public: - - IEventData(EventNotify *task) : - task(task) - {} + IEventData(EventNotify* task) : task(task) + { + } virtual ~IEventData() - {} + { + } EventNotify* getEventNotify() { @@ -49,8 +49,7 @@ namespace pmacc } protected: - EventNotify *task; - + EventNotify* task; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/events/kernelEvents.hpp b/include/pmacc/eventSystem/events/kernelEvents.hpp index 9c72393922..445ef77a16 100644 --- a/include/pmacc/eventSystem/events/kernelEvents.hpp +++ b/include/pmacc/eventSystem/events/kernelEvents.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -32,241 +32,176 @@ #include - /* No namespace in this file since we only declare macro defines */ /*if this flag is defined all kernel calls would be checked and synchronize * this flag must set by the compiler or inside of the Makefile */ -#if( PMACC_SYNC_KERNEL == 1 ) -# define CUDA_CHECK_KERNEL_MSG(...) CUDA_CHECK_MSG(__VA_ARGS__) +#if(PMACC_SYNC_KERNEL == 1) +# define CUDA_CHECK_KERNEL_MSG(...) CUDA_CHECK_MSG(__VA_ARGS__) #else - /*no synchronize and check of kernel calls*/ -# define CUDA_CHECK_KERNEL_MSG(...) 
; +/*no synchronize and check of kernel calls*/ +# define CUDA_CHECK_KERNEL_MSG(...) ; #endif namespace pmacc { -namespace exec -{ - /** configured kernel object - * - * this objects contains the functor and the starting parameter - * - * @tparam T_Kernel pmacc Kernel object - * @tparam T_VectorGrid type which defines the grid extents (type must be castable to CUDA dim3) - * @tparam T_VectorBlock type which defines the block extents (type must be castable to CUDA dim3) - */ - template< - typename T_Kernel, - typename T_VectorGrid, - typename T_VectorBlock - > - struct KernelStarter; - - /** wrapper for the user kernel functor - * - * contains debug information like filename and line of the kernel call - */ - template< typename T_KernelFunctor > - struct Kernel + namespace exec { - using KernelType = T_KernelFunctor; - /** functor */ - T_KernelFunctor const m_kernelFunctor; - /** file name from where the kernel is called */ - std::string const m_file; - /** line number in the file */ - size_t const m_line; - - /** - * - * @param gridExtent grid extent configuration for the kernel - * @param blockExtent block extent configuration for the kernel - * @param sharedMemByte dynamic shared memory used by the kernel (in byte ) - * @return - */ - HINLINE Kernel( - T_KernelFunctor const & kernelFunctor, - std::string const & file = std::string(), - size_t const line = 0 - ) : - m_kernelFunctor( kernelFunctor ), - m_file( file ), - m_line( line ) - { - - } - /** configured kernel object * * this objects contains the functor and the starting parameter * - * @tparam T_VectorGrid type which defines the grid extents (type must be castable to CUDA dim3) - * @tparam T_VectorBlock type which defines the block extents (type must be castable to CUDA dim3) - * - * @param gridExtent grid extent configuration for the kernel - * @param blockExtent block extent configuration for the kernel - * @param sharedMemByte dynamic shared memory used by the kernel (in byte) + * @tparam T_Kernel pmacc 
Kernel object + * @tparam T_VectorGrid type which defines the grid extents (type must be castable to cupla dim3) + * @tparam T_VectorBlock type which defines the block extents (type must be castable to cupla dim3) */ - template< - typename T_VectorGrid, - typename T_VectorBlock - > - HINLINE - auto - operator()( - T_VectorGrid const & gridExtent, - T_VectorBlock const & blockExtent, - size_t const sharedMemByte = 0 - ) const - -> KernelStarter< - Kernel, - T_VectorGrid, - T_VectorBlock - >; - }; - - - template< - typename T_Kernel, - typename T_VectorGrid, - typename T_VectorBlock - > - struct KernelStarter - { - /** kernel functor */ - T_Kernel const m_kernel; - /** grid extents for the kernel */ - T_VectorGrid const m_gridExtent; - /** block extents for the kernel */ - T_VectorBlock const m_blockExtent; - /** dynamic shared memory consumed by the kernel (in byte) */ - size_t const m_sharedMemByte; + template + struct KernelStarter; - /** kernel starter object + /** wrapper for the user kernel functor * - * @param kernel pmacc Kernel + * contains debug information like filename and line of the kernel call */ - HINLINE KernelStarter( - T_Kernel const & kernel, - T_VectorGrid const & gridExtent, - T_VectorBlock const & blockExtent, - size_t const sharedMemByte - ) : - m_kernel( kernel ), - m_gridExtent( gridExtent ), - m_blockExtent( blockExtent ), - m_sharedMemByte( sharedMemByte ) + template + struct Kernel { - - } - - /** execute the kernel functor - * - * @tparam T_Args types of the arguments - * @param args arguments for the kernel functor + using KernelType = T_KernelFunctor; + /** functor */ + T_KernelFunctor const m_kernelFunctor; + /** file name from where the kernel is called */ + std::string const m_file; + /** line number in the file */ + size_t const m_line; + + /** + * + * @param gridExtent grid extent configuration for the kernel + * @param blockExtent block extent configuration for the kernel + * @param sharedMemByte dynamic shared memory used by the 
kernel (in byte ) + * @return + */ + HINLINE Kernel( + T_KernelFunctor const& kernelFunctor, + std::string const& file = std::string(), + size_t const line = 0) + : m_kernelFunctor(kernelFunctor) + , m_file(file) + , m_line(line) + { + } + + /** configured kernel object + * + * this objects contains the functor and the starting parameter + * + * @tparam T_VectorGrid type which defines the grid extents (type must be castable to cupla dim3) + * @tparam T_VectorBlock type which defines the block extents (type must be castable to cupla dim3) + * + * @param gridExtent grid extent configuration for the kernel + * @param blockExtent block extent configuration for the kernel + * @param sharedMemByte dynamic shared memory used by the kernel (in byte) + */ + template + HINLINE auto operator()( + T_VectorGrid const& gridExtent, + T_VectorBlock const& blockExtent, + size_t const sharedMemByte = 0) const -> KernelStarter; + }; + + + template + struct KernelStarter + { + /** kernel functor */ + T_Kernel const m_kernel; + /** grid extents for the kernel */ + T_VectorGrid const m_gridExtent; + /** block extents for the kernel */ + T_VectorBlock const m_blockExtent; + /** dynamic shared memory consumed by the kernel (in byte) */ + size_t const m_sharedMemByte; + + /** kernel starter object + * + * @param kernel pmacc Kernel + */ + HINLINE KernelStarter( + T_Kernel const& kernel, + T_VectorGrid const& gridExtent, + T_VectorBlock const& blockExtent, + size_t const sharedMemByte) + : m_kernel(kernel) + , m_gridExtent(gridExtent) + , m_blockExtent(blockExtent) + , m_sharedMemByte(sharedMemByte) + { + } + + /** execute the kernel functor + * + * @tparam T_Args types of the arguments + * @param args arguments for the kernel functor + * + * @{ + */ + template + HINLINE void operator()(T_Args const&... 
args) const + { + std::string const kernelName = typeid(m_kernel.m_kernelFunctor).name(); + std::string const kernelInfo = kernelName + std::string(" [") + m_kernel.m_file + std::string(":") + + std::to_string(m_kernel.m_line) + std::string(" ]"); + + CUDA_CHECK_KERNEL_MSG(cuplaDeviceSynchronize(), std::string("Crash before kernel call ") + kernelInfo); + + pmacc::TaskKernel* taskKernel + = pmacc::Environment<>::get().Factory().createTaskKernel(typeid(kernelName).name()); + + DataSpace::value> gridExtent(m_gridExtent); + + DataSpace::value> blockExtent(m_blockExtent); + + CUPLA_KERNEL(typename T_Kernel::KernelType) + (gridExtent.toDim3(), blockExtent.toDim3(), m_sharedMemByte, taskKernel->getCudaStream())(args...); + CUDA_CHECK_KERNEL_MSG( + cuplaGetLastError(), + std::string("Last error after kernel launch ") + kernelInfo); + CUDA_CHECK_KERNEL_MSG( + cuplaDeviceSynchronize(), + std::string("Crash after kernel launch ") + kernelInfo); + taskKernel->activateChecks(); + CUDA_CHECK_KERNEL_MSG( + cuplaDeviceSynchronize(), + std::string("Crash after kernel activation") + kernelInfo); + } + + template + HINLINE void operator()(T_Args const&... args) + { + return static_cast(*this)(args...); + } + + /** @} */ + }; + + + /** creates a kernel object * - * @{ + * @tparam T_KernelFunctor type of the kernel functor + * @param kernelFunctor instance of the functor + * @param file file name (for debug) + * @param line line number in the file (for debug) */ - template< - typename ... T_Args - > - HINLINE - void - operator()( - T_Args const & ... 
args - ) const - { - - std::string const kernelName = typeid( m_kernel.m_kernelFunctor ).name(); - std::string const kernelInfo = kernelName + - std::string( " [" ) + m_kernel.m_file + std::string( ":" ) + - std::to_string( m_kernel.m_line ) + std::string( " ]" ); - - CUDA_CHECK_KERNEL_MSG( - cudaDeviceSynchronize( ), - std::string( "Crash before kernel call " ) + kernelInfo - ); - - pmacc::TaskKernel* taskKernel = pmacc::Environment<>::get().Factory().createTaskKernel( - typeid( kernelName ).name() - ); - - DataSpace< - traits::GetNComponents< - T_VectorGrid - >::value - > gridExtent( m_gridExtent ); - - DataSpace< - traits::GetNComponents< - T_VectorBlock - >::value - > blockExtent( m_blockExtent ); - - CUPLA_KERNEL( typename T_Kernel::KernelType )( - gridExtent.toDim3(), - blockExtent.toDim3(), - m_sharedMemByte, - taskKernel->getCudaStream() - )( - args ... - ); - CUDA_CHECK_KERNEL_MSG( - cudaGetLastError( ), - std::string( "Last error after kernel launch " ) + kernelInfo - ); - CUDA_CHECK_KERNEL_MSG( - cudaDeviceSynchronize( ), - std::string( "Crash after kernel launch " ) + kernelInfo - ); - taskKernel->activateChecks( ); - CUDA_CHECK_KERNEL_MSG( - cudaDeviceSynchronize( ), - std::string( "Crash after kernel activation" ) + kernelInfo - ); - } - - template< - typename ... T_Args - > - HINLINE - void - operator()( - T_Args const &... args - ) + template + auto kernel( + T_KernelFunctor const& kernelFunctor, + std::string const& file = std::string(), + size_t const line = 0) -> Kernel { - return static_cast< const KernelStarter & >(*this)( args ... 
); + return Kernel(kernelFunctor, file, line); } - - /** @} */ - - }; - - - /** creates a kernel object - * - * @tparam T_KernelFunctor type of the kernel functor - * @param kernelFunctor instance of the functor - * @param file file name (for debug) - * @param line line number in the file (for debug) - */ - template< typename T_KernelFunctor > - auto kernel( - T_KernelFunctor const & kernelFunctor, - std::string const & file = std::string(), - size_t const line = 0 - ) -> Kernel< T_KernelFunctor > - { - return Kernel< T_KernelFunctor >( - kernelFunctor, - file, - line - ); - } -} // namespace exec + } // namespace exec } // namespace pmacc @@ -276,7 +211,7 @@ namespace exec * * @param ... instance of kernel functor */ -#define PMACC_KERNEL( ... ) ::pmacc::exec::kernel( __VA_ARGS__, __FILE__, static_cast< size_t >( __LINE__ ) ) +#define PMACC_KERNEL(...) ::pmacc::exec::kernel(__VA_ARGS__, __FILE__, static_cast(__LINE__)) #include "pmacc/eventSystem/events/kernelEvents.tpp" diff --git a/include/pmacc/eventSystem/events/kernelEvents.tpp b/include/pmacc/eventSystem/events/kernelEvents.tpp index 9efdd38e2c..30a4fd6c88 100644 --- a/include/pmacc/eventSystem/events/kernelEvents.tpp +++ b/include/pmacc/eventSystem/events/kernelEvents.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,36 +28,16 @@ namespace pmacc { -namespace exec -{ - template< typename T_KernelFunctor > - template< - typename T_VectorGrid, - typename T_VectorBlock - > - HINLINE - auto - Kernel< T_KernelFunctor >::operator()( - T_VectorGrid const & gridExtent, - T_VectorBlock const & blockExtent, - size_t const sharedMemByte - ) const - -> KernelStarter< - Kernel, - T_VectorGrid, - T_VectorBlock - > + namespace exec { - return KernelStarter< - Kernel, - T_VectorGrid, - T_VectorBlock - >( - *this, - gridExtent, - blockExtent, - sharedMemByte - ); - } -} // namespace exec + template + template + HINLINE auto Kernel::operator()( + T_VectorGrid const& gridExtent, + T_VectorBlock const& blockExtent, + size_t const sharedMemByte) const -> KernelStarter + { + return KernelStarter(*this, gridExtent, blockExtent, sharedMemByte); + } + } // namespace exec } // namespace pmacc diff --git a/include/pmacc/eventSystem/streams/EventStream.hpp b/include/pmacc/eventSystem/streams/EventStream.hpp index 4919badec5..784b5bd1ab 100644 --- a/include/pmacc/eventSystem/streams/EventStream.hpp +++ b/include/pmacc/eventSystem/streams/EventStream.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -25,57 +25,54 @@ #include "pmacc/types.hpp" - namespace pmacc { - -/** - * Wrapper for a single cuda stream. - * Allows recording cuda events on the stream. - */ -class EventStream -{ -public: - /** - * Constructor. - * Creates the cudaStream_t object. + * Wrapper for a single cupla stream. + * Allows recording cupla events on the stream. */ - EventStream() : stream(nullptr) + class EventStream { - CUDA_CHECK(cudaStreamCreate(&stream)); - } + public: + /** + * Constructor. + * Creates the cuplaStream_t object. + */ + EventStream() : stream(nullptr) + { + CUDA_CHECK(cuplaStreamCreate(&stream)); + } - /** - * Destructor. 
- * Waits for the stream to finish and destroys it. - */ - virtual ~EventStream() - { - // wait for all kernels in stream to finish - CUDA_CHECK_NO_EXCEPT(cudaStreamSynchronize(stream)); - CUDA_CHECK_NO_EXCEPT(cudaStreamDestroy(stream)); - } + /** + * Destructor. + * Waits for the stream to finish and destroys it. + */ + virtual ~EventStream() + { + // wait for all kernels in stream to finish + CUDA_CHECK_NO_EXCEPT(cuplaStreamSynchronize(stream)); + CUDA_CHECK_NO_EXCEPT(cuplaStreamDestroy(stream)); + } - /** - * Returns the cudaStream_t object associated with this EventStream. - * @return the internal cuda stream object - */ - cudaStream_t getCudaStream() const - { - return stream; - } + /** + * Returns the cuplaStream_t object associated with this EventStream. + * @return the internal cupla stream object + */ + cuplaStream_t getCudaStream() const + { + return stream; + } - void waitOn(const CudaEventHandle& ev) - { - if (this->stream != ev.getStream()) + void waitOn(const CudaEventHandle& ev) { - CUDA_CHECK(cudaStreamWaitEvent(this->getCudaStream(), *ev, 0)); + if(this->stream != ev.getStream()) + { + CUDA_CHECK(cuplaStreamWaitEvent(this->getCudaStream(), *ev, 0)); + } } - } -private: - cudaStream_t stream; -}; + private: + cuplaStream_t stream; + }; -} +} // namespace pmacc diff --git a/include/pmacc/eventSystem/streams/StreamController.hpp b/include/pmacc/eventSystem/streams/StreamController.hpp index 2148eacda9..be6c654055 100644 --- a/include/pmacc/eventSystem/streams/StreamController.hpp +++ b/include/pmacc/eventSystem/streams/StreamController.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -28,7 +28,6 @@ #include "pmacc/Environment.def" - #include #include #include @@ -42,7 +41,6 @@ namespace pmacc class StreamController { public: - /** * Returns a pointer to the next EventStream in the controller's queue. * @return pointer to next EventStream @@ -50,10 +48,11 @@ namespace pmacc EventStream* getNextStream() { if(!isActivated) - throw std::runtime_error(std::string("StreamController is not activated but getNextStream() was called")); + throw std::runtime_error( + std::string("StreamController is not activated but getNextStream() was called")); size_t oldIndex = currentStreamIndex; currentStreamIndex++; - if (currentStreamIndex == streams.size()) + if(currentStreamIndex == streams.size()) currentStreamIndex = 0; return streams[oldIndex]; @@ -65,8 +64,7 @@ namespace pmacc */ virtual ~StreamController() { - - for (size_t i = 0; i < streams.size(); i++) + for(size_t i = 0; i < streams.size(); i++) { __delete(streams[i]); } @@ -74,8 +72,8 @@ namespace pmacc /* This is the single point in PIC where ALL CUDA work must be finished. */ /* Accessing CUDA objects after this point may fail! */ - CUDA_CHECK_NO_EXCEPT(cudaDeviceSynchronize()); - CUDA_CHECK_NO_EXCEPT(cudaDeviceReset()); + CUDA_CHECK_NO_EXCEPT(cuplaDeviceSynchronize()); + CUDA_CHECK_NO_EXCEPT(cuplaDeviceReset()); } /** @@ -84,7 +82,7 @@ namespace pmacc */ void addStreams(size_t count) { - for (size_t i = 0; i < count; i++) + for(size_t i = 0; i < count; i++) { streams.push_back(new EventStream()); } @@ -97,7 +95,7 @@ namespace pmacc void activate() { addStreams(1); - isActivated=true; + isActivated = true; } /** @@ -110,13 +108,12 @@ namespace pmacc } private: - friend struct detail::Environment; /** * Constructor. 
*/ - StreamController() : isActivated(false),currentStreamIndex(0) + StreamController() : isActivated(false), currentStreamIndex(0) { } @@ -134,7 +131,6 @@ namespace pmacc std::vector streams; size_t currentStreamIndex; bool isActivated; - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/Factory.hpp b/include/pmacc/eventSystem/tasks/Factory.hpp index 22440e8564..b4a5a44a48 100644 --- a/include/pmacc/eventSystem/tasks/Factory.hpp +++ b/include/pmacc/eventSystem/tasks/Factory.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -30,13 +30,13 @@ namespace pmacc { - template + template class HostBuffer; - template + template class DeviceBuffer; - template + template class Exchange; class TaskKernel; @@ -49,110 +49,126 @@ namespace pmacc class Factory { public: - /** * creates a TaskCopyHostToDevice * @param src HostBuffer to copy data from * @param dst DeviceBuffer to copy data to - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskCopyHostToDevice(HostBuffer& src, DeviceBuffer& dst, - ITask *registeringTask = nullptr); + template + EventTask createTaskCopyHostToDevice( + HostBuffer& src, + DeviceBuffer& dst, + ITask* registeringTask = nullptr); /** * creates a TaskCopyDeviceToHost * @param src DeviceBuffer to copy data from * @param dst HostBuffer to copy data to - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskCopyDeviceToHost(DeviceBuffer& 
src, - HostBuffer& dst, - ITask *registeringTask = nullptr); + template + EventTask createTaskCopyDeviceToHost( + DeviceBuffer& src, + HostBuffer& dst, + ITask* registeringTask = nullptr); /** * creates a TaskCopyDeviceToDevice * @param src DeviceBuffer to copy data from * @param dst DeviceBuffer to copy data to - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskCopyDeviceToDevice( DeviceBuffer& src, DeviceBuffer& dst, - ITask *registeringTask = nullptr); + template + EventTask createTaskCopyDeviceToDevice( + DeviceBuffer& src, + DeviceBuffer& dst, + ITask* registeringTask = nullptr); /** * Creates a TaskReceive. * @param ex Exchange to create new TaskReceive with - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskReceive(Exchange &ex, - ITask *registeringTask = nullptr); + template + EventTask createTaskReceive(Exchange& ex, ITask* registeringTask = nullptr); /** * Creates a TaskSend. * @param ex Exchange to create new TaskSend with - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskSend(Exchange &ex, - ITask *registeringTask = nullptr); + template + EventTask createTaskSend(Exchange& ex, ITask* registeringTask = nullptr); /** * Creates a TaskSendMPI. 
* @param exchange Exchange to create new TaskSendMPI with - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskSendMPI(Exchange *ex, - ITask *registeringTask = nullptr); + template + EventTask createTaskSendMPI(Exchange* ex, ITask* registeringTask = nullptr); /** * Creates a TaskReceiveMPI. * @param ex Exchange to create new TaskReceiveMPI with - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskReceiveMPI(Exchange *ex, - ITask *registeringTask = nullptr); + template + EventTask createTaskReceiveMPI(Exchange* ex, ITask* registeringTask = nullptr); /** * Creates a new TaskSetValue. * @param dst destination DeviceBuffer to set value on * @param value value to be set in the DeviceBuffer - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskSetValue(DeviceBuffer& dst, const TYPE& value, - ITask *registeringTask = nullptr); + template + EventTask createTaskSetValue( + DeviceBuffer& dst, + const TYPE& value, + ITask* registeringTask = nullptr); /** * Creates a new TaskSetCurrentSizeOnDevice. 
* @param dst destination DeviceBuffer to set current size on * @param size size to be set on DeviceBuffer - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskSetCurrentSizeOnDevice(DeviceBuffer& dst, size_t size, - ITask *registeringTask = nullptr); + template + EventTask createTaskSetCurrentSizeOnDevice( + DeviceBuffer& dst, + size_t size, + ITask* registeringTask = nullptr); /** * Creates a new TaskGetCurrentSizeFromDevic. * @param buffer DeviceBuffer to get current size from - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ - template - EventTask createTaskGetCurrentSizeFromDevice(DeviceBuffer& buffer, - ITask *registeringTask = nullptr); + template + EventTask createTaskGetCurrentSizeFromDevice( + DeviceBuffer& buffer, + ITask* registeringTask = nullptr); /** * Creates a new TaskKernel. * @param kernelname name of the kernel which should be called - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer * @return the newly created TaskKernel */ - TaskKernel* createTaskKernel(std::string kernelname, ITask *registeringTask = nullptr); + TaskKernel* createTaskKernel(std::string kernelname, ITask* registeringTask = nullptr); /** * Starts a task by initialising it and adding it to the Manager's queue. 
@@ -160,23 +176,20 @@ namespace pmacc * @param task the ITask to start * @param registeringTask optional task which can be registered as an observer for task */ - EventTask startTask(ITask& task, ITask *registeringTask); + EventTask startTask(ITask& task, ITask* registeringTask); private: - friend struct detail::Environment; - Factory() {}; + Factory(){}; - Factory(const Factory&) { }; + Factory(const Factory&){}; static Factory& getInstance() { static Factory instance; return instance; } - }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/Factory.tpp b/include/pmacc/eventSystem/tasks/Factory.tpp index 428e99c9cc..07e97f8538 100644 --- a/include/pmacc/eventSystem/tasks/Factory.tpp +++ b/include/pmacc/eventSystem/tasks/Factory.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -43,19 +43,19 @@ namespace pmacc { - /** * creates a TaskCopyHostToDevice * @param src HostBuffer to copy data from * @param dst DeviceBuffer to copy data to * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskCopyHostToDevice(HostBuffer& src, DeviceBuffer& dst, - ITask *registeringTask) + template + inline EventTask Factory::createTaskCopyHostToDevice( + HostBuffer& src, + DeviceBuffer& dst, + ITask* registeringTask) { - - TaskCopyHostToDevice* task = new TaskCopyHostToDevice (src, dst); + TaskCopyHostToDevice* task = new TaskCopyHostToDevice(src, dst); return startTask(*task, registeringTask); } @@ -66,12 +66,13 @@ namespace pmacc * @param dst HostBuffer to copy data to * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskCopyDeviceToHost(DeviceBuffer& src, - HostBuffer& dst, - ITask *registeringTask) + template + 
inline EventTask Factory::createTaskCopyDeviceToHost( + DeviceBuffer& src, + HostBuffer& dst, + ITask* registeringTask) { - TaskCopyDeviceToHost* task = new TaskCopyDeviceToHost (src, dst); + TaskCopyDeviceToHost* task = new TaskCopyDeviceToHost(src, dst); return startTask(*task, registeringTask); } @@ -82,11 +83,13 @@ namespace pmacc * @param dst DeviceBuffer to copy data to * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskCopyDeviceToDevice( DeviceBuffer& src, DeviceBuffer& dst, - ITask *registeringTask) + template + inline EventTask Factory::createTaskCopyDeviceToDevice( + DeviceBuffer& src, + DeviceBuffer& dst, + ITask* registeringTask) { - TaskCopyDeviceToDevice* task = new TaskCopyDeviceToDevice (src, dst); + TaskCopyDeviceToDevice* task = new TaskCopyDeviceToDevice(src, dst); return startTask(*task, registeringTask); } @@ -96,11 +99,10 @@ namespace pmacc * @param ex Exchange to create new TaskReceive with * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskReceive(Exchange &ex, - ITask *registeringTask) + template + inline EventTask Factory::createTaskReceive(Exchange& ex, ITask* registeringTask) { - TaskReceive* task = new TaskReceive (ex); + TaskReceive* task = new TaskReceive(ex); return startTask(*task, registeringTask); } @@ -110,11 +112,10 @@ namespace pmacc * @param ex Exchange to create new TaskSend with * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskSend(Exchange &ex, - ITask *registeringTask) + template + inline EventTask Factory::createTaskSend(Exchange& ex, ITask* registeringTask) { - TaskSend* task = new TaskSend (ex); + TaskSend* task = new TaskSend(ex); return startTask(*task, registeringTask); } @@ 
-124,11 +125,10 @@ namespace pmacc * @param exchange Exchange to create new TaskSendMPI with * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskSendMPI(Exchange *ex, - ITask *registeringTask) + template + inline EventTask Factory::createTaskSendMPI(Exchange* ex, ITask* registeringTask) { - TaskSendMPI* task = new TaskSendMPI (ex); + TaskSendMPI* task = new TaskSendMPI(ex); return startTask(*task, registeringTask); } @@ -138,11 +138,10 @@ namespace pmacc * @param ex Exchange to create new TaskReceiveMPI with * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskReceiveMPI(Exchange *ex, - ITask *registeringTask) + template + inline EventTask Factory::createTaskReceiveMPI(Exchange* ex, ITask* registeringTask) { - TaskReceiveMPI* task = new TaskReceiveMPI (ex); + TaskReceiveMPI* task = new TaskReceiveMPI(ex); return startTask(*task, registeringTask); } @@ -153,20 +152,21 @@ namespace pmacc * @param value value to be set in the DeviceBuffer * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskSetValue(DeviceBuffer& dst,const TYPE& value, - ITask *registeringTask) + template + inline EventTask Factory::createTaskSetValue( + DeviceBuffer& dst, + const TYPE& value, + ITask* registeringTask) { - /* sizeof(TYPE)<256 use fast set method for small data and slow method for big data * the rest of 256bytes are reserved for other kernel parameter */ enum { - isSmall = (sizeof (TYPE) <= 128) - }; //if we use const variable the compiler create warnings + isSmall = (sizeof(TYPE) <= 128) + }; // if we use const variable the compiler create warnings - TaskSetValue* task = new TaskSetValue (dst, value); + TaskSetValue* task = new TaskSetValue(dst, value); 
return startTask(*task, registeringTask); } @@ -177,11 +177,13 @@ namespace pmacc * @param size size to be set on DeviceBuffer * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskSetCurrentSizeOnDevice(DeviceBuffer& dst, size_t size, - ITask *registeringTask) + template + inline EventTask Factory::createTaskSetCurrentSizeOnDevice( + DeviceBuffer& dst, + size_t size, + ITask* registeringTask) { - TaskSetCurrentSizeOnDevice* task = new TaskSetCurrentSizeOnDevice (dst, size); + TaskSetCurrentSizeOnDevice* task = new TaskSetCurrentSizeOnDevice(dst, size); return startTask(*task, registeringTask); } @@ -191,11 +193,12 @@ namespace pmacc * @param buffer DeviceBuffer to get current size from * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer */ - template - inline EventTask Factory::createTaskGetCurrentSizeFromDevice(DeviceBuffer& buffer, - ITask *registeringTask) + template + inline EventTask Factory::createTaskGetCurrentSizeFromDevice( + DeviceBuffer& buffer, + ITask* registeringTask) { - TaskGetCurrentSizeFromDevice* task = new TaskGetCurrentSizeFromDevice (buffer); + TaskGetCurrentSizeFromDevice* task = new TaskGetCurrentSizeFromDevice(buffer); return startTask(*task, registeringTask); } @@ -206,20 +209,21 @@ namespace pmacc * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer * @return the newly created TaskKernel */ - inline TaskKernel* Factory::createTaskKernel(std::string kernelname, ITask *registeringTask) + inline TaskKernel* Factory::createTaskKernel(std::string kernelname, ITask* registeringTask) { TaskKernel* task = new TaskKernel(kernelname); - if (registeringTask != nullptr) + if(registeringTask != nullptr) task->addObserver(registeringTask); return task; } - inline EventTask Factory::startTask(ITask& task, ITask *registeringTask 
) + inline EventTask Factory::startTask(ITask& task, ITask* registeringTask) { - if (registeringTask != nullptr){ + if(registeringTask != nullptr) + { task.addObserver(registeringTask); } EventTask event(task.getId()); @@ -232,7 +236,4 @@ namespace pmacc } -} //namespace pmacc - - - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/ITask.hpp b/include/pmacc/eventSystem/tasks/ITask.hpp index 0f4f1eed80..e8b28402a8 100644 --- a/include/pmacc/eventSystem/tasks/ITask.hpp +++ b/include/pmacc/eventSystem/tasks/ITask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -36,19 +36,23 @@ namespace pmacc /** * Abstract base class for all tasks. */ - class ITask : public EventNotify, public IEvent + class ITask + : public EventNotify + , public IEvent { public: - enum TaskType { - TASK_UNKNOWN, TASK_CUDA, TASK_MPI, TASK_HOST + TASK_UNKNOWN, + TASK_DEVICE, + TASK_MPI, + TASK_HOST }; /** * constructor */ - ITask(): myType(ITask::TASK_UNKNOWN) + ITask() : myType(ITask::TASK_UNKNOWN) { // task id 0 is reserved for invalid static id_t globalId = 1; @@ -69,7 +73,7 @@ namespace pmacc */ bool execute() { - //std::cout << "execute: " << toString() << std::endl; + // std::cout << "execute: " << toString() << std::endl; return executeIntern(); } @@ -77,7 +81,7 @@ namespace pmacc * Initializes the task. * Must be called before adding the task to the Manager's queue. */ - virtual void init()=0; + virtual void init() = 0; /** * Returns the unique id of this task. 
@@ -116,6 +120,7 @@ namespace pmacc * @return a string naming this task */ virtual std::string toString() = 0; + protected: virtual bool executeIntern() = 0; @@ -123,4 +128,4 @@ namespace pmacc TaskType myType; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/MPITask.hpp b/include/pmacc/eventSystem/tasks/MPITask.hpp index 542a5fcf2a..af5c7f9574 100644 --- a/include/pmacc/eventSystem/tasks/MPITask.hpp +++ b/include/pmacc/eventSystem/tasks/MPITask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -27,21 +27,17 @@ namespace pmacc { - /** * Abstract base class for all tasks which depend on MPI communication. */ class MPITask : public ITask { public: - /** * Constructor. * Starts a MPI operation on the transaction system. */ - MPITask() : - ITask(), - finished(false) + MPITask() : ITask(), finished(false) { this->setTaskType(ITask::TASK_MPI); } @@ -54,7 +50,6 @@ namespace pmacc } protected: - /** * Returns if the task is finished. * @@ -72,7 +67,8 @@ namespace pmacc { finished = true; } + private: bool finished; }; -} +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/StreamTask.hpp b/include/pmacc/eventSystem/tasks/StreamTask.hpp index dce83d07ed..a816972a64 100644 --- a/include/pmacc/eventSystem/tasks/StreamTask.hpp +++ b/include/pmacc/eventSystem/tasks/StreamTask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Rene Widera * * This file is part of PMacc. * @@ -29,12 +29,11 @@ namespace pmacc class EventStream; /** - * Abstract base class for all tasks which depend on cuda streams. + * Abstract base class for all tasks which depend on cupla streams. */ class StreamTask : public ITask { public: - /** * Constructor * @@ -50,19 +49,19 @@ namespace pmacc } /** - * Returns the cuda event associated with this task. 
+ * Returns the cupla event associated with this task. * An event has to be recorded or set before calling this. * - * @return the task's cuda event + * @return the task's cupla event */ CudaEventHandle getCudaEventHandle() const; /** * Sets the * - * @param cudaEvent + * @param cuplaEvent */ - void setCudaEventHandle(const CudaEventHandle& cudaEvent); + void setCudaEventHandle(const CudaEventHandle& cuplaEvent); /** * Returns if this task is finished. @@ -86,25 +85,24 @@ namespace pmacc void setEventStream(EventStream* newStream); /** - * Returns the cuda stream of the underlying EventStream. + * Returns the cupla stream of the underlying EventStream. * - * @return the associated cuda stream + * @return the associated cupla stream */ - cudaStream_t getCudaStream(); + cuplaStream_t getCudaStream(); protected: - /** * Activates this task by recording an event on its stream. */ inline void activate(); - EventStream *stream; - CudaEventHandle cudaEvent; + EventStream* stream; + CudaEventHandle cuplaEvent; bool hasCudaEventHandle; bool alwaysFinished; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/StreamTask.tpp b/include/pmacc/eventSystem/tasks/StreamTask.tpp index 2a36779238..c328a28636 100644 --- a/include/pmacc/eventSystem/tasks/StreamTask.tpp +++ b/include/pmacc/eventSystem/tasks/StreamTask.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -29,69 +29,64 @@ namespace pmacc { + inline StreamTask::StreamTask() : ITask(), stream(nullptr), hasCudaEventHandle(false), alwaysFinished(false) + { + this->setTaskType(ITask::TASK_DEVICE); + } -inline StreamTask::StreamTask( ) : -ITask( ), -stream( nullptr ), -hasCudaEventHandle( false ), -alwaysFinished( false ) -{ - this->setTaskType( ITask::TASK_CUDA ); -} - -inline CudaEventHandle StreamTask::getCudaEventHandle( ) const -{ - PMACC_ASSERT( hasCudaEventHandle ); - return cudaEvent; -} + inline CudaEventHandle StreamTask::getCudaEventHandle() const + { + PMACC_ASSERT(hasCudaEventHandle); + return cuplaEvent; + } -inline void StreamTask::setCudaEventHandle(const CudaEventHandle& cudaEvent ) -{ - this->hasCudaEventHandle = true; - this->cudaEvent = cudaEvent; -} + inline void StreamTask::setCudaEventHandle(const CudaEventHandle& cuplaEvent) + { + this->hasCudaEventHandle = true; + this->cuplaEvent = cuplaEvent; + } -inline bool StreamTask::isFinished( ) -{ - if ( alwaysFinished ) - return true; - if ( hasCudaEventHandle ) + inline bool StreamTask::isFinished() { - if ( cudaEvent.isFinished( ) ) - { - alwaysFinished = true; + if(alwaysFinished) return true; + if(hasCudaEventHandle) + { + if(cuplaEvent.isFinished()) + { + alwaysFinished = true; + return true; + } } + return false; } - return false; -} -inline EventStream* StreamTask::getEventStream( ) -{ - if ( stream == nullptr ) - stream = __getEventStream( TASK_CUDA ); - return stream; -} + inline EventStream* StreamTask::getEventStream() + { + if(stream == nullptr) + stream = __getEventStream(TASK_DEVICE); + return stream; + } -inline void StreamTask::setEventStream( EventStream* newStream ) -{ - PMACC_ASSERT( newStream != nullptr ); - PMACC_ASSERT( stream == nullptr ); //it is only allowed to set a stream if no stream is set before - this->stream = newStream; -} + inline void StreamTask::setEventStream(EventStream* newStream) + { + PMACC_ASSERT(newStream != nullptr); + PMACC_ASSERT(stream == nullptr); // 
it is only allowed to set a stream if no stream is set before + this->stream = newStream; + } -inline cudaStream_t StreamTask::getCudaStream( ) -{ - if ( stream == nullptr ) - stream = Environment<>::get( ).TransactionManager( ).getEventStream( TASK_CUDA ); - return stream->getCudaStream( ); -} + inline cuplaStream_t StreamTask::getCudaStream() + { + if(stream == nullptr) + stream = Environment<>::get().TransactionManager().getEventStream(TASK_DEVICE); + return stream->getCudaStream(); + } -inline void StreamTask::activate( ) -{ - cudaEvent = Environment<>::get().EventPool( ).pop( ); - cudaEvent.recordEvent( getCudaStream( ) ); - hasCudaEventHandle = true; -} + inline void StreamTask::activate() + { + cuplaEvent = Environment<>::get().EventPool().pop(); + cuplaEvent.recordEvent(getCudaStream()); + hasCudaEventHandle = true; + } -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskCopyDeviceToDevice.hpp b/include/pmacc/eventSystem/tasks/TaskCopyDeviceToDevice.hpp index fe64d8517c..c39a224459 100644 --- a/include/pmacc/eventSystem/tasks/TaskCopyDeviceToDevice.hpp +++ b/include/pmacc/eventSystem/tasks/TaskCopyDeviceToDevice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -28,23 +28,19 @@ #include "pmacc/types.hpp" - namespace pmacc { - - template + template class DeviceBuffer; - template + template class TaskCopyDeviceToDeviceBase : public StreamTask { public: - - TaskCopyDeviceToDeviceBase( DeviceBuffer& src, DeviceBuffer& dst) : - StreamTask() + TaskCopyDeviceToDeviceBase(DeviceBuffer& src, DeviceBuffer& dst) : StreamTask() { - this->source = & src; - this->destination = & dst; + this->source = &src; + this->destination = &dst; } virtual ~TaskCopyDeviceToDeviceBase() @@ -59,7 +55,6 @@ namespace pmacc void event(id_t, EventType, IEventData*) { - } virtual void init() @@ -67,7 +62,7 @@ namespace pmacc size_t current_size = source->getCurrentSize(); destination->setCurrentSize(current_size); DataSpace devCurrentSize = source->getCurrentDataSpace(current_size); - if (source->is1D() && destination->is1D()) + if(source->is1D() && destination->is1D()) fastCopy(source->getPointer(), destination->getPointer(), devCurrentSize.productOfComponents()); else copy(devCurrentSize); @@ -81,114 +76,99 @@ namespace pmacc } protected: - - virtual void copy(DataSpace &devCurrentSize) = 0; + virtual void copy(DataSpace& devCurrentSize) = 0; void fastCopy(TYPE* src, TYPE* dst, size_t size) { - CUDA_CHECK(cudaMemcpyAsync(dst, - src, - size * sizeof (TYPE), cudaMemcpyDeviceToDevice, - this->getCudaStream())); + CUDA_CHECK( + cuplaMemcpyAsync(dst, src, size * sizeof(TYPE), cuplaMemcpyDeviceToDevice, this->getCudaStream())); } - DeviceBuffer *source; - DeviceBuffer *destination; + DeviceBuffer* source; + DeviceBuffer* destination; }; - template + template class TaskCopyDeviceToDevice; - template + template class TaskCopyDeviceToDevice : public TaskCopyDeviceToDeviceBase { public: - - TaskCopyDeviceToDevice(DeviceBuffer& src, DeviceBuffer& dst) : - TaskCopyDeviceToDeviceBase(src, dst) + TaskCopyDeviceToDevice(DeviceBuffer& src, DeviceBuffer& dst) + : TaskCopyDeviceToDeviceBase(src, dst) { } private: - - virtual void copy(DataSpace &devCurrentSize) + 
virtual void copy(DataSpace& devCurrentSize) { - - CUDA_CHECK(cudaMemcpyAsync(this->destination->getPointer(), - this->source->getPointer(), - devCurrentSize[0] * sizeof (TYPE), cudaMemcpyDeviceToDevice, - this->getCudaStream())); + CUDA_CHECK(cuplaMemcpyAsync( + this->destination->getPointer(), + this->source->getPointer(), + devCurrentSize[0] * sizeof(TYPE), + cuplaMemcpyDeviceToDevice, + this->getCudaStream())); } - }; - template + template class TaskCopyDeviceToDevice : public TaskCopyDeviceToDeviceBase { public: - - TaskCopyDeviceToDevice( DeviceBuffer& src, DeviceBuffer& dst) : - TaskCopyDeviceToDeviceBase(src, dst) + TaskCopyDeviceToDevice(DeviceBuffer& src, DeviceBuffer& dst) + : TaskCopyDeviceToDeviceBase(src, dst) { } private: - - virtual void copy(DataSpace &devCurrentSize) + virtual void copy(DataSpace& devCurrentSize) { - CUDA_CHECK(cudaMemcpy2DAsync(this->destination->getPointer(), - this->destination->getPitch(), - this->source->getPointer(), - this->source->getPitch(), - devCurrentSize[0] * sizeof (TYPE), - devCurrentSize[1], - cudaMemcpyDeviceToDevice, - this->getCudaStream())); - + CUDA_CHECK(cuplaMemcpy2DAsync( + this->destination->getPointer(), + this->destination->getPitch(), + this->source->getPointer(), + this->source->getPitch(), + devCurrentSize[0] * sizeof(TYPE), + devCurrentSize[1], + cuplaMemcpyDeviceToDevice, + this->getCudaStream())); } - }; - template + template class TaskCopyDeviceToDevice : public TaskCopyDeviceToDeviceBase { public: - - TaskCopyDeviceToDevice( DeviceBuffer& src, DeviceBuffer& dst) : - TaskCopyDeviceToDeviceBase(src, dst) + TaskCopyDeviceToDevice(DeviceBuffer& src, DeviceBuffer& dst) + : TaskCopyDeviceToDeviceBase(src, dst) { } private: - - virtual void copy(DataSpace &devCurrentSize) + virtual void copy(DataSpace& devCurrentSize) { - - cudaMemcpy3DParms params; + cuplaMemcpy3DParms params; params.srcArray = nullptr; - params.srcPos = make_cudaPos( - this->source->getOffset()[0] * sizeof (TYPE), - 
this->source->getOffset()[1], - this->source->getOffset()[2]); + params.srcPos = make_cuplaPos( + this->source->getOffset()[0] * sizeof(TYPE), + this->source->getOffset()[1], + this->source->getOffset()[2]); params.srcPtr = this->source->getCudaPitched(); params.dstArray = nullptr; - params.dstPos = make_cudaPos( - this->destination->getOffset()[0] * sizeof (TYPE), - this->destination->getOffset()[1], - this->destination->getOffset()[2]); + params.dstPos = make_cuplaPos( + this->destination->getOffset()[0] * sizeof(TYPE), + this->destination->getOffset()[1], + this->destination->getOffset()[2]); ; params.dstPtr = this->destination->getCudaPitched(); - params.extent = make_cudaExtent( - devCurrentSize[0] * sizeof (TYPE), - devCurrentSize[1], - devCurrentSize[2]); - params.kind = cudaMemcpyDeviceToDevice; - CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); + params.extent = make_cuplaExtent(devCurrentSize[0] * sizeof(TYPE), devCurrentSize[1], devCurrentSize[2]); + params.kind = cuplaMemcpyDeviceToDevice; + CUDA_CHECK(cuplaMemcpy3DAsync(¶ms, this->getCudaStream())); } - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskCopyDeviceToHost.hpp b/include/pmacc/eventSystem/tasks/TaskCopyDeviceToHost.hpp index 4ddd908e57..0e7bb501cd 100644 --- a/include/pmacc/eventSystem/tasks/TaskCopyDeviceToHost.hpp +++ b/include/pmacc/eventSystem/tasks/TaskCopyDeviceToHost.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -27,27 +27,23 @@ #include "pmacc/eventSystem/tasks/StreamTask.hpp" - #include namespace pmacc { - - template + template class HostBuffer; - template + template class DeviceBuffer; - template + template class TaskCopyDeviceToHostBase : public StreamTask { public: - - TaskCopyDeviceToHostBase( DeviceBuffer& src, HostBuffer& dst) : - StreamTask() + TaskCopyDeviceToHostBase(DeviceBuffer& src, HostBuffer& dst) : StreamTask() { - this->host = & dst; - this->device = & src; + this->host = &dst; + this->device = &src; } virtual ~TaskCopyDeviceToHostBase() @@ -74,8 +70,8 @@ namespace pmacc size_t current_size = device->getCurrentSize(); host->setCurrentSize(current_size); DataSpace devCurrentSize = device->getCurrentDataSpace(current_size); - if (host->is1D() && device->is1D()) - fastCopy(device->getPointer(),host->getPointer(), devCurrentSize.productOfComponents()); + if(host->is1D() && device->is1D()) + fastCopy(device->getPointer(), host->getPointer(), devCurrentSize.productOfComponents()); else copy(devCurrentSize); @@ -83,118 +79,101 @@ namespace pmacc } protected: + virtual void copy(DataSpace& devCurrentSize) = 0; - virtual void copy(DataSpace &devCurrentSize) = 0; - - void fastCopy(TYPE* src,TYPE* dst, size_t size) + void fastCopy(TYPE* src, TYPE* dst, size_t size) { - CUDA_CHECK(cudaMemcpyAsync(dst, - src, - size * sizeof (TYPE), - cudaMemcpyDeviceToHost, - this->getCudaStream())); + CUDA_CHECK( + cuplaMemcpyAsync(dst, src, size * sizeof(TYPE), cuplaMemcpyDeviceToHost, this->getCudaStream())); } - HostBuffer *host; - DeviceBuffer *device; + HostBuffer* host; + DeviceBuffer* device; }; - template + template class TaskCopyDeviceToHost; - template + template class TaskCopyDeviceToHost : public TaskCopyDeviceToHostBase { public: - - TaskCopyDeviceToHost( DeviceBuffer& src, HostBuffer& dst) : - TaskCopyDeviceToHostBase(src, dst) + TaskCopyDeviceToHost(DeviceBuffer& src, HostBuffer& dst) + : TaskCopyDeviceToHostBase(src, dst) { } private: - - virtual void 
copy(DataSpace &devCurrentSize) + virtual void copy(DataSpace& devCurrentSize) { - - CUDA_CHECK(cudaMemcpyAsync(this->host->getBasePointer(), - this->device->getPointer(), - devCurrentSize[0] * sizeof (TYPE), - cudaMemcpyDeviceToHost, - this->getCudaStream())); - + CUDA_CHECK(cuplaMemcpyAsync( + this->host->getBasePointer(), + this->device->getPointer(), + devCurrentSize[0] * sizeof(TYPE), + cuplaMemcpyDeviceToHost, + this->getCudaStream())); } - }; - template + template class TaskCopyDeviceToHost : public TaskCopyDeviceToHostBase { public: - - TaskCopyDeviceToHost(DeviceBuffer& src, HostBuffer& dst) : - TaskCopyDeviceToHostBase(src, dst) + TaskCopyDeviceToHost(DeviceBuffer& src, HostBuffer& dst) + : TaskCopyDeviceToHostBase(src, dst) { } private: - - virtual void copy(DataSpace &devCurrentSize) + virtual void copy(DataSpace& devCurrentSize) { - CUDA_CHECK(cudaMemcpy2DAsync(this->host->getBasePointer(), - this->host->getDataSpace()[0] * sizeof (TYPE), /*this is pitch*/ - this->device->getPointer(), - this->device->getPitch(), /*this is pitch*/ - devCurrentSize[0] * sizeof (TYPE), - devCurrentSize[1], - cudaMemcpyDeviceToHost, - this->getCudaStream())); - + CUDA_CHECK(cuplaMemcpy2DAsync( + this->host->getBasePointer(), + this->host->getDataSpace()[0] * sizeof(TYPE), /*this is pitch*/ + this->device->getPointer(), + this->device->getPitch(), /*this is pitch*/ + devCurrentSize[0] * sizeof(TYPE), + devCurrentSize[1], + cuplaMemcpyDeviceToHost, + this->getCudaStream())); } - }; - template + template class TaskCopyDeviceToHost : public TaskCopyDeviceToHostBase { public: - - TaskCopyDeviceToHost( DeviceBuffer& src, HostBuffer& dst) : - TaskCopyDeviceToHostBase(src, dst) + TaskCopyDeviceToHost(DeviceBuffer& src, HostBuffer& dst) + : TaskCopyDeviceToHostBase(src, dst) { } private: - - virtual void copy(DataSpace &devCurrentSize) + virtual void copy(DataSpace& devCurrentSize) { - cudaPitchedPtr hostPtr; - hostPtr.pitch = this->host->getDataSpace()[0] * sizeof (TYPE); + 
cuplaPitchedPtr hostPtr; + hostPtr.pitch = this->host->getDataSpace()[0] * sizeof(TYPE); hostPtr.ptr = this->host->getBasePointer(); - hostPtr.xsize = this->host->getDataSpace()[0] * sizeof (TYPE); + hostPtr.xsize = this->host->getDataSpace()[0] * sizeof(TYPE); hostPtr.ysize = this->host->getDataSpace()[1]; - cudaMemcpy3DParms params; + cuplaMemcpy3DParms params; params.srcArray = nullptr; - params.srcPos = make_cudaPos(this->device->getOffset()[0] * sizeof (TYPE), - this->device->getOffset()[1], - this->device->getOffset()[2]); + params.srcPos = make_cuplaPos( + this->device->getOffset()[0] * sizeof(TYPE), + this->device->getOffset()[1], + this->device->getOffset()[2]); params.srcPtr = this->device->getCudaPitched(); params.dstArray = nullptr; - params.dstPos = make_cudaPos(0, 0, 0); + params.dstPos = make_cuplaPos(0, 0, 0); params.dstPtr = hostPtr; - params.extent = make_cudaExtent( - devCurrentSize[0] * sizeof (TYPE), - devCurrentSize[1], - devCurrentSize[2]); - params.kind = cudaMemcpyDeviceToHost; - - CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); + params.extent = make_cuplaExtent(devCurrentSize[0] * sizeof(TYPE), devCurrentSize[1], devCurrentSize[2]); + params.kind = cuplaMemcpyDeviceToHost; + CUDA_CHECK(cuplaMemcpy3DAsync(¶ms, this->getCudaStream())); } - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskCopyHostToDevice.hpp b/include/pmacc/eventSystem/tasks/TaskCopyHostToDevice.hpp index 716efb5ea3..9c34bb15d6 100644 --- a/include/pmacc/eventSystem/tasks/TaskCopyHostToDevice.hpp +++ b/include/pmacc/eventSystem/tasks/TaskCopyHostToDevice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -27,25 +27,21 @@ #include "pmacc/eventSystem/tasks/StreamTask.hpp" - namespace pmacc { - - template + template class HostBuffer; - template + template class DeviceBuffer; - template + template class TaskCopyHostToDeviceBase : public StreamTask { public: - - TaskCopyHostToDeviceBase(HostBuffer& src, DeviceBuffer& dst) : - StreamTask() + TaskCopyHostToDeviceBase(HostBuffer& src, DeviceBuffer& dst) : StreamTask() { - this->host = & src; - this->device = & dst; + this->host = &src; + this->device = &dst; } virtual ~TaskCopyHostToDeviceBase() @@ -66,13 +62,13 @@ namespace pmacc { size_t current_size = host->getCurrentSize(); DataSpace hostCurrentSize = host->getCurrentDataSpace(current_size); - /* IMPORTENT: `setCurrentSize()` must be called before the native cuda memcopy + /* IMPORTENT: `setCurrentSize()` must be called before the native cupla memcopy * is called else `setCurrentSize()` is not handled as part of this task. * The reason for that is that the native memcopy calls `this->getCudaStream()` * but not register an task before this `init()` is finished. 
*/ device->setCurrentSize(current_size); - if (host->is1D() && device->is1D()) + if(host->is1D() && device->is1D()) fastCopy(host->getPointer(), device->getPointer(), hostCurrentSize.productOfComponents()); else copy(hostCurrentSize); @@ -87,110 +83,104 @@ namespace pmacc protected: - - virtual void copy(DataSpace &hostCurrentSize) = 0; + virtual void copy(DataSpace& hostCurrentSize) = 0; void fastCopy(TYPE* src, TYPE* dst, size_t size) { - CUDA_CHECK(cudaMemcpyAsync(dst, - src, - size * sizeof (TYPE), - cudaMemcpyHostToDevice, - this->getCudaStream())); + CUDA_CHECK( + cuplaMemcpyAsync(dst, src, size * sizeof(TYPE), cuplaMemcpyHostToDevice, this->getCudaStream())); } - HostBuffer *host; - DeviceBuffer *device; - + HostBuffer* host; + DeviceBuffer* device; }; - template + template class TaskCopyHostToDevice; - template + template class TaskCopyHostToDevice : public TaskCopyHostToDeviceBase { public: - - TaskCopyHostToDevice(HostBuffer& src, DeviceBuffer& dst) : - TaskCopyHostToDeviceBase(src, dst) + TaskCopyHostToDevice(HostBuffer& src, DeviceBuffer& dst) + : TaskCopyHostToDeviceBase(src, dst) { } - private: - virtual void copy(DataSpace &hostCurrentSize) + private: + virtual void copy(DataSpace& hostCurrentSize) { - CUDA_CHECK(cudaMemcpyAsync(this->device->getPointer(), /*pointer include X offset*/ - this->host->getBasePointer(), - hostCurrentSize[0] * sizeof (TYPE), cudaMemcpyHostToDevice, - this->getCudaStream())); + CUDA_CHECK(cuplaMemcpyAsync( + this->device->getPointer(), /*pointer include X offset*/ + this->host->getBasePointer(), + hostCurrentSize[0] * sizeof(TYPE), + cuplaMemcpyHostToDevice, + this->getCudaStream())); } }; - template + template class TaskCopyHostToDevice : public TaskCopyHostToDeviceBase { public: - - TaskCopyHostToDevice( HostBuffer& src, DeviceBuffer& dst) : - TaskCopyHostToDeviceBase(src, dst) + TaskCopyHostToDevice(HostBuffer& src, DeviceBuffer& dst) + : TaskCopyHostToDeviceBase(src, dst) { } - private: - virtual void copy(DataSpace 
&hostCurrentSize) + private: + virtual void copy(DataSpace& hostCurrentSize) { - CUDA_CHECK(cudaMemcpy2DAsync(this->device->getPointer(), - this->device->getPitch(), /*this is pitch*/ - this->host->getBasePointer(), - this->host->getDataSpace()[0] * sizeof (TYPE), /*this is pitch*/ - hostCurrentSize[0] * sizeof (TYPE), - hostCurrentSize[1], - cudaMemcpyHostToDevice, - this->getCudaStream())); + CUDA_CHECK(cuplaMemcpy2DAsync( + this->device->getPointer(), + this->device->getPitch(), /*this is pitch*/ + this->host->getBasePointer(), + this->host->getDataSpace()[0] * sizeof(TYPE), /*this is pitch*/ + hostCurrentSize[0] * sizeof(TYPE), + hostCurrentSize[1], + cuplaMemcpyHostToDevice, + this->getCudaStream())); } }; - template + template class TaskCopyHostToDevice : public TaskCopyHostToDeviceBase { public: - - TaskCopyHostToDevice( HostBuffer& src, DeviceBuffer& dst) : - TaskCopyHostToDeviceBase(src, dst) + TaskCopyHostToDevice(HostBuffer& src, DeviceBuffer& dst) + : TaskCopyHostToDeviceBase(src, dst) { } - private: - virtual void copy(DataSpace &hostCurrentSize) + private: + virtual void copy(DataSpace& hostCurrentSize) { - cudaPitchedPtr hostPtr; - hostPtr.pitch = this->host->getDataSpace()[0] * sizeof (TYPE); + cuplaPitchedPtr hostPtr; + hostPtr.pitch = this->host->getDataSpace()[0] * sizeof(TYPE); hostPtr.ptr = this->host->getBasePointer(); - hostPtr.xsize = this->host->getDataSpace()[0] * sizeof (TYPE); + hostPtr.xsize = this->host->getDataSpace()[0] * sizeof(TYPE); hostPtr.ysize = this->host->getDataSpace()[1]; - cudaMemcpy3DParms params; + cuplaMemcpy3DParms params; params.dstArray = nullptr; - params.dstPos = make_cudaPos(this->device->getOffset()[0] * sizeof (TYPE), - this->device->getOffset()[1], - this->device->getOffset()[2]); + params.dstPos = make_cuplaPos( + this->device->getOffset()[0] * sizeof(TYPE), + this->device->getOffset()[1], + this->device->getOffset()[2]); params.dstPtr = this->device->getCudaPitched(); params.srcArray = nullptr; - 
params.srcPos = make_cudaPos(0, 0, 0); + params.srcPos = make_cuplaPos(0, 0, 0); params.srcPtr = hostPtr; - params.extent = make_cudaExtent( - hostCurrentSize[0] * sizeof (TYPE), - hostCurrentSize[1], - hostCurrentSize[2]); - params.kind = cudaMemcpyHostToDevice; + params.extent + = make_cuplaExtent(hostCurrentSize[0] * sizeof(TYPE), hostCurrentSize[1], hostCurrentSize[2]); + params.kind = cuplaMemcpyHostToDevice; - CUDA_CHECK(cudaMemcpy3DAsync(¶ms, this->getCudaStream())); + CUDA_CHECK(cuplaMemcpy3DAsync(¶ms, this->getCudaStream())); } }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskGetCurrentSizeFromDevice.hpp b/include/pmacc/eventSystem/tasks/TaskGetCurrentSizeFromDevice.hpp index 1d9117e727..15d67a140e 100644 --- a/include/pmacc/eventSystem/tasks/TaskGetCurrentSizeFromDevice.hpp +++ b/include/pmacc/eventSystem/tasks/TaskGetCurrentSizeFromDevice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. 
@@ -29,58 +29,52 @@ #include "pmacc/types.hpp" - - namespace pmacc { + template + class DeviceBuffer; - -template -class DeviceBuffer; - -template -class TaskGetCurrentSizeFromDevice : public StreamTask -{ -public: - - TaskGetCurrentSizeFromDevice(DeviceBuffer& buffer): - StreamTask() - { - this->buffer = & buffer; - } - - virtual ~TaskGetCurrentSizeFromDevice() - { - notify(this->myId,GETVALUE, nullptr); - } - - bool executeIntern() + template + class TaskGetCurrentSizeFromDevice : public StreamTask { - return isFinished(); - } - - void event(id_t, EventType, IEventData*) - { - } - - virtual void init() - { - CUDA_CHECK(cudaMemcpyAsync((void*) buffer->getCurrentSizeHostSidePointer(), - buffer->getCurrentSizeOnDevicePointer(), - sizeof (size_t), - cudaMemcpyDeviceToHost, - this->getCudaStream())); - this->activate(); - } - - virtual std::string toString() - { - return "TaskGetCurrentSizeFromDevice"; - } - -private: - - DeviceBuffer *buffer; -}; - -} //namespace pmacc + public: + TaskGetCurrentSizeFromDevice(DeviceBuffer& buffer) : StreamTask() + { + this->buffer = &buffer; + } + + virtual ~TaskGetCurrentSizeFromDevice() + { + notify(this->myId, GETVALUE, nullptr); + } + + bool executeIntern() + { + return isFinished(); + } + + void event(id_t, EventType, IEventData*) + { + } + + virtual void init() + { + CUDA_CHECK(cuplaMemcpyAsync( + (void*) buffer->getCurrentSizeHostSidePointer(), + buffer->getCurrentSizeOnDevicePointer(), + sizeof(size_t), + cuplaMemcpyDeviceToHost, + this->getCudaStream())); + this->activate(); + } + + virtual std::string toString() + { + return "TaskGetCurrentSizeFromDevice"; + } + + private: + DeviceBuffer* buffer; + }; + +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskKernel.hpp b/include/pmacc/eventSystem/tasks/TaskKernel.hpp index 2c330b5729..44af6cab5d 100644 --- a/include/pmacc/eventSystem/tasks/TaskKernel.hpp +++ b/include/pmacc/eventSystem/tasks/TaskKernel.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix 
Schmitt, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -28,15 +28,10 @@ namespace pmacc { - class TaskKernel : public StreamTask { public: - - TaskKernel(std::string kernelName) : - StreamTask(), - kernelName(kernelName), - canBeChecked(false) + TaskKernel(std::string kernelName) : StreamTask(), kernelName(kernelName), canBeChecked(false) { } @@ -74,5 +69,4 @@ namespace pmacc std::string kernelName; }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskKernel.tpp b/include/pmacc/eventSystem/tasks/TaskKernel.tpp index 04c476a0e2..d4aadf7336 100644 --- a/include/pmacc/eventSystem/tasks/TaskKernel.tpp +++ b/include/pmacc/eventSystem/tasks/TaskKernel.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -25,8 +25,8 @@ #include "pmacc/eventSystem/tasks/TaskKernel.hpp" #include "pmacc/Environment.hpp" -namespace pmacc{ - +namespace pmacc +{ void TaskKernel::activateChecks() { canBeChecked = true; @@ -35,4 +35,4 @@ namespace pmacc{ Environment<>::get().Manager().addTask(this); __setTransactionEvent(EventTask(this->getId())); } -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskLogicalAnd.hpp b/include/pmacc/eventSystem/tasks/TaskLogicalAnd.hpp index 609f87e7c9..92d17aa8ab 100644 --- a/include/pmacc/eventSystem/tasks/TaskLogicalAnd.hpp +++ b/include/pmacc/eventSystem/tasks/TaskLogicalAnd.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -29,22 +29,17 @@ namespace pmacc { - /** * TaskLogicalAnd AND-connects tasks to a new single task */ class TaskLogicalAnd : public StreamTask { public: - /** * s1 and s1 must be a valid IStreamTask * constructor */ - TaskLogicalAnd(ITask* s1, ITask* s2) : - StreamTask(), - task1(s1->getId()), - task2(s2->getId()) + TaskLogicalAnd(ITask* s1, ITask* s2) : StreamTask(), task1(s1->getId()), task2(s2->getId()) { combine(s1, s2); } @@ -54,13 +49,11 @@ namespace pmacc */ virtual ~TaskLogicalAnd() { - notify(this->myId, LOGICALAND, nullptr); } void init() { - } bool executeIntern() @@ -81,16 +74,16 @@ namespace pmacc if(task != nullptr) { ITask::TaskType type = task->getTaskType(); - if (type == ITask::TASK_CUDA ) + if(type == ITask::TASK_DEVICE) { this->stream = static_cast(task)->getEventStream(); - this->setTaskType(ITask::TASK_CUDA); - this->cudaEvent = static_cast(task)->getCudaEventHandle(); + this->setTaskType(ITask::TASK_DEVICE); + this->cuplaEvent = static_cast(task)->getCudaEventHandle(); this->hasCudaEventHandle = true; } } } - else if (task2 == eventId) + else if(task2 == eventId) { task2 = 0; @@ -98,11 +91,11 @@ namespace pmacc if(task != nullptr) { ITask::TaskType type = task->getTaskType(); - if (type == ITask::TASK_CUDA ) + if(type == ITask::TASK_DEVICE) { this->stream = static_cast(task)->getEventStream(); - this->setTaskType(ITask::TASK_CUDA); - this->cudaEvent = static_cast(task)->getCudaEventHandle(); + this->setTaskType(ITask::TASK_DEVICE); + this->cuplaEvent = static_cast(task)->getCudaEventHandle(); this->hasCudaEventHandle = true; } } @@ -118,32 +111,28 @@ namespace pmacc std::string toString() { - return std::string("TaskLogicalAnd (") + - EventTask(task1).toString() + - std::string(" - ") + - EventTask(task2).toString() + - std::string(" )"); + return std::string("TaskLogicalAnd (") + EventTask(task1).toString() + std::string(" - ") + + EventTask(task2).toString() + std::string(" )"); } private: - inline void combine(ITask* s1, ITask* s2) { 
s1->addObserver(this); s2->addObserver(this); - if(s1->getTaskType() == ITask::TASK_CUDA && s2->getTaskType() == ITask::TASK_CUDA) + if(s1->getTaskType() == ITask::TASK_DEVICE && s2->getTaskType() == ITask::TASK_DEVICE) { - this->setTaskType(ITask::TASK_CUDA); - this->setEventStream(static_cast (s2)->getEventStream()); - if(static_cast (s1)->getEventStream() != static_cast (s2)->getEventStream()) - this->getEventStream()->waitOn(static_cast (s1)->getCudaEventHandle()); + this->setTaskType(ITask::TASK_DEVICE); + this->setEventStream(static_cast(s2)->getEventStream()); + if(static_cast(s1)->getEventStream() != static_cast(s2)->getEventStream()) + this->getEventStream()->waitOn(static_cast(s1)->getCudaEventHandle()); this->activate(); } - else if(s1->getTaskType() == ITask::TASK_MPI && s2->getTaskType() == ITask::TASK_CUDA) + else if(s1->getTaskType() == ITask::TASK_MPI && s2->getTaskType() == ITask::TASK_DEVICE) { this->setTaskType(ITask::TASK_MPI); } - else if(s2->getTaskType() == ITask::TASK_MPI && s1->getTaskType() == ITask::TASK_CUDA) + else if(s2->getTaskType() == ITask::TASK_MPI && s1->getTaskType() == ITask::TASK_DEVICE) { this->setTaskType(ITask::TASK_MPI); } @@ -157,5 +146,4 @@ namespace pmacc id_t task2; }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskReceive.hpp b/include/pmacc/eventSystem/tasks/TaskReceive.hpp index 95d216d288..d89bb2f295 100644 --- a/include/pmacc/eventSystem/tasks/TaskReceive.hpp +++ b/include/pmacc/eventSystem/tasks/TaskReceive.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -32,16 +32,11 @@ namespace pmacc { - - - template + template class TaskReceive : public MPITask { public: - - TaskReceive(Exchange &ex) : - exchange(&ex), - state(Constructor) + TaskReceive(Exchange& ex) : exchange(&ex), state(Constructor) { } @@ -53,38 +48,75 @@ namespace pmacc bool executeIntern() { - switch (state) + switch(state) { - case WaitForReceived: - break; - case RunCopy: - state = WaitForFinish; - __startTransaction(); - exchange->getHostBuffer().setCurrentSize(newBufferSize); - if (exchange->hasDeviceDoubleBuffer()) + case WaitForReceived: + break; + case RunCopy: + state = WaitForFinish; + __startTransaction(); + + /* If MPI direct is enabled + * - we do not have any host representation of an exchange + * - MPI will write directly into the device buffer + * or double buffer when available. + */ + if(exchange->hasDeviceDoubleBuffer()) + { + if(Environment<>::get().isMpiDirectEnabled()) { - - Environment<>::get().Factory().createTaskCopyHostToDevice(exchange->getHostBuffer(), - exchange->getDeviceDoubleBuffer()); - Environment<>::get().Factory().createTaskCopyDeviceToDevice(exchange->getDeviceDoubleBuffer(), - exchange->getDeviceBuffer(), - this); + exchange->getDeviceDoubleBuffer().setCurrentSize(newBufferSize); } else { + exchange->getHostBuffer().setCurrentSize(newBufferSize); + Environment<>::get().Factory().createTaskCopyHostToDevice( + exchange->getHostBuffer(), + exchange->getDeviceDoubleBuffer()); + } - Environment<>::get().Factory().createTaskCopyHostToDevice(exchange->getHostBuffer(), - exchange->getDeviceBuffer(), - this); + Environment<>::get().Factory().createTaskCopyDeviceToDevice( + exchange->getDeviceDoubleBuffer(), + exchange->getDeviceBuffer(), + this); + } + else + { + if(Environment<>::get().isMpiDirectEnabled()) + { + exchange->getDeviceBuffer().setCurrentSize(newBufferSize); + /* We can not be notified from setCurrentSize() therefore + * we need to wait that the current event is finished. 
+ */ + setSizeEvent = __getTransactionEvent(); + state = WaitForSetSize; } - __endTransaction(); - break; - case WaitForFinish: - break; - case Finish: + else + { + exchange->getHostBuffer().setCurrentSize(newBufferSize); + Environment<>::get().Factory().createTaskCopyHostToDevice( + exchange->getHostBuffer(), + exchange->getDeviceBuffer(), + this); + } + } + + __endTransaction(); + break; + case WaitForSetSize: + // this code is only passed if gpu direct is enabled + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(setSizeEvent.getTaskId())) + { + state = Finish; return true; - default: - return false; + } + break; + case WaitForFinish: + break; + case Finish: + return true; + default: + return false; } return false; @@ -97,51 +129,51 @@ namespace pmacc void event(id_t, EventType type, IEventData* data) { - switch (type) + switch(type) { - case RECVFINISHED: - if (data != nullptr) - { - EventDataReceive *rdata = static_cast (data); - // std::cout<<" data rec "<getReceivedCount()/sizeof(TYPE)<getReceivedCount() / sizeof (TYPE); - state = RunCopy; - executeIntern(); - } - break; - case COPYHOST2DEVICE: - case COPYDEVICE2DEVICE: - state = Finish; - break; - default: - return; + case RECVFINISHED: + if(data != nullptr) + { + EventDataReceive* rdata = static_cast(data); + // std::cout<<" data rec "<getReceivedCount()/sizeof(TYPE)<getReceivedCount() / sizeof(TYPE); + state = RunCopy; + executeIntern(); + } + break; + case COPYHOST2DEVICE: + case COPYDEVICE2DEVICE: + state = Finish; + break; + default: + return; } } std::string toString() { std::stringstream ss; - ss< *exchange; + Exchange* exchange; state_t state; size_t newBufferSize; + EventTask setSizeEvent; }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskReceiveMPI.hpp b/include/pmacc/eventSystem/tasks/TaskReceiveMPI.hpp index ec23fdca60..313b449f98 100644 --- a/include/pmacc/eventSystem/tasks/TaskReceiveMPI.hpp +++ 
b/include/pmacc/eventSystem/tasks/TaskReceiveMPI.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. @@ -31,80 +31,72 @@ namespace pmacc { - -template -class TaskReceiveMPI : public MPITask -{ -public: - - TaskReceiveMPI(Exchange *exchange) : - MPITask(), - exchange(exchange) - { - - } - - virtual void init() + template + class TaskReceiveMPI : public MPITask { - this->request = Environment::get().EnvironmentController() - .getCommunicator().startReceive( - exchange->getExchangeType(), - (char*) exchange->getHostBuffer().getBasePointer(), - exchange->getHostBuffer().getDataSpace().productOfComponents() * sizeof (TYPE), - exchange->getCommunicationTag()); - } - - bool executeIntern() - { - if (this->isFinished()) - return true; - - if (this->request == nullptr) - throw std::runtime_error("request was nullptr (call executeIntern after freed"); - - int flag=0; - MPI_CHECK(MPI_Test(this->request, &flag, &(this->status))); - - if (flag) //finished + public: + TaskReceiveMPI(Exchange* exchange) : MPITask(), exchange(exchange) { - delete this->request; - this->request = nullptr; - setFinished(); - return true; } - return false; - } - - virtual ~TaskReceiveMPI() - { - //! 
\todo this make problems because we send bytes and not combined types - int recv_data_count; - MPI_CHECK_NO_EXCEPT(MPI_Get_count(&(this->status), MPI_CHAR, &recv_data_count)); + virtual void init() + { + Buffer* dst = exchange->getCommunicationBuffer(); - IEventData *edata = new EventDataReceive(nullptr, recv_data_count); + this->request = Environment::get().EnvironmentController().getCommunicator().startReceive( + exchange->getExchangeType(), + reinterpret_cast(dst->getPointer()), + dst->getDataSpace().productOfComponents() * sizeof(TYPE), + exchange->getCommunicationTag()); + } - notify(this->myId, RECVFINISHED, edata); /*add notify her*/ - __delete(edata); + bool executeIntern() + { + if(this->isFinished()) + return true; + + if(this->request == nullptr) + throw std::runtime_error("request was nullptr (call executeIntern after freed"); + + int flag = 0; + MPI_CHECK(MPI_Test(this->request, &flag, &(this->status))); + + if(flag) // finished + { + delete this->request; + this->request = nullptr; + setFinished(); + return true; + } + return false; + } - } + virtual ~TaskReceiveMPI() + { + //! 
\todo this make problems because we send bytes and not combined types + int recv_data_count; + MPI_CHECK_NO_EXCEPT(MPI_Get_count(&(this->status), MPI_CHAR, &recv_data_count)); - void event(id_t, EventType, IEventData*) - { + IEventData* edata = new EventDataReceive(nullptr, recv_data_count); - } + notify(this->myId, RECVFINISHED, edata); /*add notify her*/ + __delete(edata); + } - std::string toString() - { - return "TaskReceiveMPI"; - } + void event(id_t, EventType, IEventData*) + { + } -private: - Exchange *exchange; - MPI_Request *request; - MPI_Status status; -}; + std::string toString() + { + return std::string("TaskReceiveMPI exchange type=") + std::to_string(exchange->getExchangeType()); + } -} //namespace pmacc + private: + Exchange* exchange; + MPI_Request* request; + MPI_Status status; + }; +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskSend.hpp b/include/pmacc/eventSystem/tasks/TaskSend.hpp index 9b5ac8c19d..21b7b1ae3c 100644 --- a/include/pmacc/eventSystem/tasks/TaskSend.hpp +++ b/include/pmacc/eventSystem/tasks/TaskSend.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -32,59 +32,72 @@ namespace pmacc { - - - - template + template class TaskSend : public MPITask { public: - - TaskSend(Exchange &ex) : - exchange(&ex), - state(Constructor) + TaskSend(Exchange& ex) : exchange(&ex), state(Constructor) { } virtual void init() { state = InitDone; - if (exchange->hasDeviceDoubleBuffer()) + if(exchange->hasDeviceDoubleBuffer()) { - Environment<>::get().Factory().createTaskCopyDeviceToDevice(exchange->getDeviceBuffer(), - exchange->getDeviceDoubleBuffer() - ); - Environment<>::get().Factory().createTaskCopyDeviceToHost(exchange->getDeviceDoubleBuffer(), - exchange->getHostBuffer(), - this); + if(Environment<>::get().isMpiDirectEnabled()) + Environment<>::get().Factory().createTaskCopyDeviceToDevice( + exchange->getDeviceBuffer(), + exchange->getDeviceDoubleBuffer(), + this); + else + { + Environment<>::get().Factory().createTaskCopyDeviceToDevice( + exchange->getDeviceBuffer(), + exchange->getDeviceDoubleBuffer()); + + Environment<>::get().Factory().createTaskCopyDeviceToHost( + exchange->getDeviceDoubleBuffer(), + exchange->getHostBuffer(), + this); + } } else { - Environment<>::get().Factory().createTaskCopyDeviceToHost(exchange->getDeviceBuffer(), - exchange->getHostBuffer(), - this); + if(Environment<>::get().isMpiDirectEnabled()) + { + /* Wait to be sure that all device work is finished before MPI is triggered. 
+ * MPI will not wait for work in our device streams + */ + __getTransactionEvent().waitForFinished(); + state = ReadyForMPISend; + } + else + Environment<>::get().Factory().createTaskCopyDeviceToHost( + exchange->getDeviceBuffer(), + exchange->getHostBuffer(), + this); } - } bool executeIntern() { - switch (state) + switch(state) { - case InitDone: - break; - case DeviceToHostFinished: - state = SendDone; - __startTransaction(); - Environment<>::get().Factory().createTaskSendMPI(exchange, this); - __endTransaction(); - break; - case SendDone: - break; - case Finish: - return true; - default: - return false; + case InitDone: + break; + case ReadyForMPISend: + state = SendDone; + __startTransaction(); + Environment<>::get().Factory().createTaskSendMPI(exchange, this); + __endTransaction(); + break; + case SendDone: + break; + case Finish: + return true; + default: + return false; } return false; @@ -97,41 +110,37 @@ namespace pmacc void event(id_t, EventType type, IEventData*) { - if (type == COPYDEVICE2HOST) + if(type == COPYDEVICE2HOST || type == COPYDEVICE2DEVICE) { - state = DeviceToHostFinished; + state = ReadyForMPISend; executeIntern(); - } - if (type == SENDFINISHED) + if(type == SENDFINISHED) { state = Finish; } - } std::string toString() { std::stringstream ss; - ss< *exchange; + Exchange* exchange; state_t state; }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskSendMPI.hpp b/include/pmacc/eventSystem/tasks/TaskSendMPI.hpp index b4a82cfc1a..a36633ff72 100644 --- a/include/pmacc/eventSystem/tasks/TaskSendMPI.hpp +++ b/include/pmacc/eventSystem/tasks/TaskSendMPI.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Wolfgang Hoenig, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -31,70 +31,64 @@ namespace pmacc { - -template -class TaskSendMPI : public MPITask -{ -public: - - TaskSendMPI(Exchange *exchange) : - MPITask(), - exchange(exchange) - { - - } - - virtual void init() + template + class TaskSendMPI : public MPITask { - this->request = Environment::get().EnvironmentController() - .getCommunicator().startSend( - exchange->getExchangeType(), - (char*) exchange->getHostBuffer().getPointer(), - exchange->getHostBuffer().getCurrentSize() * sizeof (TYPE), - exchange->getCommunicationTag()); - } - - bool executeIntern() - { - if (this->isFinished()) - return true; + public: + TaskSendMPI(Exchange* exchange) : MPITask(), exchange(exchange) + { + } - if (this->request == nullptr) - throw std::runtime_error("request was nullptr (call executeIntern after freed"); + virtual void init() + { + Buffer* src = exchange->getCommunicationBuffer(); - int flag=0; - MPI_CHECK(MPI_Test(this->request, &flag, &(this->status))); + this->request = Environment::get().EnvironmentController().getCommunicator().startSend( + exchange->getExchangeType(), + reinterpret_cast(src->getPointer()), + src->getCurrentSize() * sizeof(TYPE), + exchange->getCommunicationTag()); + } - if (flag) //finished + bool executeIntern() { - delete this->request; - this->request = nullptr; - this->setFinished(); - return true; + if(this->isFinished()) + return true; + + if(this->request == nullptr) + throw std::runtime_error("request was nullptr (call executeIntern after freed"); + + int flag = 0; + MPI_CHECK(MPI_Test(this->request, &flag, &(this->status))); + + if(flag) // finished + { + delete this->request; + this->request = nullptr; + this->setFinished(); + return true; + } + return false; } - return false; - } - virtual ~TaskSendMPI() - { - notify(this->myId, SENDFINISHED, nullptr); - } - - void event(id_t, EventType, IEventData*) - { - - } + virtual ~TaskSendMPI() + { + notify(this->myId, SENDFINISHED, nullptr); + } - std::string toString() - { - return "TaskSendMPI"; - } + 
void event(id_t, EventType, IEventData*) + { + } -private: - Exchange *exchange; - MPI_Request *request; - MPI_Status status; -}; + std::string toString() + { + return std::string("TaskSendMPI exchange type=") + std::to_string(exchange->getExchangeType()); + } -} //namespace pmacc + private: + Exchange* exchange; + MPI_Request* request; + MPI_Status status; + }; +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskSetCurrentSizeOnDevice.hpp b/include/pmacc/eventSystem/tasks/TaskSetCurrentSizeOnDevice.hpp index 3e3a9a3c42..59e7e2ca80 100644 --- a/include/pmacc/eventSystem/tasks/TaskSetCurrentSizeOnDevice.hpp +++ b/include/pmacc/eventSystem/tasks/TaskSetCurrentSizeOnDevice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -32,76 +32,62 @@ namespace pmacc { - -struct KernelSetValueOnDeviceMemory -{ - template< typename T_Acc > - DINLINE void operator()(const T_Acc&, size_t* pointer, const size_t size) const - { - *pointer = size; - } -}; - -template -class DeviceBuffer; - -template -class TaskSetCurrentSizeOnDevice : public StreamTask -{ -public: - - TaskSetCurrentSizeOnDevice(DeviceBuffer& dst, size_t size) : - StreamTask(), - size(size) - { - this->destination = & dst; - } - - virtual ~TaskSetCurrentSizeOnDevice() + struct KernelSetValueOnDeviceMemory { - notify(this->myId, SETVALUE, nullptr); - } - - virtual void init() - { - setSize(); - } - - bool executeIntern() + template + DINLINE void operator()(const T_Acc&, size_t* pointer, const size_t size) const + { + *pointer = size; + } + }; + + template + class DeviceBuffer; + + template + class TaskSetCurrentSizeOnDevice : public StreamTask { - return isFinished(); - } - - void event(id_t, EventType, IEventData*) - { - } - - std::string toString() - { - return "TaskSetCurrentSizeOnDevice"; - } - -private: - - void setSize() - { - auto 
sizePtr = destination->getCurrentSizeOnDevicePointer(); - CUPLA_KERNEL( KernelSetValueOnDeviceMemory )( - 1, - 1, - 0, - this->getCudaStream() - )( - sizePtr, - size - ); - - activate(); - } - - DeviceBuffer *destination; - const size_t size; -}; - -} //namespace pmacc - + public: + TaskSetCurrentSizeOnDevice(DeviceBuffer& dst, size_t size) : StreamTask(), size(size) + { + this->destination = &dst; + } + + virtual ~TaskSetCurrentSizeOnDevice() + { + notify(this->myId, SETVALUE, nullptr); + } + + virtual void init() + { + setSize(); + } + + bool executeIntern() + { + return isFinished(); + } + + void event(id_t, EventType, IEventData*) + { + } + + std::string toString() + { + return "TaskSetCurrentSizeOnDevice"; + } + + private: + void setSize() + { + auto sizePtr = destination->getCurrentSizeOnDevicePointer(); + CUPLA_KERNEL(KernelSetValueOnDeviceMemory)(1, 1, 0, this->getCudaStream())(sizePtr, size); + + activate(); + } + + DeviceBuffer* destination; + const size_t size; + }; + +} // namespace pmacc diff --git a/include/pmacc/eventSystem/tasks/TaskSetValue.hpp b/include/pmacc/eventSystem/tasks/TaskSetValue.hpp index a69bc2a193..4e97483724 100644 --- a/include/pmacc/eventSystem/tasks/TaskSetValue.hpp +++ b/include/pmacc/eventSystem/tasks/TaskSetValue.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -37,329 +37,260 @@ #include - - namespace pmacc { -namespace taskSetValueHelper -{ - -/** define access operation for non-pointer types - */ -template -struct Value -{ - typedef const T_Type type; - - HDINLINE type& operator()(type& v) const + namespace taskSetValueHelper { - return v; - } -}; + /** define access operation for non-pointer types + */ + template + struct Value + { + typedef const T_Type type; -/** define access operation for pointer types - * - * access first element of a pointer - */ -template -struct Value -{ - typedef const T_Type PtrType; - typedef const typename boost::remove_pointer::type type; + HDINLINE type& operator()(type& v) const + { + return v; + } + }; + + /** define access operation for pointer types + * + * access first element of a pointer + */ + template + struct Value + { + typedef const T_Type PtrType; + typedef const typename boost::remove_pointer::type type; - HDINLINE type& operator()(PtrType v) const - { - return *v; - } -}; + HDINLINE type& operator()(PtrType v) const + { + return *v; + } + }; -/** Get access to a value from a pointer or reference with the same method - */ -template -HDINLINE typename Value::value >::type& -getValue(T_Type& value) -{ - typedef Value::value > Functor; - return Functor()(value); -} + /** Get access to a value from a pointer or reference with the same method + */ + template + HDINLINE typename Value::value>::type& getValue(T_Type& value) + { + typedef Value::value> Functor; + return Functor()(value); + } -} + } // namespace taskSetValueHelper -/** set a value to all elements of a box - * - * @tparam T_numWorkers number of workers - * @tparam T_xChunkSize number of elements in x direction to prepare with one cuda block - */ -template< - uint32_t T_numWorkers, - uint32_t T_xChunkSize -> -struct KernelSetValue -{ - /** set value to all elements + /** set a value to all elements of a box * - * @tparam T_DataBox pmacc::DataBox, type of the memory box - * @tparam T_ValueType type of the value - * 
@tparam T_SizeVecType pmacc::math::Vector, index type - * @tparam T_Acc alpaka accelerator type - * - * @param memBox box of which all elements shall be set to value - * @param value value to set to all elements of memBox - * @param size extents of memBox + * @tparam T_numWorkers number of workers + * @tparam T_xChunkSize number of elements in x direction to prepare with one cupla block */ - template< - typename T_DataBox, - typename T_ValueType, - typename T_SizeVecType, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - T_DataBox & memBox, - T_ValueType const & value, - T_SizeVecType const & size - ) const + template + struct KernelSetValue { - using namespace mappings::threads; - using SizeVecType = T_SizeVecType; - - SizeVecType const blockIndex( blockIdx ); - SizeVecType blockSize( SizeVecType::create( 1 ) ); - blockSize.x( ) = T_xChunkSize; - - constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; - - ForEachIdx< - IdxConfig< - T_xChunkSize, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto virtualWorkerIdx( SizeVecType::create( 0 ) ); - virtualWorkerIdx.x( ) = linearIdx; + /** set value to all elements + * + * @tparam T_DataBox pmacc::DataBox, type of the memory box + * @tparam T_ValueType type of the value + * @tparam T_SizeVecType pmacc::math::Vector, index type + * @tparam T_Acc alpaka accelerator type + * + * @param memBox box of which all elements shall be set to value + * @param value value to set to all elements of memBox + * @param size extents of memBox + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_DataBox& memBox, + T_ValueType const& value, + T_SizeVecType const& size) const + { + using namespace mappings::threads; + using SizeVecType = T_SizeVecType; - SizeVecType const idx( blockSize * blockIndex + virtualWorkerIdx ); - if( idx.x() < size.x() ) - memBox( idx ) = taskSetValueHelper::getValue( value ); - } - ); - } -}; + 
SizeVecType const blockIndex(cupla::blockIdx(acc)); + SizeVecType blockSize(SizeVecType::create(1)); + blockSize.x() = T_xChunkSize; -template -class DeviceBuffer; + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t const workerIdx = cupla::threadIdx(acc).x; -/** Set all cells of a GridBuffer on the device to a given value - * - * T_ValueType = data type (e.g. float, float2) - * T_dim = dimension of the GridBuffer - * T_isSmallValue = true if T_ValueType can be send via kernel parameter (on cuda T_ValueType must be smaller than 256 byte) - */ -template -class TaskSetValue; + ForEachIdx>{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + auto virtualWorkerIdx(SizeVecType::create(0)); + virtualWorkerIdx.x() = linearIdx; -template -class TaskSetValueBase : public StreamTask -{ -public: - typedef T_ValueType ValueType; - static constexpr uint32_t dim = T_dim; + SizeVecType const idx(blockSize * blockIndex + virtualWorkerIdx); + if(idx.x() < size.x()) + memBox(idx) = taskSetValueHelper::getValue(value); + }); + } + }; - TaskSetValueBase(DeviceBuffer& dst, const ValueType& value) : - StreamTask(), - value(value) - { - this->destination = &dst; - } + template + class DeviceBuffer; - virtual ~TaskSetValueBase() + /** Set all cells of a GridBuffer on the device to a given value + * + * T_ValueType = data type (e.g. 
float, float2) + * T_dim = dimension of the GridBuffer + * T_isSmallValue = true if T_ValueType can be send via kernel parameter (on cupla T_ValueType must be smaller than + * 256 byte) + */ + template + class TaskSetValue; + + template + class TaskSetValueBase : public StreamTask { - notify(this->myId, SETVALUE, nullptr); + public: + typedef T_ValueType ValueType; + static constexpr uint32_t dim = T_dim; - } + TaskSetValueBase(DeviceBuffer& dst, const ValueType& value) : StreamTask(), value(value) + { + this->destination = &dst; + } - virtual void init() = 0; + virtual ~TaskSetValueBase() + { + notify(this->myId, SETVALUE, nullptr); + } - bool executeIntern() - { - return isFinished(); - } + virtual void init() = 0; - void event(id_t, EventType, IEventData*) - { - } + bool executeIntern() + { + return isFinished(); + } -protected: + void event(id_t, EventType, IEventData*) + { + } - std::string toString() + protected: + std::string toString() + { + return "TaskSetValue"; + } + + DeviceBuffer* destination; + ValueType value; + }; + + /** implementation for small values (<= 256byte) + */ + template + class TaskSetValue : public TaskSetValueBase { - return "TaskSetValue"; - } + public: + typedef T_ValueType ValueType; + static constexpr uint32_t dim = T_dim; - DeviceBuffer *destination; - ValueType value; -}; + TaskSetValue(DeviceBuffer& dst, const ValueType& value) + : TaskSetValueBase(dst, value) + { + } -/** implementation for small values (<= 256byte) - */ -template -class TaskSetValue : public TaskSetValueBase -{ -public: - typedef T_ValueType ValueType; - static constexpr uint32_t dim = T_dim; + virtual ~TaskSetValue() + { + } - TaskSetValue(DeviceBuffer& dst, const ValueType& value) : - TaskSetValueBase(dst, value) - { - } + virtual void init() + { + // number of elements in destination + size_t const current_size = this->destination->getCurrentSize(); + // n-dimensional size of destination based on `current_size` + DataSpace const 
area_size(this->destination->getCurrentDataSpace(current_size)); - virtual ~TaskSetValue() - { + if(area_size.productOfComponents() != 0) + { + auto gridSize = area_size; - } + /* number of elements in x direction used to chunk the destination buffer + * for block parallel processing + */ + constexpr uint32_t xChunkSize = 256; + constexpr uint32_t numWorkers = traits::GetNumWorkers::value; - virtual void init() - { - // number of elements in destination - size_t const current_size = this->destination->getCurrentSize( ); - // n-dimensional size of destination based on `current_size` - DataSpace< dim > const area_size( this->destination->getCurrentDataSpace( current_size ) ); + // number of blocks in x direction + gridSize.x() = ceil(static_cast(gridSize.x()) / static_cast(xChunkSize)); - if( area_size.productOfComponents() != 0 ) - { - auto gridSize = area_size; - - /* number of elements in x direction used to chunk the destination buffer - * for block parallel processing - */ - constexpr uint32_t xChunkSize = 256; - constexpr uint32_t numWorkers = traits::GetNumWorkers< - xChunkSize - >::value; - - // number of blocks in x direction - gridSize.x() = ceil( - static_cast< double >( gridSize.x( ) ) / - static_cast< double >( xChunkSize ) - ); - - auto destBox = this->destination->getDataBox( ); - CUPLA_KERNEL( - KernelSetValue< - numWorkers, - xChunkSize - > - )( - gridSize.toDim3(), - numWorkers, - 0, - this->getCudaStream( ) - )( - destBox, - this->value, - area_size - ); + auto destBox = this->destination->getDataBox(); + CUPLA_KERNEL(KernelSetValue) + (gridSize.toDim3(), numWorkers, 0, this->getCudaStream())(destBox, this->value, area_size); + } + this->activate(); } - this->activate( ); - } -}; - -/** implementation for big values (>256 byte) - * - * This class uses CUDA memcopy to copy an instance of T_ValueType to the GPU - * and runs a kernel which assigns this value to all cells. 
- */ -template -class TaskSetValue : public TaskSetValueBase -{ -public: - typedef T_ValueType ValueType; - static constexpr uint32_t dim = T_dim; + }; - TaskSetValue(DeviceBuffer& dst, const ValueType& value) : - TaskSetValueBase(dst, value), valuePointer_host(nullptr) + /** implementation for big values (>256 byte) + * + * This class uses CUDA memcopy to copy an instance of T_ValueType to the GPU + * and runs a kernel which assigns this value to all cells. + */ + template + class TaskSetValue : public TaskSetValueBase { - } + public: + typedef T_ValueType ValueType; + static constexpr uint32_t dim = T_dim; - virtual ~TaskSetValue() - { - if (valuePointer_host != nullptr) + TaskSetValue(DeviceBuffer& dst, const ValueType& value) + : TaskSetValueBase(dst, value) + , valuePointer_host(nullptr) { - CUDA_CHECK_NO_EXCEPT(cudaFreeHost(valuePointer_host)); - valuePointer_host = nullptr; } - } - void init() - { - size_t current_size = this->destination->getCurrentSize(); - const DataSpace area_size(this->destination->getCurrentDataSpace(current_size)); - if(area_size.productOfComponents() != 0) + virtual ~TaskSetValue() { - auto gridSize = area_size; - - /* number of elements in x direction used to chunk the destination buffer - * for block parallel processing - */ - constexpr int xChunkSize = 256; - constexpr uint32_t numWorkers = traits::GetNumWorkers< - xChunkSize - >::value; - - // number of blocks in x direction - gridSize.x() = ceil( - static_cast< double >( gridSize.x( ) ) / - static_cast< double >( xChunkSize ) - ); - - ValueType* devicePtr = this->destination->getPointer(); - - CUDA_CHECK( cudaMallocHost( - (void**)&valuePointer_host, - sizeof( ValueType ) - )); - *valuePointer_host = this->value; //copy value to new place - - CUDA_CHECK( cudaMemcpyAsync( - devicePtr, - valuePointer_host, - sizeof( ValueType ), - cudaMemcpyHostToDevice, - this->getCudaStream( ) - )); - - auto destBox = this->destination->getDataBox( ); - CUPLA_KERNEL( - KernelSetValue< - 
numWorkers, - xChunkSize - > - )( - gridSize.toDim3(), - numWorkers, - 0, - this->getCudaStream() - )( - destBox, - devicePtr, - area_size - ); + if(valuePointer_host != nullptr) + { + CUDA_CHECK_NO_EXCEPT(cuplaFreeHost(valuePointer_host)); + valuePointer_host = nullptr; + } } - this->activate(); - } + void init() + { + size_t current_size = this->destination->getCurrentSize(); + const DataSpace area_size(this->destination->getCurrentDataSpace(current_size)); + if(area_size.productOfComponents() != 0) + { + auto gridSize = area_size; + + /* number of elements in x direction used to chunk the destination buffer + * for block parallel processing + */ + constexpr int xChunkSize = 256; + constexpr uint32_t numWorkers = traits::GetNumWorkers::value; + + // number of blocks in x direction + gridSize.x() = ceil(static_cast(gridSize.x()) / static_cast(xChunkSize)); + + ValueType* devicePtr = this->destination->getPointer(); -private: - ValueType *valuePointer_host; + CUDA_CHECK(cuplaMallocHost((void**) &valuePointer_host, sizeof(ValueType))); + *valuePointer_host = this->value; // copy value to new place + + CUDA_CHECK(cuplaMemcpyAsync( + devicePtr, + valuePointer_host, + sizeof(ValueType), + cuplaMemcpyHostToDevice, + this->getCudaStream())); + + auto destBox = this->destination->getDataBox(); + CUPLA_KERNEL(KernelSetValue) + (gridSize.toDim3(), numWorkers, 0, this->getCudaStream())(destBox, devicePtr, area_size); + } + + this->activate(); + } -}; + private: + ValueType* valuePointer_host; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/transactions/Transaction.hpp b/include/pmacc/eventSystem/transactions/Transaction.hpp index 37ab193467..33aa581f2b 100644 --- a/include/pmacc/eventSystem/transactions/Transaction.hpp +++ b/include/pmacc/eventSystem/transactions/Transaction.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz, * 
Alexander Grund * * This file is part of PMacc. @@ -26,54 +26,51 @@ namespace pmacc { - -class EventStream; - -/** - * Represents a single transaction in the task/event synchronization system. - */ -class Transaction -{ -public: - - /** - * Constructor. - * - * @param event initial EventTask for base event - */ - HINLINE Transaction(EventTask event); + class EventStream; /** - * Adds event to the base event of this transaction. - * - * @param event EventTask to add to base event - * @return new base event + * Represents a single transaction in the task/event synchronization system. */ - HINLINE EventTask setTransactionEvent(const EventTask& event); + class Transaction + { + public: + /** + * Constructor. + * + * @param event initial EventTask for base event + */ + HINLINE Transaction(EventTask event); - /** - * Returns the current base event. - * - * @return current base event - */ - HINLINE EventTask getTransactionEvent(); + /** + * Adds event to the base event of this transaction. + * + * @param event EventTask to add to base event + * @return new base event + */ + HINLINE EventTask setTransactionEvent(const EventTask& event); - /** - * Performs an operation on the transaction which leads to synchronization. - * - * @param operation type of operation to perform, defines resulting synchronization. - */ - HINLINE void operation(ITask::TaskType operation); + /** + * Returns the current base event. + * + * @return current base event + */ + HINLINE EventTask getTransactionEvent(); - /* Get a EventStream which include all dependencies - * @param operation type of operation to perform - * @return EventStream with solved dependencies - */ - HINLINE EventStream* getEventStream(ITask::TaskType operation); + /** + * Performs an operation on the transaction which leads to synchronization. + * + * @param operation type of operation to perform, defines resulting synchronization. 
+ */ + HINLINE void operation(ITask::TaskType operation); -private: - EventTask baseEvent; -}; + /* Get a EventStream which include all dependencies + * @param operation type of operation to perform + * @return EventStream with solved dependencies + */ + HINLINE EventStream* getEventStream(ITask::TaskType operation); -} + private: + EventTask baseEvent; + }; +} // namespace pmacc diff --git a/include/pmacc/eventSystem/transactions/Transaction.tpp b/include/pmacc/eventSystem/transactions/Transaction.tpp index 51738a2e04..1f1f0c6d10 100644 --- a/include/pmacc/eventSystem/transactions/Transaction.tpp +++ b/include/pmacc/eventSystem/transactions/Transaction.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -29,60 +29,58 @@ namespace pmacc { + Transaction::Transaction(EventTask event) : baseEvent(event) + { + } -Transaction::Transaction( EventTask event ) : baseEvent( event ) -{ - -} - -EventTask Transaction::setTransactionEvent( const EventTask& event ) -{ - baseEvent += event; - return baseEvent; -} - -EventTask Transaction::getTransactionEvent( ) -{ - return baseEvent; -} + EventTask Transaction::setTransactionEvent(const EventTask& event) + { + baseEvent += event; + return baseEvent; + } -void Transaction::operation( ITask::TaskType operation ) -{ - if ( operation == ITask::TASK_CUDA ) + EventTask Transaction::getTransactionEvent() { - Manager &manager = Environment<>::get( ).Manager( ); + return baseEvent; + } - ITask* baseTask = manager.getITaskIfNotFinished( this->baseEvent.getTaskId( ) ); - if ( baseTask != nullptr ) + void Transaction::operation(ITask::TaskType operation) + { + if(operation == ITask::TASK_DEVICE) { - if ( baseTask->getTaskType( ) == ITask::TASK_CUDA ) + Manager& manager = Environment<>::get().Manager(); + + ITask* baseTask = manager.getITaskIfNotFinished(this->baseEvent.getTaskId()); + if(baseTask != nullptr) { - /* no blocking is 
needed */ - return; + if(baseTask->getTaskType() == ITask::TASK_DEVICE) + { + /* no blocking is needed */ + return; + } } } + baseEvent.waitForFinished(); } - baseEvent.waitForFinished( ); -} -EventStream* Transaction::getEventStream( ITask::TaskType ) -{ - Manager &manager = Environment<>::get( ).Manager( ); - ITask* baseTask = manager.getITaskIfNotFinished( this->baseEvent.getTaskId( ) ); - - if ( baseTask != nullptr ) + EventStream* Transaction::getEventStream(ITask::TaskType) { - if ( baseTask->getTaskType( ) == ITask::TASK_CUDA ) + Manager& manager = Environment<>::get().Manager(); + ITask* baseTask = manager.getITaskIfNotFinished(this->baseEvent.getTaskId()); + + if(baseTask != nullptr) { - /* `StreamTask` from previous task must be reused to guarantee - * that the dependency chain not brake - */ - StreamTask* task = static_cast ( baseTask ); - return task->getEventStream( ); + if(baseTask->getTaskType() == ITask::TASK_DEVICE) + { + /* `StreamTask` from previous task must be reused to guarantee + * that the dependency chain not brake + */ + StreamTask* task = static_cast(baseTask); + return task->getEventStream(); + } + baseEvent.waitForFinished(); } - baseEvent.waitForFinished( ); + return Environment<>::get().StreamController().getNextStream(); } - return Environment<>::get( ).StreamController( ).getNextStream( ); -} -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/eventSystem/transactions/TransactionManager.hpp b/include/pmacc/eventSystem/transactions/TransactionManager.hpp index 1ce012b40c..ed7f6e6a77 100644 --- a/include/pmacc/eventSystem/transactions/TransactionManager.hpp +++ b/include/pmacc/eventSystem/transactions/TransactionManager.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -28,78 +28,74 @@ namespace pmacc { -// forward declaration -template -class Environment; + // forward declaration + template + class Environment; -class EventStream; - -/** - * Manages the task/event synchronization system using task 'transactions'. - * Transactions are grouped on a stack. - */ -class TransactionManager -{ -public: - /** - * Destructor. - */ - virtual ~TransactionManager() /*noexcept(false)*/; - - /** - * Adds a new transaction to the stack. - * - * @param serialEvent initial base event for new transaction - */ - void startTransaction(EventTask serialEvent = EventTask()); + class EventStream; /** - * Removes the top-most transaction from the stack. - * - * @return the base event of the removed transaction + * Manages the task/event synchronization system using task 'transactions'. + * Transactions are grouped on a stack. */ - EventTask endTransaction(); - - /** - * Synchronizes a blocking operation with events on the top-most transaction. - * - * @param op operation type for synchronization - * @return an EventStream which can be used for StreamTasks - */ - void startOperation(ITask::TaskType op); - - /** - * Adds event to the base event of the top-most transaction. - * - * @param event event to add to base event - * @return new base event - */ - EventTask setTransactionEvent(const EventTask& event); - - /** - * Returns the base event of the top-most transaction. - * - * @return base event - */ - EventTask getTransactionEvent(); - - EventStream* getEventStream(ITask::TaskType op); - -private: - - friend struct detail::Environment; - - TransactionManager(); - - TransactionManager(const TransactionManager& cc); - - static TransactionManager& getInstance(); - - std::stack transactions; -}; - - -} - - - + class TransactionManager + { + public: + /** + * Destructor. + */ + virtual ~TransactionManager() /*noexcept(false)*/; + + /** + * Adds a new transaction to the stack. 
+ * + * @param serialEvent initial base event for new transaction + */ + void startTransaction(EventTask serialEvent = EventTask()); + + /** + * Removes the top-most transaction from the stack. + * + * @return the base event of the removed transaction + */ + EventTask endTransaction(); + + /** + * Synchronizes a blocking operation with events on the top-most transaction. + * + * @param op operation type for synchronization + * @return an EventStream which can be used for StreamTasks + */ + void startOperation(ITask::TaskType op); + + /** + * Adds event to the base event of the top-most transaction. + * + * @param event event to add to base event + * @return new base event + */ + EventTask setTransactionEvent(const EventTask& event); + + /** + * Returns the base event of the top-most transaction. + * + * @return base event + */ + EventTask getTransactionEvent(); + + EventStream* getEventStream(ITask::TaskType op); + + private: + friend struct detail::Environment; + + TransactionManager(); + + TransactionManager(const TransactionManager& cc); + + static TransactionManager& getInstance(); + + std::stack transactions; + }; + + +} // namespace pmacc diff --git a/include/pmacc/eventSystem/transactions/TransactionManager.tpp b/include/pmacc/eventSystem/transactions/TransactionManager.tpp index 32e33f18f6..6cbc0bc881 100644 --- a/include/pmacc/eventSystem/transactions/TransactionManager.tpp +++ b/include/pmacc/eventSystem/transactions/TransactionManager.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -28,80 +28,78 @@ namespace pmacc { - -inline TransactionManager::~TransactionManager() /*noexcept(false)*/ -{ - if(transactions.size() == 0) - std::cerr << "[PMacc] [TransactionManager] " - << "Missing transaction on the stack!" 
<< std::endl; - else if(transactions.size() > 1) - std::cerr << "[PMacc] [TransactionManager] " - << "Unfinished transactions on the stack" << std::endl; - transactions.pop( ); -} - -inline TransactionManager::TransactionManager( ) -{ - startTransaction( EventTask( ) ); -} - -inline TransactionManager::TransactionManager( const TransactionManager& ) -{ - -} - -inline void TransactionManager::startTransaction( EventTask serialEvent ) -{ - transactions.push( Transaction( serialEvent ) ); -} - -inline EventTask TransactionManager::endTransaction( ) -{ - if ( transactions.size( ) == 0 ) - throw std::runtime_error( "Calling endTransaction on empty transaction stack is not allowed" ); - - EventTask event = transactions.top( ).getTransactionEvent( ); - transactions.pop( ); - return event; -} - -inline void TransactionManager::startOperation( ITask::TaskType op ) -{ - if ( transactions.size( ) == 0 ) - throw std::runtime_error( "Calling startOperation on empty transaction stack is not allowed" ); - - transactions.top( ).operation( op ); -} - -inline EventStream* TransactionManager::getEventStream( ITask::TaskType op ) -{ - if ( transactions.size( ) == 0 ) - throw std::runtime_error( "Calling startOperation on empty transaction stack is not allowed" ); - - return transactions.top( ).getEventStream( op ); -} - -inline EventTask TransactionManager::setTransactionEvent( const EventTask& event ) -{ - if ( transactions.size( ) == 0 ) - throw std::runtime_error( "Calling setTransactionEvent on empty transaction stack is not allowed" ); - - return transactions.top( ).setTransactionEvent( event ); -} - -inline EventTask TransactionManager::getTransactionEvent( ) -{ - if ( transactions.size( ) == 0 ) - throw std::runtime_error( "Calling getTransactionEvent on empty transaction stack is not allowed" ); - - return transactions.top( ).getTransactionEvent( ); -} - -inline TransactionManager& TransactionManager::getInstance( ) -{ - static TransactionManager instance; - return instance; 
-} - - -} + inline TransactionManager::~TransactionManager() /*noexcept(false)*/ + { + if(transactions.size() == 0) + std::cerr << "[PMacc] [TransactionManager] " + << "Missing transaction on the stack!" << std::endl; + else if(transactions.size() > 1) + std::cerr << "[PMacc] [TransactionManager] " + << "Unfinished transactions on the stack" << std::endl; + transactions.pop(); + } + + inline TransactionManager::TransactionManager() + { + startTransaction(EventTask()); + } + + inline TransactionManager::TransactionManager(const TransactionManager&) + { + } + + inline void TransactionManager::startTransaction(EventTask serialEvent) + { + transactions.push(Transaction(serialEvent)); + } + + inline EventTask TransactionManager::endTransaction() + { + if(transactions.size() == 0) + throw std::runtime_error("Calling endTransaction on empty transaction stack is not allowed"); + + EventTask event = transactions.top().getTransactionEvent(); + transactions.pop(); + return event; + } + + inline void TransactionManager::startOperation(ITask::TaskType op) + { + if(transactions.size() == 0) + throw std::runtime_error("Calling startOperation on empty transaction stack is not allowed"); + + transactions.top().operation(op); + } + + inline EventStream* TransactionManager::getEventStream(ITask::TaskType op) + { + if(transactions.size() == 0) + throw std::runtime_error("Calling startOperation on empty transaction stack is not allowed"); + + return transactions.top().getEventStream(op); + } + + inline EventTask TransactionManager::setTransactionEvent(const EventTask& event) + { + if(transactions.size() == 0) + throw std::runtime_error("Calling setTransactionEvent on empty transaction stack is not allowed"); + + return transactions.top().setTransactionEvent(event); + } + + inline EventTask TransactionManager::getTransactionEvent() + { + if(transactions.size() == 0) + throw std::runtime_error("Calling getTransactionEvent on empty transaction stack is not allowed"); + + return 
transactions.top().getTransactionEvent(); + } + + inline TransactionManager& TransactionManager::getInstance() + { + static TransactionManager instance; + return instance; + } + + +} // namespace pmacc diff --git a/include/pmacc/fields/SimulationFieldHelper.hpp b/include/pmacc/fields/SimulationFieldHelper.hpp index a074ae3cc0..7ea42322a5 100644 --- a/include/pmacc/fields/SimulationFieldHelper.hpp +++ b/include/pmacc/fields/SimulationFieldHelper.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -28,37 +28,37 @@ namespace pmacc { + template + class SimulationFieldHelper + { + public: + typedef CellDescription MappingDesc; -template -class SimulationFieldHelper -{ -public: - - typedef CellDescription MappingDesc; - - SimulationFieldHelper(CellDescription description) : - cellDescription(description) - {} + SimulationFieldHelper(CellDescription description) : cellDescription(description) + { + } - virtual ~SimulationFieldHelper(){} + virtual ~SimulationFieldHelper() + { + } - /** - * Reset is as well used for init. - */ - virtual void reset(uint32_t currentStep) = 0; + /** + * Reset is as well used for init. + */ + virtual void reset(uint32_t currentStep) = 0; - /** - * Synchronize data from host to device. - */ - virtual void syncToDevice() = 0; + /** + * Synchronize data from host to device. 
+ */ + virtual void syncToDevice() = 0; - CellDescription getCellDescription() const - { - return cellDescription; - } + CellDescription getCellDescription() const + { + return cellDescription; + } -protected: - CellDescription cellDescription; -}; + protected: + CellDescription cellDescription; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/fields/operations/AddExchangeToBorder.hpp b/include/pmacc/fields/operations/AddExchangeToBorder.hpp index 30088c2887..c1349af50e 100644 --- a/include/pmacc/fields/operations/AddExchangeToBorder.hpp +++ b/include/pmacc/fields/operations/AddExchangeToBorder.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, * Benjamin Worpitz * * This file is part of PMacc. @@ -34,208 +34,167 @@ namespace pmacc { -namespace fields -{ -namespace operations -{ - - /** Add field values from a received temporary buffer (exchange) to the local box (border) - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct KernelAddExchangeToBorder + namespace fields { - /** add intermediate box to the border of the local box - * - * The `template< typename T> operator+( T const & rhs )` must be defined for - * the value type of exchangeBox and destBox. 
- * - * @tparam T_DestBox pmacc::DataBox, type of the local box - * @tparam T_ExchangeBox pmacc::ExchangeBox, type of the intermediate box - * @tparam T_Extent pmacc::DataSpace, type to describe n-dimensional sizes - * @tparam T_Mapping mapper functor type - * - * @param destBox box to a local field - * @param exchangeBox exchange box with guard data from the neighboring GPU - * @param exchangeSize dimensions of exchangeBox - * @param direction the direction of exchangeBox - * @param mapper functor to map a CUDA block to a supercell - */ - template< - typename T_DestBox, - typename T_ExchangeBox, - typename T_Extent, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_DestBox & destBox, - T_ExchangeBox const & exchangeBox, - T_Extent const & exchangeSize, - T_Extent const & direction, - T_Mapping const & mapper - ) const + namespace operations { - using namespace mappings::threads; - - using SuperCellSize = typename T_Mapping::SuperCellSize; - - // number of cells in a superCell - constexpr uint32_t numCells = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - PMACC_CONSTEXPR_CAPTURE int dim = T_Mapping::Dim; - - uint32_t const workerIdx = threadIdx.x; - - DataSpace< dim > const blockCell( - mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ) - * SuperCellSize::toRT() - ); - - // origin in area from local GPU - DataSpace< dim > nullSourceCell( - mapper.getSuperCellIndex( DataSpace< dim > () ) - * SuperCellSize::toRT() - ); - - auto const numGuardSuperCells = mapper.getGuardingSuperCells(); - - ForEachIdx< - IdxConfig< - numCells, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + /** Add field values from a received temporary buffer (exchange) to the local box (border) + * + * @tparam T_numWorkers number of workers + */ + template + struct KernelAddExchangeToBorder + { + /** add intermediate box to the border of the local 
box + * + * The `template< typename T> operator+( T const & rhs )` must be defined for + * the value type of exchangeBox and destBox. + * + * @tparam T_DestBox pmacc::DataBox, type of the local box + * @tparam T_ExchangeBox pmacc::ExchangeBox, type of the intermediate box + * @tparam T_Extent pmacc::DataSpace, type to describe n-dimensional sizes + * @tparam T_Mapping mapper functor type + * + * @param destBox box to a local field + * @param exchangeBox exchange box with guard data from the neighboring GPU + * @param exchangeSize dimensions of exchangeBox + * @param direction the direction of exchangeBox + * @param mapper functor to map a CUDA block to a supercell + */ + template< + typename T_DestBox, + typename T_ExchangeBox, + typename T_Extent, + typename T_Mapping, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_DestBox& destBox, + T_ExchangeBox const& exchangeBox, + T_Extent const& exchangeSize, + T_Extent const& direction, + T_Mapping const& mapper) const { - // cell index within the superCell - DataSpace< dim > const cellIdx = DataSpaceOperations< dim >::template map< SuperCellSize >( linearIdx ); - DataSpace< dim > targetCell( blockCell + cellIdx ); - DataSpace< dim > sourceCell( targetCell - nullSourceCell ); + using namespace mappings::threads; - // supercell offset relative to the guard origin (in cells) - DataSpace< dim > superCellOffsetInGuard( ( sourceCell / SuperCellSize::toRT() ) * SuperCellSize::toRT() ); + using SuperCellSize = typename T_Mapping::SuperCellSize; - /* defines if the virtual worker needs to add the value from - * the exchange box to the cell in the border - */ - bool addValue = true; + // number of cells in a superCell + constexpr uint32_t numCells = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + PMACC_CONSTEXPR_CAPTURE int dim = T_Mapping::Dim; - for( uint32_t d = 0; d < dim; ++d ) - { - if( direction[ d ] == 1 ) - { - if( - superCellOffsetInGuard[ d ] + cellIdx[ d ] < 
- numGuardSuperCells[ d ] * SuperCellSize::toRT()[ d ] - exchangeSize[ d ] - ) - addValue = false; - sourceCell[ d ] -= numGuardSuperCells[ d ] * SuperCellSize::toRT()[ d ] - exchangeSize[ d ]; - targetCell[ d ] -= numGuardSuperCells[ d ] * SuperCellSize::toRT()[ d ]; - } - else if( direction[ d ] == -1 ) + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + DataSpace const blockCell( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))) * SuperCellSize::toRT()); + + // origin in area from local GPU + DataSpace nullSourceCell(mapper.getSuperCellIndex(DataSpace()) * SuperCellSize::toRT()); + + auto const numGuardSuperCells = mapper.getGuardingSuperCells(); + + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + // cell index within the superCell + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + DataSpace targetCell(blockCell + cellIdx); + DataSpace sourceCell(targetCell - nullSourceCell); + + // supercell offset relative to the guard origin (in cells) + DataSpace superCellOffsetInGuard( + (sourceCell / SuperCellSize::toRT()) * SuperCellSize::toRT()); + + /* defines if the virtual worker needs to add the value from + * the exchange box to the cell in the border + */ + bool addValue = true; + + for(uint32_t d = 0; d < dim; ++d) { - if( superCellOffsetInGuard[ d ] + cellIdx[ d ] >= exchangeSize[ d ] ) - addValue = false; - targetCell[ d ] += numGuardSuperCells[ d ] * SuperCellSize::toRT()[ d ]; + if(direction[d] == 1) + { + if(superCellOffsetInGuard[d] + cellIdx[d] + < numGuardSuperCells[d] * SuperCellSize::toRT()[d] - exchangeSize[d]) + addValue = false; + sourceCell[d] -= numGuardSuperCells[d] * SuperCellSize::toRT()[d] - exchangeSize[d]; + targetCell[d] -= numGuardSuperCells[d] * SuperCellSize::toRT()[d]; + } + else if(direction[d] == -1) + { + if(superCellOffsetInGuard[d] + cellIdx[d] >= exchangeSize[d]) + addValue = false; + targetCell[d] += numGuardSuperCells[d] * SuperCellSize::toRT()[d]; + } } - } - 
if( addValue ) - destBox( targetCell ) += exchangeBox( sourceCell ); + if(addValue) + destBox(targetCell) += exchangeBox(sourceCell); + }); } - ); - } - }; + }; - /** add a exchange buffer to the border of the local buffer - * - * CopyGuardToExchange is the opposite operation for the neighboring - * device to create an exchange which can be added with this functor. - */ - struct AddExchangeToBorder - { - /** add exchange to border of the local buffer - * - * Add data cell-wise from the exchange to the border of the local buffer. - * The `template< typename T> operator+( T const & rhs )` must be defined for - * the value type of the buffer. - * - * @tparam T_DestBuffer pmacc::GridBuffer, type of the used buffer - * @tparam T_SuperCellSize pmacc::math::CT::vector, size of the supercell in each direction - * - * @param destBuffer destination buffer with exchanges - * @param superCellSize compile time supercell size - * @param exchangeType the exchange direction which needs to be copied - */ - template< - typename T_DestBuffer, - typename T_SuperCellSize - > - void operator()( - T_DestBuffer & destBuffer, - T_SuperCellSize const & superCellSize, - uint32_t const exchangeType - ) const - { - boost::ignore_unused( superCellSize ); + /** add a exchange buffer to the border of the local buffer + * + * CopyGuardToExchange is the opposite operation for the neighboring + * device to create an exchange which can be added with this functor. + */ + struct AddExchangeToBorder + { + /** add exchange to border of the local buffer + * + * Add data cell-wise from the exchange to the border of the local buffer. + * The `template< typename T> operator+( T const & rhs )` must be defined for + * the value type of the buffer. 
+ * + * @tparam T_DestBuffer pmacc::GridBuffer, type of the used buffer + * @tparam T_SuperCellSize pmacc::math::CT::vector, size of the supercell in each direction + * + * @param destBuffer destination buffer with exchanges + * @param superCellSize compile time supercell size + * @param exchangeType the exchange direction which needs to be copied + */ + template + void operator()( + T_DestBuffer& destBuffer, + T_SuperCellSize const& superCellSize, + uint32_t const exchangeType) const + { + boost::ignore_unused(superCellSize); - using SuperCellSize = T_SuperCellSize; + using SuperCellSize = T_SuperCellSize; - constexpr int dim = T_SuperCellSize::dim; + constexpr int dim = T_SuperCellSize::dim; - using MappingDesc = MappingDescription< - dim, - SuperCellSize - >; + using MappingDesc = MappingDescription; - /* use only the x dimension to determine the number of supercells in the GUARD - * - * @warning pmacc restriction: all dimension must have the some number of guarding - * supercells - */ - auto const numGuardSuperCells = destBuffer.getGridLayout().getGuard() / - SuperCellSize::toRT(); - - MappingDesc const mappingDesc( - destBuffer.getGridLayout().getDataSpace(), - numGuardSuperCells - ); - - ExchangeMapping< - GUARD, - MappingDesc - > mapper( - mappingDesc, - exchangeType - ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - const DataSpace< dim > direction = Mask::getRelativeDirections< dim >( - mapper.getExchangeType( ) - ); - - PMACC_KERNEL( KernelAddExchangeToBorder< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - destBuffer.getDeviceBuffer( ).getDataBox( ), - destBuffer.getReceiveExchange( exchangeType ).getDeviceBuffer( ).getDataBox( ), - destBuffer.getReceiveExchange( exchangeType ).getDeviceBuffer( ).getDataSpace( ), - direction, - mapper - ); - } - }; - -} // namespace operations -} // namespace fields + /* use only the x dimension to determine the 
number of supercells in the GUARD + * + * @warning pmacc restriction: all dimension must have the some number of guarding + * supercells + */ + auto const numGuardSuperCells = destBuffer.getGridLayout().getGuard() / SuperCellSize::toRT(); + + MappingDesc const mappingDesc(destBuffer.getGridLayout().getDataSpace(), numGuardSuperCells); + + ExchangeMapping mapper(mappingDesc, exchangeType); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + const DataSpace direction = Mask::getRelativeDirections(mapper.getExchangeType()); + + PMACC_KERNEL(KernelAddExchangeToBorder{}) + (mapper.getGridDim(), numWorkers)( + destBuffer.getDeviceBuffer().getDataBox(), + destBuffer.getReceiveExchange(exchangeType).getDeviceBuffer().getDataBox(), + destBuffer.getReceiveExchange(exchangeType).getDeviceBuffer().getDataSpace(), + direction, + mapper); + } + }; + + } // namespace operations + } // namespace fields } // namespace pmacc diff --git a/include/pmacc/fields/operations/CopyGuardToExchange.hpp b/include/pmacc/fields/operations/CopyGuardToExchange.hpp index 374cf82310..9d1e56797e 100644 --- a/include/pmacc/fields/operations/CopyGuardToExchange.hpp +++ b/include/pmacc/fields/operations/CopyGuardToExchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Marco Garten, * Benjamin Worpitz * * This file is part of PMacc. 
@@ -34,198 +34,157 @@ namespace pmacc { -namespace fields -{ -namespace operations -{ - - /** copy guarding cells to an intermediate buffer - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct KernelCopyGuardToExchange + namespace fields { - /** copy guarding cells to an intermediate box - * - * @tparam T_ExchangeBox pmacc::ExchangeBox, type of the intermediate box - * @tparam T_SrcBox pmacc::DataBox, type of the local box - * @tparam T_Extent pmacc::DataSpace, type to describe n-dimensional sizes - * @tparam T_Mapping mapper functor type - * - * @param exchangeBox exchange box for the guard data of the local GPU - * @param srcBox box to a local field - * @param exchangeSize dimensions of exchangeBox - * @param direction the direction of exchangeBox - * @param mapper functor to map a CUDA block to a supercell - */ - template< - typename T_ExchangeBox, - typename T_SrcBox, - typename T_Extent, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ExchangeBox & exchangeBox, - T_SrcBox const & srcBox, - T_Extent const & exchangeSize, - T_Extent const & direction, - T_Mapping const & mapper - ) const + namespace operations { - using namespace mappings::threads; - - using SuperCellSize = typename T_Mapping::SuperCellSize; - - // number of cells in a superCell - constexpr uint32_t numCells = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - PMACC_CONSTEXPR_CAPTURE int dim = T_Mapping::Dim; - - uint32_t const workerIdx = threadIdx.x; - - DataSpace< dim > const blockCell( - mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ) * - SuperCellSize::toRT() - ); - - // origin in area from local GPU - DataSpace< dim > nullSourceCell( - mapper.getSuperCellIndex( DataSpace< dim > () ) * - SuperCellSize::toRT() - ); - - auto const numGuardSuperCells = mapper.getGuardingSuperCells(); - - ForEachIdx< - IdxConfig< - numCells, - numWorkers - > - 
>{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + /** copy guarding cells to an intermediate buffer + * + * @tparam T_numWorkers number of workers + */ + template + struct KernelCopyGuardToExchange + { + /** copy guarding cells to an intermediate box + * + * @tparam T_ExchangeBox pmacc::ExchangeBox, type of the intermediate box + * @tparam T_SrcBox pmacc::DataBox, type of the local box + * @tparam T_Extent pmacc::DataSpace, type to describe n-dimensional sizes + * @tparam T_Mapping mapper functor type + * + * @param exchangeBox exchange box for the guard data of the local GPU + * @param srcBox box to a local field + * @param exchangeSize dimensions of exchangeBox + * @param direction the direction of exchangeBox + * @param mapper functor to map a CUDA block to a supercell + */ + template< + typename T_ExchangeBox, + typename T_SrcBox, + typename T_Extent, + typename T_Mapping, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_ExchangeBox& exchangeBox, + T_SrcBox const& srcBox, + T_Extent const& exchangeSize, + T_Extent const& direction, + T_Mapping const& mapper) const { - // cell index within the superCell - DataSpace< dim > const cellIdx = DataSpaceOperations< dim >::template map< SuperCellSize >( linearIdx ); + using namespace mappings::threads; - DataSpace< T_Mapping::Dim > const sourceCell( blockCell + cellIdx ); - DataSpace< dim > targetCell( sourceCell - nullSourceCell ); + using SuperCellSize = typename T_Mapping::SuperCellSize; - // supercell offset relative to the guard origin (in cells) - DataSpace< dim > superCellOffsetInGuard( ( targetCell / SuperCellSize::toRT() ) * SuperCellSize::toRT() ); + // number of cells in a superCell + constexpr uint32_t numCells = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + PMACC_CONSTEXPR_CAPTURE int dim = T_Mapping::Dim; - /* defines if the virtual worker needs to copy the value of - * the cell to to the exchange box - */ - bool 
copyValue = true; + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + DataSpace const blockCell( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))) * SuperCellSize::toRT()); + + // origin in area from local GPU + DataSpace nullSourceCell(mapper.getSuperCellIndex(DataSpace()) * SuperCellSize::toRT()); - for( uint32_t d = 0; d < dim; ++d ) - { - if( direction[ d ] == -1 ) + auto const numGuardSuperCells = mapper.getGuardingSuperCells(); + + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + // cell index within the superCell + DataSpace const cellIdx + = DataSpaceOperations::template map(linearIdx); + + DataSpace const sourceCell(blockCell + cellIdx); + DataSpace targetCell(sourceCell - nullSourceCell); + + // supercell offset relative to the guard origin (in cells) + DataSpace superCellOffsetInGuard( + (targetCell / SuperCellSize::toRT()) * SuperCellSize::toRT()); + + /* defines if the virtual worker needs to copy the value of + * the cell to to the exchange box + */ + bool copyValue = true; + + for(uint32_t d = 0; d < dim; ++d) { - if( - superCellOffsetInGuard[ d ] + cellIdx[ d ] < - numGuardSuperCells[ d ] * SuperCellSize::toRT()[ d ] - exchangeSize[ d ] - ) + if(direction[d] == -1) + { + if(superCellOffsetInGuard[d] + cellIdx[d] + < numGuardSuperCells[d] * SuperCellSize::toRT()[d] - exchangeSize[d]) + copyValue = false; + targetCell[d] -= numGuardSuperCells[d] * SuperCellSize::toRT()[d] - exchangeSize[d]; + } + else if(direction[d] == 1 && superCellOffsetInGuard[d] + cellIdx[d] >= exchangeSize[d]) copyValue = false; - targetCell[ d ] -= numGuardSuperCells[ d ] * SuperCellSize::toRT()[ d ] - exchangeSize[ d ]; } - else if( - direction[d] == 1 && superCellOffsetInGuard[ d ] + cellIdx[ d ] >= - exchangeSize[d] - ) - copyValue = false; - } - - if( copyValue ) - exchangeBox( targetCell ) = srcBox( sourceCell ); + + if(copyValue) + exchangeBox(targetCell) = srcBox(sourceCell); + }); } - ); - } - }; - - /** copy guard of the local 
buffer to the exchange buffer - * - * AddExchangeToBorder is the opposite operation for the neighboring - * device to add the exchange buffer to the local field. - */ - struct CopyGuardToExchange - { - /** copy local guard to exchange buffer - * - * Copy data cell-wise from the guard of the local to the exchange buffer. - * - * @tparam T_SrcBuffer pmacc::GridBuffer, type of the used buffer - * @tparam T_SuperCellSize pmacc::math::CT::vector, size of the supercell in each direction - * - * @param srcBuffer source buffer with exchanges - * @param superCellSize compile time supercell size - * @param exchangeType the exchange direction which needs to be copied - */ - template< - typename T_SrcBuffer, - typename T_SuperCellSize - > - void operator()( - T_SrcBuffer & srcBuffer, - T_SuperCellSize const & superCellSize, - uint32_t const exchangeType - ) const - { - boost::ignore_unused( superCellSize ); + }; - using SuperCellSize = T_SuperCellSize; + /** copy guard of the local buffer to the exchange buffer + * + * AddExchangeToBorder is the opposite operation for the neighboring + * device to add the exchange buffer to the local field. + */ + struct CopyGuardToExchange + { + /** copy local guard to exchange buffer + * + * Copy data cell-wise from the guard of the local to the exchange buffer. 
+ * + * @tparam T_SrcBuffer pmacc::GridBuffer, type of the used buffer + * @tparam T_SuperCellSize pmacc::math::CT::vector, size of the supercell in each direction + * + * @param srcBuffer source buffer with exchanges + * @param superCellSize compile time supercell size + * @param exchangeType the exchange direction which needs to be copied + */ + template + void operator()( + T_SrcBuffer& srcBuffer, + T_SuperCellSize const& superCellSize, + uint32_t const exchangeType) const + { + boost::ignore_unused(superCellSize); - constexpr int dim = T_SuperCellSize::dim; + using SuperCellSize = T_SuperCellSize; - using MappingDesc = MappingDescription< - dim, - SuperCellSize - >; + constexpr int dim = T_SuperCellSize::dim; - /* use only the x dimension to determine the number of supercells in the guard - * pmacc restriction: all dimension must have the some number of guarding - * supercells. - */ - auto const numGuardSuperCells = srcBuffer.getGridLayout().getGuard() / - SuperCellSize::toRT(); - - MappingDesc const mappingDesc( - srcBuffer.getGridLayout().getDataSpace(), - numGuardSuperCells - ); - - ExchangeMapping< - GUARD, - MappingDesc - > mapper( mappingDesc, exchangeType ); - - DataSpace< dim > const direction = Mask::getRelativeDirections< dim >( - mapper.getExchangeType( ) - ); - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelCopyGuardToExchange< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - srcBuffer.getSendExchange( exchangeType ).getDeviceBuffer( ).getDataBox( ), - srcBuffer.getDeviceBuffer( ).getDataBox( ), - srcBuffer.getSendExchange( exchangeType ).getDeviceBuffer( ).getDataSpace( ), - direction, - mapper - ); - } - }; - -} // namespace operations -} // namespace fields + using MappingDesc = MappingDescription; + + /* use only the x dimension to determine the number of supercells in the guard + * pmacc restriction: all dimension must have 
the some number of guarding + * supercells. + */ + auto const numGuardSuperCells = srcBuffer.getGridLayout().getGuard() / SuperCellSize::toRT(); + + MappingDesc const mappingDesc(srcBuffer.getGridLayout().getDataSpace(), numGuardSuperCells); + + ExchangeMapping mapper(mappingDesc, exchangeType); + + DataSpace const direction = Mask::getRelativeDirections(mapper.getExchangeType()); + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelCopyGuardToExchange{}) + (mapper.getGridDim(), numWorkers)( + srcBuffer.getSendExchange(exchangeType).getDeviceBuffer().getDataBox(), + srcBuffer.getDeviceBuffer().getDataBox(), + srcBuffer.getSendExchange(exchangeType).getDeviceBuffer().getDataSpace(), + direction, + mapper); + } + }; + + } // namespace operations + } // namespace fields } // namespace pmacc diff --git a/include/pmacc/fields/tasks/FieldFactory.hpp b/include/pmacc/fields/tasks/FieldFactory.hpp index 7d7df44659..a2c6804c5a 100644 --- a/include/pmacc/fields/tasks/FieldFactory.hpp +++ b/include/pmacc/fields/tasks/FieldFactory.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -28,7 +28,6 @@ namespace pmacc { - /** * Singleton Factory-pattern class for creation of several types of EventTasks. * Tasks are not actually 'returned' but immediately initialised and @@ -37,34 +36,34 @@ namespace pmacc class FieldFactory { public: - /** * Creates a TaskReceive. 
* @param ex Exchange to create new TaskReceive with * @param task_out returns the newly created task - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ template - EventTask createTaskFieldReceiveAndInsert(Field &buffer, - ITask *registeringTask = nullptr); + EventTask createTaskFieldReceiveAndInsert(Field& buffer, ITask* registeringTask = nullptr); template - EventTask createTaskFieldReceiveAndInsertExchange(Field &buffer, uint32_t exchange, - ITask *registeringTask = nullptr); + EventTask createTaskFieldReceiveAndInsertExchange( + Field& buffer, + uint32_t exchange, + ITask* registeringTask = nullptr); /** * Creates a TaskSend. * @param ex Exchange to create new TaskSend with * @param task_in TaskReceive to register at new TaskSend - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ template - EventTask createTaskFieldSend(Field &buffer, - ITask *registeringTask = nullptr); + EventTask createTaskFieldSend(Field& buffer, ITask* registeringTask = nullptr); template - EventTask createTaskFieldSendExchange(Field &buffer, uint32_t exchange, - ITask *registeringTask = nullptr); + EventTask createTaskFieldSendExchange(Field& buffer, uint32_t exchange, ITask* registeringTask = nullptr); /** * returns the instance of this factory @@ -77,14 +76,11 @@ namespace pmacc } private: + FieldFactory(){}; - FieldFactory() { }; - - FieldFactory(const FieldFactory&) { }; - + FieldFactory(const FieldFactory&){}; }; -} //namespace pmacc +} // namespace pmacc #include "pmacc/fields/tasks/FieldFactory.tpp" - diff --git a/include/pmacc/fields/tasks/FieldFactory.tpp b/include/pmacc/fields/tasks/FieldFactory.tpp index 
6c27b90a29..cc4a68c0e4 100644 --- a/include/pmacc/fields/tasks/FieldFactory.tpp +++ b/include/pmacc/fields/tasks/FieldFactory.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -32,47 +32,44 @@ namespace pmacc { - template - inline EventTask FieldFactory::createTaskFieldReceiveAndInsert(Field &buffer, - ITask *registeringTask) + inline EventTask FieldFactory::createTaskFieldReceiveAndInsert(Field& buffer, ITask* registeringTask) { - TaskFieldReceiveAndInsert* task = new TaskFieldReceiveAndInsert (buffer); + TaskFieldReceiveAndInsert* task = new TaskFieldReceiveAndInsert(buffer); return Environment<>::get().Factory().startTask(*task, registeringTask); } template - inline EventTask FieldFactory::createTaskFieldReceiveAndInsertExchange(Field &buffer, uint32_t exchange, - ITask *registeringTask) + inline EventTask FieldFactory::createTaskFieldReceiveAndInsertExchange( + Field& buffer, + uint32_t exchange, + ITask* registeringTask) { - TaskFieldReceiveAndInsertExchange* task = new TaskFieldReceiveAndInsertExchange (buffer, exchange); + TaskFieldReceiveAndInsertExchange* task + = new TaskFieldReceiveAndInsertExchange(buffer, exchange); return Environment<>::get().Factory().startTask(*task, registeringTask); } template - inline EventTask FieldFactory::createTaskFieldSend(Field &buffer, - ITask *registeringTask) + inline EventTask FieldFactory::createTaskFieldSend(Field& buffer, ITask* registeringTask) { - TaskFieldSend* task = new TaskFieldSend (buffer); + TaskFieldSend* task = new TaskFieldSend(buffer); return Environment<>::get().Factory().startTask(*task, registeringTask); } template - inline EventTask FieldFactory::createTaskFieldSendExchange(Field &buffer, uint32_t exchange, - ITask *registeringTask) + inline EventTask FieldFactory::createTaskFieldSendExchange( + Field& buffer, + uint32_t exchange, + ITask* registeringTask) { - TaskFieldSendExchange* task = new TaskFieldSendExchange 
(buffer, exchange); + TaskFieldSendExchange* task = new TaskFieldSendExchange(buffer, exchange); return Environment<>::get().Factory().startTask(*task, registeringTask); } - -} //namespace pmacc - - - - +} // namespace pmacc diff --git a/include/pmacc/fields/tasks/TaskFieldReceiveAndInsert.hpp b/include/pmacc/fields/tasks/TaskFieldReceiveAndInsert.hpp index 02b57946ec..fd929c9c01 100644 --- a/include/pmacc/fields/tasks/TaskFieldReceiveAndInsert.hpp +++ b/include/pmacc/fields/tasks/TaskFieldReceiveAndInsert.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -32,114 +32,107 @@ namespace pmacc { - -template -class TaskFieldReceiveAndInsert : public MPITask -{ -public: - - - static constexpr uint32_t Dim = picongpu::simDim; - - TaskFieldReceiveAndInsert(Field &buffer) : - m_buffer(buffer), - m_state(Constructor) + template + class TaskFieldReceiveAndInsert : public MPITask { - } + public: + static constexpr uint32_t Dim = picongpu::simDim; - virtual void init() - { - m_state = Init; - EventTask serialEvent = __getTransactionEvent(); + TaskFieldReceiveAndInsert(Field& buffer) : m_buffer(buffer), m_state(Constructor) + { + } - for (uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) + virtual void init() { - if (m_buffer.getGridBuffer().hasReceiveExchange(i)) + m_state = Init; + EventTask serialEvent = __getTransactionEvent(); + + for(uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) { - __startTransaction(serialEvent); - FieldFactory::getInstance().createTaskFieldReceiveAndInsertExchange(m_buffer, i); - m_tmpEvent += __endTransaction(); + if(m_buffer.getGridBuffer().hasReceiveExchange(i)) + { + __startTransaction(serialEvent); + FieldFactory::getInstance().createTaskFieldReceiveAndInsertExchange(m_buffer, i); + m_tmpEvent += __endTransaction(); + } } + m_state = WaitForReceived; } - m_state = WaitForReceived; - } - bool executeIntern() - { - switch (m_state) + bool 
executeIntern() { - case Init: - break; - case WaitForReceived: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_tmpEvent.getTaskId())) - { - m_state = Insert; - } - break; - case Insert: - m_state = Wait; - __startTransaction(); - for (uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) + switch(m_state) { - if (m_buffer.getGridBuffer().hasReceiveExchange(i)) + case Init: + break; + case WaitForReceived: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_tmpEvent.getTaskId())) { - m_buffer.insertField(i); + m_state = Insert; } - } - m_tmpEvent = __endTransaction(); - m_state = WaitInsertFinished; - break; - case Wait: - break; - case WaitInsertFinished: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_tmpEvent.getTaskId())) - { - m_state = Finish; + break; + case Insert: + m_state = Wait; + __startTransaction(); + for(uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) + { + if(m_buffer.getGridBuffer().hasReceiveExchange(i)) + { + m_buffer.insertField(i); + } + } + m_tmpEvent = __endTransaction(); + m_state = WaitInsertFinished; + break; + case Wait: + break; + case WaitInsertFinished: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_tmpEvent.getTaskId())) + { + m_state = Finish; + return true; + } + break; + case Finish: return true; + default: + return false; } - break; - case Finish: - return true; - default: + return false; } - return false; - } + virtual ~TaskFieldReceiveAndInsert() + { + notify(this->myId, RECVFINISHED, nullptr); + } - virtual ~TaskFieldReceiveAndInsert() - { - notify(this->myId, RECVFINISHED, nullptr); - } + void event(id_t, EventType, IEventData*) + { + } - void event(id_t, EventType, IEventData*) - { - } + std::string toString() + { + return "TaskFieldReceiveAndInsert"; + } - std::string toString() - { - return "TaskFieldReceiveAndInsert"; - } + private: + enum state_t + { + Constructor, + Init, + Wait, + Insert, + WaitInsertFinished, + 
WaitForReceived, + Finish -private: + }; - enum state_t - { - Constructor, - Init, - Wait, - Insert, - WaitInsertFinished, - WaitForReceived, - Finish + Field& m_buffer; + state_t m_state; + EventTask m_tmpEvent; }; - - Field& m_buffer; - state_t m_state; - EventTask m_tmpEvent; - -}; - -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/fields/tasks/TaskFieldReceiveAndInsertExchange.hpp b/include/pmacc/fields/tasks/TaskFieldReceiveAndInsertExchange.hpp index 8151e77d38..248a7d5c68 100644 --- a/include/pmacc/fields/tasks/TaskFieldReceiveAndInsertExchange.hpp +++ b/include/pmacc/fields/tasks/TaskFieldReceiveAndInsertExchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -28,89 +28,81 @@ #include "pmacc/eventSystem/events/EventDataReceive.hpp" - namespace pmacc { - -template -class TaskFieldReceiveAndInsertExchange : public MPITask -{ -public: - - TaskFieldReceiveAndInsertExchange(Field &buffer, uint32_t exchange) : - m_buffer(buffer), - m_exchange(exchange), - m_state(Constructor), - initDependency(__getTransactionEvent()) + template + class TaskFieldReceiveAndInsertExchange : public MPITask { - } + public: + TaskFieldReceiveAndInsertExchange(Field& buffer, uint32_t exchange) + : m_buffer(buffer) + , m_exchange(exchange) + , m_state(Constructor) + , initDependency(__getTransactionEvent()) + { + } - virtual void init() - { - m_state = Init; - initDependency = m_buffer.getGridBuffer().asyncReceive(initDependency, m_exchange); - m_state = WaitForReceive; - } + virtual void init() + { + m_state = Init; + initDependency = m_buffer.getGridBuffer().asyncReceive(initDependency, m_exchange); + m_state = WaitForReceive; + } - bool executeIntern() - { - switch (m_state) + bool executeIntern() { - case Init: - break; - case WaitForReceive: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(initDependency.getTaskId())) + switch(m_state) { - m_state = Finished; 
+ case Init: + break; + case WaitForReceive: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(initDependency.getTaskId())) + { + m_state = Finished; + return true; + } + break; + case Finished: return true; + default: + return false; } - break; - case Finished: - return true; - default: + return false; } - return false; - } + virtual ~TaskFieldReceiveAndInsertExchange() + { + notify(this->myId, RECVFINISHED, nullptr); + } - virtual ~TaskFieldReceiveAndInsertExchange() - { - notify(this->myId, RECVFINISHED, nullptr); - } + void event(id_t, EventType, IEventData*) + { + } - void event(id_t, EventType, IEventData*) - { - } + std::string toString() + { + std::ostringstream stateNumber; + stateNumber << m_state; + return std::string("TaskFieldReceiveAndInsertExchange/") + stateNumber.str(); + } - std::string toString() - { - std::ostringstream stateNumber; - stateNumber << m_state; - return std::string("TaskFieldReceiveAndInsertExchange/") + stateNumber.str(); - } + private: + enum state_t + { + Constructor, + Init, + WaitForReceive, + Finished -private: + }; - enum state_t - { - Constructor, - Init, - WaitForReceive, - Finished + Field& m_buffer; + state_t m_state; + EventTask insertEvent; + EventTask initDependency; + uint32_t m_exchange; }; - - - - Field& m_buffer; - state_t m_state; - EventTask insertEvent; - EventTask initDependency; - uint32_t m_exchange; -}; - -} //namespace pmacc - - +} // namespace pmacc diff --git a/include/pmacc/fields/tasks/TaskFieldSend.hpp b/include/pmacc/fields/tasks/TaskFieldSend.hpp index a132d4f6fd..adb951e91c 100644 --- a/include/pmacc/fields/tasks/TaskFieldSend.hpp +++ b/include/pmacc/fields/tasks/TaskFieldSend.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -31,29 +31,27 @@ namespace pmacc { - template class TaskFieldSend : public MPITask { public: - enum { Dim = picongpu::simDim }; - TaskFieldSend(Field &buffer) : - m_buffer(buffer), - m_state(Constructor) { } + TaskFieldSend(Field& buffer) : m_buffer(buffer), m_state(Constructor) + { + } virtual void init() { m_state = Init; EventTask serialEvent = __getTransactionEvent(); - for (uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) + for(uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) { - if (m_buffer.getGridBuffer().hasSendExchange(i)) + if(m_buffer.getGridBuffer().hasSendExchange(i)) { __startTransaction(serialEvent); FieldFactory::getInstance().createTaskFieldSendExchange(m_buffer, i); @@ -65,14 +63,14 @@ namespace pmacc bool executeIntern() { - switch (m_state) + switch(m_state) { - case Init: - break; - case WaitForSend: - return nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()); - default: - return false; + case Init: + break; + case WaitForSend: + return nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()); + default: + return false; } return false; @@ -83,7 +81,9 @@ namespace pmacc notify(this->myId, SENDFINISHED, nullptr); } - void event(id_t, EventType, IEventData*) { } + void event(id_t, EventType, IEventData*) + { + } std::string toString() { @@ -91,7 +91,6 @@ namespace pmacc } private: - enum state_t { Constructor, @@ -106,4 +105,4 @@ namespace pmacc EventTask tmpEvent; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/fields/tasks/TaskFieldSendExchange.hpp b/include/pmacc/fields/tasks/TaskFieldSendExchange.hpp index b13bf3af25..bb4feb7f3f 100644 --- a/include/pmacc/fields/tasks/TaskFieldSendExchange.hpp +++ b/include/pmacc/fields/tasks/TaskFieldSendExchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,20 +28,17 @@ #include "pmacc/eventSystem/events/EventDataReceive.hpp" - namespace pmacc { - template class TaskFieldSendExchange : public MPITask { public: - - TaskFieldSendExchange(Field &buffer, uint32_t exchange) : - m_buffer(buffer), - m_exchange(exchange), - m_state(Constructor), - m_initDependency(__getTransactionEvent()) + TaskFieldSendExchange(Field& buffer, uint32_t exchange) + : m_buffer(buffer) + , m_exchange(exchange) + , m_state(Constructor) + , m_initDependency(__getTransactionEvent()) { } @@ -56,13 +53,13 @@ namespace pmacc bool executeIntern() { - switch (m_state) + switch(m_state) { case Init: break; case WaitForBash: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_initDependency.getTaskId()) ) + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_initDependency.getTaskId())) { m_state = InitSend; m_sendEvent = m_buffer.getGridBuffer().asyncSend(EventTask(), m_exchange); @@ -74,7 +71,7 @@ namespace pmacc case InitSend: break; case WaitForSendEnd: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_sendEvent.getTaskId())) + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(m_sendEvent.getTaskId())) { m_state = Finished; return true; @@ -104,7 +101,6 @@ namespace pmacc } private: - enum state_t { Constructor, @@ -124,5 +120,4 @@ namespace pmacc uint32_t m_exchange; }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/filter/Interface.hpp b/include/pmacc/filter/Interface.hpp index c7e6c50760..7e6d832eed 100644 --- a/include/pmacc/filter/Interface.hpp +++ b/include/pmacc/filter/Interface.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -27,28 +27,20 @@ namespace pmacc { -namespace filter -{ - - /** Interface for a filter - * - * A filter is a functor which is evaluated to true or false depending - * on the input parameters. - * A filter can be used to decide e.g. 
if a particle is located in a user - * defined area or if an attribute is above a threshold. - * - * @tparam T_UserFunctor pmacc::functor::Interface, type of the functor (filter rule) - * @tparam T_numArguments number of arguments which must be supported by T_UserFunctor - */ - template< - typename T_UserFunctor, - uint32_t T_numArguments - > - using Interface = pmacc::functor::Interface< - T_UserFunctor, - T_numArguments, - bool - >; + namespace filter + { + /** Interface for a filter + * + * A filter is a functor which is evaluated to true or false depending + * on the input parameters. + * A filter can be used to decide e.g. if a particle is located in a user + * defined area or if an attribute is above a threshold. + * + * @tparam T_UserFunctor pmacc::functor::Interface, type of the functor (filter rule) + * @tparam T_numArguments number of arguments which must be supported by T_UserFunctor + */ + template + using Interface = pmacc::functor::Interface; -} // namespace filter + } // namespace filter } // namespace pmacc diff --git a/include/pmacc/filter/operators/And.hpp b/include/pmacc/filter/operators/And.hpp index 719f907810..107680e8f7 100644 --- a/include/pmacc/filter/operators/And.hpp +++ b/include/pmacc/filter/operators/And.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -24,42 +24,36 @@ namespace pmacc { -namespace filter -{ -namespace operators -{ - - //! combine all arguments by AND `&&` - struct And + namespace filter { - /** return a - * - * @param a a boolean value - * @return the input argument - */ - template< typename T_Arg > - HDINLINE bool - operator()( T_Arg const a ) const + namespace operators { - return a; - } + //! 
combine all arguments by AND `&&` + struct And + { + /** return a + * + * @param a a boolean value + * @return the input argument + */ + template + HDINLINE bool operator()(T_Arg const a) const + { + return a; + } - /** get AND combined result - * - * @param args arguments to combine - * @return AND combination of all arguments - */ - template< - typename T_Arg1, - typename ... T_Args - > - HDINLINE bool - operator()( T_Arg1 const a, T_Args const ... args ) const - { - return a && And{}( args ... ); - } - }; + /** get AND combined result + * + * @param args arguments to combine + * @return AND combination of all arguments + */ + template + HDINLINE bool operator()(T_Arg1 const a, T_Args const... args) const + { + return a && And{}(args...); + } + }; -} // namespace operators -} // namespace filter + } // namespace operators + } // namespace filter } // namespace pmacc diff --git a/include/pmacc/filter/operators/Or.hpp b/include/pmacc/filter/operators/Or.hpp index 13532b4ca9..fb6bca211c 100644 --- a/include/pmacc/filter/operators/Or.hpp +++ b/include/pmacc/filter/operators/Or.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -24,42 +24,36 @@ namespace pmacc { -namespace filter -{ -namespace operators -{ - - //! combine all arguments by OR `||` - struct Or + namespace filter { - /** return a - * - * @param a a boolean value - * @return the input argument - */ - template< typename T_Arg > - HDINLINE bool - operator()( T_Arg const a ) const + namespace operators { - return a; - } + //! combine all arguments by OR `||` + struct Or + { + /** return a + * + * @param a a boolean value + * @return the input argument + */ + template + HDINLINE bool operator()(T_Arg const a) const + { + return a; + } - /** get OR combined result - * - * @param args arguments to combine - * @return OR combination of all arguments - */ - template< - typename T_Arg1, - typename ... 
T_Args - > - HDINLINE bool - operator()( T_Arg1 const a, T_Args const ... args ) const - { - return a || Or{}( args ... ); - } - }; + /** get OR combined result + * + * @param args arguments to combine + * @return OR combination of all arguments + */ + template + HDINLINE bool operator()(T_Arg1 const a, T_Args const... args) const + { + return a || Or{}(args...); + } + }; -} // namespace operators -} // namespace filter + } // namespace operators + } // namespace filter } // namespace pmacc diff --git a/include/pmacc/functor/Filtered.hpp b/include/pmacc/functor/Filtered.hpp index 8e4503f06b..b4ff6644e6 100644 --- a/include/pmacc/functor/Filtered.hpp +++ b/include/pmacc/functor/Filtered.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -30,243 +30,145 @@ namespace pmacc { -namespace functor -{ -namespace acc -{ - - - /** interface to combine a filter and a functor on the accelerator - * - * @tparam T_FilterOperator pmacc::filter::operators, type concatenate the - * results of the filter - * @tparam T_Filter pmacc::filter::Interface, type of the filter - * @tparam T_Functor pmacc::functor::Interface, type of the functor - */ - template< - typename T_FilterOperator, - typename T_Filter, - typename T_Functor - > - struct Filtered : - private T_Filter, - public T_Functor + namespace functor { - using Filter = T_Filter; - using Functor = T_Functor; - - HDINLINE Filtered( - Filter const & filter, - Functor const & functor - ) : - Filter( filter ), - Functor( functor ) + namespace acc { - - } - - /** execute the functor depending of the filter result + /** interface to combine a filter and a functor on the accelerator + * + * @tparam T_FilterOperator pmacc::filter::operators, type concatenate the + * results of the filter + * @tparam T_Filter pmacc::filter::Interface, type of the filter + * @tparam T_Functor pmacc::functor::Interface, type of the functor + */ + template + struct Filtered + : 
private T_Filter + , public T_Functor + { + using Filter = T_Filter; + using Functor = T_Functor; + + HDINLINE Filtered(Filter const& filter, Functor const& functor) : Filter(filter), Functor(functor) + { + } + + /** execute the functor depending of the filter result + * + * Call the filter for each argument. If the combined result is true + * the user functor is called. + * + * @param args arguments passed to the functor if the filter results of + * each argument evaluate to true when combined + */ + template + HDINLINE auto operator()(T_Acc const& acc, T_Args&&... args) -> void + { + // call the filter on each argument and combine the results + bool const combinedResult = T_FilterOperator{}((*static_cast(this))(acc, args)...); + + if(combinedResult) + (*static_cast(this))(acc, args...); + } + }; + + } // namespace acc + + /** combine a filter and a functor * - * Call the filter for each argument. If the combined result is true - * the user functor is called. + * Creates a functor where each argument which is passed to + * the accelerator instance is evaluated by the filter and if the + * combined result is true the functor is executed. * - * @param args arguments passed to the functor if the filter results of - * each argument evaluate to true when combined + * @tparam T_FilterOperator pmacc::filter::operators, type concatenate the + * results of the filter + * @tparam T_Filter pmacc::filter::Interface, type of the filter + * @tparam T_Functor pmacc::functor::Interface, type of the functor */ - template< - typename T_Acc, - typename ... T_Args - > - HDINLINE auto operator( )( - T_Acc const & acc, - T_Args && ... args - ) - -> void - { - // call the filter on each argument and combine the results - bool const combinedResult = T_FilterOperator{ }( - ( *static_cast< Filter * >( this ) )( acc, args ) ... - ); - - if( combinedResult ) - ( *static_cast< Functor * >( this ) )( acc, args ... 
); - } - }; - -} // namespace acc - - /** combine a filter and a functor - * - * Creates a functor where each argument which is passed to - * the accelerator instance is evaluated by the filter and if the - * combined result is true the functor is executed. - * - * @tparam T_FilterOperator pmacc::filter::operators, type concatenate the - * results of the filter - * @tparam T_Filter pmacc::filter::Interface, type of the filter - * @tparam T_Functor pmacc::functor::Interface, type of the functor - */ - template< - typename T_FilterOperator, - typename T_Filter, - typename T_Functor - > - struct Filtered; - - /** specialization of Filtered (with unary filter) - * - * This specialization can only be used if T_Filter is of the type pmacc::filter::Interface - * and T_Functor is of the type pmacc::functor::Interface. - * A unary filters means that each argument can only pass the same filter - * check before its results are combined. - */ - template< - typename T_FilterOperator, - typename T_Filter, - typename T_Functor, - uint32_t T_numFunctorArguments - - > - struct Filtered< - T_FilterOperator, - filter::Interface< - T_Filter, - 1u - >, - Interface< - T_Functor, - T_numFunctorArguments, - void - > - > : - private filter::Interface< - T_Filter, - 1u - >, - Interface< - T_Functor, - T_numFunctorArguments, - void - > - { + template + struct Filtered; - template< typename ... T_Params > - struct apply - { - using type = Filtered< - T_FilterOperator, - typename boost::mpl::apply< - T_Filter, - T_Params ... - >::type, - typename boost::mpl::apply< - T_Functor, - T_Params ... 
- >::type - >; - }; - - using Filter = filter::Interface< - T_Filter, - 1u - >; - using Functor = Interface< - T_Functor, - T_numFunctorArguments, - void - >; - - template< typename DeferFunctor = Functor > - HINLINE Filtered( uint32_t const currentStep ) : - Filter( currentStep ), - Functor( currentStep ) - { - } - - - /** create a filtered functor which can be used on the accelerator + /** specialization of Filtered (with unary filter) * - * @tparam T_OffsetType type to describe the size of a domain - * @tparam T_numWorkers number of workers - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param domainOffset offset to the origin of the local domain - * This can be e.g a supercell or cell offset and depends - * of the context where the interface is specialized. - * @param workerCfg configuration of the worker - * @return accelerator instance of the filtered functor + * This specialization can only be used if T_Filter is of the type pmacc::filter::Interface + * and T_Functor is of the type pmacc::functor::Interface. + * A unary filters means that each argument can only pass the same filter + * check before its results are combined. 
*/ template< - typename T_OffsetType, - uint32_t T_numWorkers, - typename T_Acc - > - HDINLINE auto - operator( )( - T_Acc const & acc, - T_OffsetType const & domainOffset, - mappings::threads::WorkerCfg< T_numWorkers > const & workerCfg - ) const - -> acc::Filtered< - T_FilterOperator, - decltype( - alpaka::core::declval< Filter >( )( - acc, - domainOffset, - workerCfg - ) - ), - decltype( - alpaka::core::declval< Functor >( )( - acc, - domainOffset, - workerCfg - ) - ) - > - { - return acc::Filtered< - T_FilterOperator, - decltype( - alpaka::core::declval< Filter >( )( - acc, - domainOffset, - workerCfg - ) - ), - decltype( - alpaka::core::declval< Functor >( )( - acc, - domainOffset, - workerCfg - ) - ) - >( - ( *static_cast< Filter const * >( this ) )( - acc, - domainOffset, - workerCfg - ), - ( *static_cast< Functor const * >( this ) )( - acc, - domainOffset, - workerCfg - ) - ); - } + typename T_FilterOperator, + typename T_Filter, + typename T_Functor, + uint32_t T_numFunctorArguments - /** get name the of the filtered functor - * - * @return combination of the filter and functor name, the names are - * separated by an underscore `_` - */ - HINLINE std::string - getName( ) const + > + struct Filtered< + T_FilterOperator, + filter::Interface, + Interface> + : private filter::Interface + , Interface { - return Filter::getName( ) + std::string("_") + Functor::getName( ); - } - }; + template + struct apply + { + using type = Filtered< + T_FilterOperator, + typename boost::mpl::apply::type, + typename boost::mpl::apply::type>; + }; + + using Filter = filter::Interface; + using Functor = Interface; + + template + HINLINE Filtered(uint32_t const currentStep) : Filter(currentStep) + , Functor(currentStep) + { + } + + + /** create a filtered functor which can be used on the accelerator + * + * @tparam T_OffsetType type to describe the size of a domain + * @tparam T_numWorkers number of workers + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka 
accelerator + * @param domainOffset offset to the origin of the local domain + * This can be e.g a supercell or cell offset and depends + * of the context where the interface is specialized. + * @param workerCfg configuration of the worker + * @return accelerator instance of the filtered functor + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + T_OffsetType const& domainOffset, + mappings::threads::WorkerCfg const& workerCfg) const + -> acc::Filtered< + T_FilterOperator, + decltype(alpaka::core::declval()(acc, domainOffset, workerCfg)), + decltype(alpaka::core::declval()(acc, domainOffset, workerCfg))> + { + return acc::Filtered< + T_FilterOperator, + decltype(alpaka::core::declval()(acc, domainOffset, workerCfg)), + decltype(alpaka::core::declval()(acc, domainOffset, workerCfg))>( + (*static_cast(this))(acc, domainOffset, workerCfg), + (*static_cast(this))(acc, domainOffset, workerCfg)); + } + + /** get name the of the filtered functor + * + * @return combination of the filter and functor name, the names are + * separated by an underscore `_` + */ + HINLINE std::string getName() const + { + return Filter::getName() + std::string("_") + Functor::getName(); + } + }; -} // namespace functor + } // namespace functor } // namespace pmacc diff --git a/include/pmacc/functor/Interface.hpp b/include/pmacc/functor/Interface.hpp index 785a6d6c5b..491ff66358 100644 --- a/include/pmacc/functor/Interface.hpp +++ b/include/pmacc/functor/Interface.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -31,219 +31,170 @@ namespace pmacc { -namespace functor -{ -namespace acc -{ -namespace detail -{ - /** Helper class to compare void with void - * - * std::is_same does not allow to use void as type. By wrapping the type before - * comparing, we can workaround this limitation. 
- * - * @tparam T_Type type to be wrapped - */ - template< typename T_Type > - struct VoidWrapper - { - - }; -} // namespace detail - - /** functor interface used on the accelerator side - * - * The user functor of the type T_UserFunctor must contain - * - the `operator()` with T_numArguments arguments and a return type T_ReturnType. - * - a copy constructor - * This interface is used to wrap the user functor to make sure that - * the required interface is fulfilled. - * - * @tparam T_UserFunctor user functor type - * @tparam T_numArguments number which must be supported by T_UserFunctor - * @tparam T_ReturnType required return type of T_UserFunctor - */ - template< - typename T_UserFunctor, - uint32_t T_numArguments, - typename T_ReturnType - > - struct Interface : public T_UserFunctor + namespace functor { - //! type of the user functor - using UserFunctor = T_UserFunctor; - - /** constructor - * - * @param functor user functor instance - */ - HDINLINE Interface( UserFunctor const & functor ) : - UserFunctor( functor ) - { - } - - /** execute the functor - * - * The number of arguments and the return type of the user functor are - * evaluated at compile-time and must be equal to the interface description. - * - * @tparam T_Args type of the arguments passed to the user functor - * - * @param args arguments passed to the user functor - * @return T_ReturnType - */ - template< - typename T_Acc, - typename ... T_Args - > - HDINLINE auto operator( )( - T_Acc const & acc, - T_Args && ... args ) - -> T_ReturnType + namespace acc { - /* check if the current used number of arguments to execute the - * functor is equal to the interface requirements + namespace detail + { + /** Helper class to compare void with void + * + * std::is_same does not allow to use void as type. By wrapping the type before + * comparing, we can workaround this limitation. 
+ * + * @tparam T_Type type to be wrapped + */ + template + struct VoidWrapper + { + }; + } // namespace detail + + /** functor interface used on the accelerator side + * + * The user functor of the type T_UserFunctor must contain + * - the `operator()` with T_numArguments arguments and a return type T_ReturnType. + * - a copy constructor + * This interface is used to wrap the user functor to make sure that + * the required interface is fulfilled. + * + * @tparam T_UserFunctor user functor type + * @tparam T_numArguments number which must be supported by T_UserFunctor + * @tparam T_ReturnType required return type of T_UserFunctor */ - PMACC_CASSERT_MSG_TYPE( - __user_functor_has_wrong_number_of_arguments, - UserFunctor, - T_numArguments == sizeof...( args ) - ); - - // get the return type of the user functor - using UserFunctorReturnType = decltype( - alpaka::core::declval< UserFunctor >( )( acc, args ... ) - ); - - // compare user functor return type with the interface requirements - PMACC_CASSERT_MSG( - __wrong_user_functor_return_type, - std::is_same< - detail::VoidWrapper< UserFunctorReturnType >, - detail::VoidWrapper< T_ReturnType > - >::value - ); - return ( *static_cast< UserFunctor * >( this ) )( acc, args ... ); - } - }; - -} // namespace acc - - /** Interface for a user functor - * - * @tparam T_UserFunctor user functor type - * @tparam T_numArguments number of arguments which must be supported by T_UserFunctor - * @tparam T_ReturnType required return type of T_UserFunctor - */ - template< - typename T_UserFunctor, - uint32_t T_numArguments, - typename T_ReturnType - > - struct Interface : private T_UserFunctor - { - - //! type of the user functor - using UserFunctor = T_UserFunctor; - - /** constructor - * - * This constructor is only compiled if the user functor has - * a host side constructor with one (uint32_t) argument. + template + struct Interface : public T_UserFunctor + { + //! 
type of the user functor + using UserFunctor = T_UserFunctor; + + /** constructor + * + * @param functor user functor instance + */ + HDINLINE Interface(UserFunctor const& functor) : UserFunctor(functor) + { + } + + /** execute the functor + * + * The number of arguments and the return type of the user functor are + * evaluated at compile-time and must be equal to the interface description. + * + * @tparam T_Args type of the arguments passed to the user functor + * + * @param args arguments passed to the user functor + * @return T_ReturnType + */ + template + HDINLINE auto operator()(T_Acc const& acc, T_Args&&... args) -> T_ReturnType + { + /* check if the current used number of arguments to execute the + * functor is equal to the interface requirements + */ + PMACC_CASSERT_MSG_TYPE( + __user_functor_has_wrong_number_of_arguments, + UserFunctor, + T_numArguments == sizeof...(args)); + + // get the return type of the user functor + using UserFunctorReturnType = decltype(alpaka::core::declval()(acc, args...)); + + // compare user functor return type with the interface requirements + PMACC_CASSERT_MSG( + __wrong_user_functor_return_type, + std::is_same, detail::VoidWrapper>:: + value); + return (*static_cast(this))(acc, args...); + } + }; + + } // namespace acc + + /** Interface for a user functor * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param currentStep current simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) + * @tparam T_UserFunctor user functor type + * @tparam T_numArguments number of arguments which must be supported by T_UserFunctor + * @tparam T_ReturnType required return type of T_UserFunctor */ - template< typename DeferFunctor = UserFunctor > - HINLINE Interface( - uint32_t const currentStep, - typename std::enable_if< - std::is_constructible< - DeferFunctor, - uint32_t - >::value - >::type* = 0 - ) : UserFunctor( 
currentStep ) + template + struct Interface : private T_UserFunctor { - } - - /** constructor - * - * This constructor is only compiled if the user functor has a default constructor. - * - * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable - * the constructor - * @param currentStep simulation time step - * @param is used to enable/disable the constructor (do not pass any value to this parameter) - */ - template< typename DeferFunctor = UserFunctor > - HINLINE Interface( - uint32_t const currentStep, - typename std::enable_if< - std::is_constructible< DeferFunctor >::value - >::type* = 0 - ) : UserFunctor( ) - { - boost::ignore_unused( currentStep ); - } - - /** create a functor which can be used on the accelerator - * - * @tparam T_OffsetType type to describe the size of a domain - * @tparam T_numWorkers number of workers - * @tparam T_Acc alpaka accelerator type - * - * @param alpaka accelerator - * @param domainOffset offset to the origin of the local domain - * This can be e.g a supercell or cell offset and depends - * of the context where the interface is specialized. - * @param workerCfg configuration of the worker - * @return an instance of the user functor wrapped by the accelerator - * functor interface - */ - template< - typename T_OffsetType, - uint32_t T_numWorkers, - typename T_Acc - > - HDINLINE auto - operator( )( - T_Acc const & acc, - T_OffsetType const & domainOffset, - mappings::threads::WorkerCfg< T_numWorkers > const & workerCfg - ) const - -> acc::Interface< - decltype( - alpaka::core::declval< UserFunctor >( )( - acc, - domainOffset, - workerCfg - ) - ), - T_numArguments, - T_ReturnType - > - { - return ( *static_cast< UserFunctor const * >( this ) )( - acc, - domainOffset, - workerCfg - ); - } - - /** get name of the user functor - * - * @return name to identify the functor - */ - static - HINLINE std::string - getName( ) - { - return UserFunctor::getName( ); - } - }; + //! 
type of the user functor + using UserFunctor = T_UserFunctor; + + /** constructor + * + * This constructor is only compiled if the user functor has + * a host side constructor with one (uint32_t) argument. + * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param currentStep current simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE Interface( + uint32_t const currentStep, + typename std::enable_if::value>::type* = 0) + : UserFunctor(currentStep) + { + } + + /** constructor + * + * This constructor is only compiled if the user functor has a default constructor. + * + * @tparam DeferFunctor is used to defer the functor type evaluation to enable/disable + * the constructor + * @param currentStep simulation time step + * @param is used to enable/disable the constructor (do not pass any value to this parameter) + */ + template + HINLINE Interface( + uint32_t const currentStep, + typename std::enable_if::value>::type* = 0) + : UserFunctor() + { + boost::ignore_unused(currentStep); + } + + /** create a functor which can be used on the accelerator + * + * @tparam T_OffsetType type to describe the size of a domain + * @tparam T_numWorkers number of workers + * @tparam T_Acc alpaka accelerator type + * + * @param alpaka accelerator + * @param domainOffset offset to the origin of the local domain + * This can be e.g a supercell or cell offset and depends + * of the context where the interface is specialized. 
+ * @param workerCfg configuration of the worker + * @return an instance of the user functor wrapped by the accelerator + * functor interface + */ + template + HDINLINE auto operator()( + T_Acc const& acc, + T_OffsetType const& domainOffset, + mappings::threads::WorkerCfg const& workerCfg) const + -> acc::Interface< + decltype(alpaka::core::declval()(acc, domainOffset, workerCfg)), + T_numArguments, + T_ReturnType> + { + return (*static_cast(this))(acc, domainOffset, workerCfg); + } + + /** get name of the user functor + * + * @return name to identify the functor + */ + static HINLINE std::string getName() + { + return UserFunctor::getName(); + } + }; -} // namespace functor + } // namespace functor } // namespace pmacc diff --git a/include/pmacc/identifier/alias.hpp b/include/pmacc/identifier/alias.hpp index 7d430262b0..299b8a64c3 100644 --- a/include/pmacc/identifier/alias.hpp +++ b/include/pmacc/identifier/alias.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Benjamin Worpitz, +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -32,47 +32,9 @@ namespace pmacc { -identifier(pmacc_void); -identifier(pmacc_isAlias); -} //namespace pmacc - -#ifdef __CUDACC__ -# define PMACC_alias_CUDA(name,id) \ - namespace PMACC_JOIN(device_placeholder,id){ \ - /* This variable exists only for template parameter deduction, its - * value is never used. So in this case it is fine to have a - * separate version in each translation unit due to static. 
- */ \ - static __constant__ PMACC_JOIN(placeholder_definition,id)::name<> \ - PMACC_JOIN(name,_); \ - } -#else -# define PMACC_alias_CUDA(name,id) -#endif - -/*define special makros for creating classes which are only used as identifer*/ -#define PMACC_alias(name,id) \ - namespace PMACC_JOIN(placeholder_definition,id) { \ - template \ - struct name \ - { \ - static std::string getName() \ - { \ - return std::string(#name); \ - } \ - }; \ - } \ - using namespace PMACC_JOIN(placeholder_definition,id); \ - namespace PMACC_JOIN(host_placeholder,id){ \ - /* This variable exists only for template parameter deduction, its value - * is never used. So in this case it is fine to have a separate version - * in each translation unit due to static. - */ \ - static PMACC_JOIN(placeholder_definition,id)::name<> \ - PMACC_JOIN(name,_); \ - } \ - PMACC_alias_CUDA(name,id); \ - PMACC_PLACEHOLDER(id); + identifier(pmacc_void, ); + identifier(pmacc_isAlias, ); +} // namespace pmacc /** create an alias @@ -90,23 +52,30 @@ identifier(pmacc_isAlias); * get type which is represented by the alias * typedef typename traits::Resolve::type resolved_type; */ -#define alias(name) PMACC_alias(name,__COUNTER__) +#define alias(name) \ + template \ + struct name \ + { \ + static std::string getName() \ + { \ + return std::string(#name); \ + } \ + }; \ + constexpr name<> PMACC_JOIN(name, _) namespace pmacc { -namespace traits -{ - -template class T_Object, typename T_AnyType> -struct Resolve > -{ - /*solve recursive if alias is nested*/ - typedef typename bmpl::if_< - boost::is_same::type >, - T_AnyType, - typename Resolve::type - >::type type; -}; + namespace traits + { + template class T_Object, typename T_AnyType> + struct Resolve> + { + /*solve recursive if alias is nested*/ + typedef typename bmpl::if_< + boost::is_same::type>, + T_AnyType, + typename Resolve::type>::type type; + }; -} //namespace traits -} //namespace pmacc + } // namespace traits +} // namespace pmacc diff --git 
a/include/pmacc/identifier/identifier.hpp b/include/pmacc/identifier/identifier.hpp index 76e9eaeed8..a35adb12a4 100644 --- a/include/pmacc/identifier/identifier.hpp +++ b/include/pmacc/identifier/identifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. * @@ -24,46 +24,6 @@ #include "pmacc/types.hpp" #include "pmacc/ppFunctions.hpp" -/* No namespace is needed because we only have defines*/ - -#ifdef __CUDA_ARCH__ //we are on gpu -# define PMACC_PLACEHOLDER(id) using namespace PMACC_JOIN(device_placeholder,id) -#else -# define PMACC_PLACEHOLDER(id) using namespace PMACC_JOIN(host_placeholder,id) -#endif - -#ifdef __CUDACC__ -# define PMACC_identifier_CUDA(name,id) \ - namespace PMACC_JOIN(device_placeholder,id){ \ - /* This variable exists only for template parameter deduction, its value - * is never used. So in this case it is fine to have a separate version - * in each translation unit due to static. - */ \ - static __constant__ PMACC_JOIN(placeholder_definition,id)::name PMACC_JOIN(name,_); \ - } -#else -# define PMACC_identifier_CUDA(name,id) -#endif - -/*define special macros for creating classes which are only used as identifier*/ -#define PMACC_identifier(name,id,...) \ - namespace PMACC_JOIN(placeholder_definition,id) { \ - struct name{ \ - __VA_ARGS__ \ - }; \ - } \ - using namespace PMACC_JOIN(placeholder_definition,id); \ - namespace PMACC_JOIN(host_placeholder,id){ \ - /* This variable exists only for template parameter deduction, its value - * is never used. So in this case it is fine to have a separate version - * in each translation unit due to static. - */ \ - static PMACC_JOIN(placeholder_definition,id)::name PMACC_JOIN(name,_); \ - } \ - PMACC_identifier_CUDA(name,id); \ - PMACC_PLACEHOLDER(id); - - /** create an identifier (identifier with arbitrary code as second parameter * !! 
second parameter is optional and can be any C++ code one can add inside a class * @@ -74,4 +34,9 @@ * to create an instance of this identifier you can use: * varname(); or varname_ */ -#define identifier(name,...) PMACC_identifier(name,__COUNTER__,__VA_ARGS__) +#define identifier(name, ...) \ + struct name \ + { \ + __VA_ARGS__ \ + }; \ + constexpr name PMACC_JOIN(name, _) diff --git a/include/pmacc/identifier/named_type.hpp b/include/pmacc/identifier/named_type.hpp index 4b3401c9f6..4abd70855c 100644 --- a/include/pmacc/identifier/named_type.hpp +++ b/include/pmacc/identifier/named_type.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -43,12 +43,6 @@ * length(); or length_ * */ -#define named_type(in_type,name,...) \ - identifier(name, \ - typedef in_type type; \ - static std::string getName() \ - { \ - return std::string(#name); \ - } \ - __VA_ARGS__ \ - ) +#define named_type(in_type, name, ...) \ + identifier( \ + name, typedef in_type type; static std::string getName() { return std::string(#name); } __VA_ARGS__) diff --git a/include/pmacc/identifier/value_identifier.hpp b/include/pmacc/identifier/value_identifier.hpp index 5a80fed5e0..234b5e5951 100644 --- a/include/pmacc/identifier/value_identifier.hpp +++ b/include/pmacc/identifier/value_identifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -45,15 +45,8 @@ * to create a instance of this value_identifier you can use: * `length()` or `length_` */ -#define value_identifier(in_type,name,in_default) \ - identifier(name, \ - typedef in_type type; \ - static HDINLINE type getValue() \ - { \ - return in_default; \ - } \ - static std::string getName() \ - { \ - return std::string(#name); \ - } \ - ) +#define value_identifier(in_type, name, in_default) \ + identifier( \ + name, typedef in_type type; static HDINLINE type getValue() { \ + return in_default; \ + } static std::string getName() { return std::string(#name); }) diff --git a/include/pmacc/mappings/kernel/AreaMapping.hpp b/include/pmacc/mappings/kernel/AreaMapping.hpp index 7136a3b336..612c8b1d47 100644 --- a/include/pmacc/mappings/kernel/AreaMapping.hpp +++ b/include/pmacc/mappings/kernel/AreaMapping.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,24 +28,19 @@ namespace pmacc { - template class AreaMapping; - template< - uint32_t areaType, - template class baseClass, - unsigned DIM, - class SuperCellSize_ - > - class AreaMapping > : public baseClass + template class baseClass, unsigned DIM, class SuperCellSize_> + class AreaMapping> : public baseClass { public: typedef baseClass BaseClass; enum { - AreaType = areaType, Dim = BaseClass::Dim + AreaType = areaType, + Dim = BaseClass::Dim }; @@ -62,8 +57,7 @@ namespace pmacc */ HINLINE DataSpace getGridDim() const { - return AreaMappingMethods::getGridDim(*this, - this->getGridSuperCells()); + return AreaMappingMethods::getGridDim(*this, this->getGridSuperCells()); } /** @@ -74,11 +68,11 @@ namespace pmacc */ HDINLINE DataSpace getSuperCellIndex(const DataSpace& realSuperCellIdx) const { - return AreaMappingMethods::getBlockIndex(*this, - this->getGridSuperCells(), - realSuperCellIdx); + return AreaMappingMethods::getBlockIndex( + *this, + this->getGridSuperCells(), + realSuperCellIdx); } - }; } // namespace pmacc diff --git a/include/pmacc/mappings/kernel/AreaMappingMethods.hpp b/include/pmacc/mappings/kernel/AreaMappingMethods.hpp index 802cbf820a..d26f8314eb 100644 --- a/include/pmacc/mappings/kernel/AreaMappingMethods.hpp +++ b/include/pmacc/mappings/kernel/AreaMappingMethods.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,7 +26,6 @@ namespace pmacc { - /** * Helper class for AreaMapping. * Provides methods called by AreaMapping using template specialization. 
@@ -37,70 +36,70 @@ namespace pmacc template class AreaMappingMethods; - //CORE + BORDER + GUARD + // CORE + BORDER + GUARD template class AreaMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base&, const DataSpace &gBlocks) + HINLINE static DataSpace getGridDim(const Base&, const DataSpace& gBlocks) { return gBlocks; } template - HDINLINE static DataSpace getBlockIndex(const Base&, - const DataSpace&, - const DataSpace& _blockIdx) + HDINLINE static DataSpace getBlockIndex( + const Base&, + const DataSpace&, + const DataSpace& _blockIdx) { return _blockIdx; } }; - //CORE + // CORE template class AreaMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base &base, const DataSpace &gBlocks) + HINLINE static DataSpace getGridDim(const Base& base, const DataSpace& gBlocks) { // skip 2 x (border + guard) == 4 x guard return gBlocks - 4 * base.getGuardingSuperCells(); } template - HDINLINE static DataSpace getBlockIndex(const Base &base, - const DataSpace &gBlocks, - const DataSpace& _blockIdx) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& gBlocks, + const DataSpace& _blockIdx) { // skip guard + border == 2 x guard return _blockIdx + 2 * base.getGuardingSuperCells(); } }; - //CORE+BORDER + // CORE+BORDER template class AreaMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base &base, const DataSpace &gBlocks) + HINLINE static DataSpace getGridDim(const Base& base, const DataSpace& gBlocks) { // remove guard + border == 2 x guard return gBlocks - 2 * base.getGuardingSuperCells(); } template - HDINLINE static DataSpace getBlockIndex(const Base &base, - const DataSpace &gBlocks, - const DataSpace& _blockIdx) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& gBlocks, + const DataSpace& _blockIdx) { // skip guarding supercells return _blockIdx + base.getGuardingSuperCells(); @@ -108,20 +107,16 @@ namespace pmacc }; - //dim 
2D + // dim 2D - //GUARD + // GUARD template<> class AreaMappingMethods { public: - template - HINLINE static DataSpace< DIM2 > getGridDim( - const Base &base, - const DataSpace< DIM2 >& gBlocks - ) + HINLINE static DataSpace getGridDim(const Base& base, const DataSpace& gBlocks) { const int x = gBlocks.x(); const int y_ = gBlocks.y() - 2 * base.getGuardingSuperCells().y(); @@ -129,18 +124,14 @@ namespace pmacc const int xArea = x * base.getGuardingSuperCells().y(); const int y_Area = y_ * base.getGuardingSuperCells().x(); - return DataSpace< DIM2 >( - xArea + y_Area, - 2 - ); + return DataSpace(xArea + y_Area, 2); } template - HDINLINE static DataSpace< DIM2 > getBlockIndex( - const Base &base, - const DataSpace< DIM2 >& gBlocks, - const DataSpace< DIM2 >& _blockIdx - ) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& gBlocks, + const DataSpace& _blockIdx) { const int x = gBlocks.x(); @@ -149,63 +140,51 @@ namespace pmacc if(_blockIdx.x() < xArea) { const int tmp_x = _blockIdx.x(); - return DataSpace< DIM2 >( + return DataSpace( tmp_x % x, tmp_x / x + - // if _blockIdx.y() == 1 means bottom plane - _blockIdx.y() * (gBlocks.y() - base.getGuardingSuperCells().y()) - ); + // if _blockIdx.y() == 1 means bottom plane + _blockIdx.y() * (gBlocks.y() - base.getGuardingSuperCells().y())); } else { const int tmp_x = _blockIdx.x() - xArea; - return DataSpace< DIM2 >( + return DataSpace( tmp_x % base.getGuardingSuperCells().x() + - // if _blockIdx.y() == 1 means right plane - _blockIdx.y() * (gBlocks.x() - base.getGuardingSuperCells().x()), - tmp_x / base.getGuardingSuperCells().x() + base.getGuardingSuperCells().y() - ); + // if _blockIdx.y() == 1 means right plane + _blockIdx.y() * (gBlocks.x() - base.getGuardingSuperCells().x()), + tmp_x / base.getGuardingSuperCells().x() + base.getGuardingSuperCells().y()); } } }; - //BORDER + // BORDER template<> class AreaMappingMethods { public: - template - HINLINE static DataSpace< DIM2 > getGridDim( 
- const Base& base, - const DataSpace< DIM2 >& gBlocks - ) + HINLINE static DataSpace getGridDim(const Base& base, const DataSpace& gBlocks) { // removes the guard, than BORDER is the new GUARD and we can reuse the GUARD mapper - const DataSpace< DIM2 > sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); + const DataSpace sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); return AreaMappingMethods{}.getGridDim(base, sizeWithoutGuard); } template - HDINLINE static DataSpace< DIM2 > getBlockIndex( + HDINLINE static DataSpace getBlockIndex( const Base& base, - const DataSpace< DIM2 >& gBlocks, - const DataSpace< DIM2 >& _blockIdx - ) + const DataSpace& gBlocks, + const DataSpace& _blockIdx) { // removes the guard, than BORDER is the new GUARD and we can reuse the GUARD mapper - const DataSpace< DIM2 > sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); + const DataSpace sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); // use result of the shrinked domain and skip guarding supercells - return - AreaMappingMethods{}.getBlockIndex( - base, - sizeWithoutGuard, - _blockIdx - ) + - base.getGuardingSuperCells(); + return AreaMappingMethods{}.getBlockIndex(base, sizeWithoutGuard, _blockIdx) + + base.getGuardingSuperCells(); } }; @@ -213,9 +192,8 @@ namespace pmacc class AreaMappingMethods { public: - template - HINLINE static DataSpace< DIM3 > getGridDim(const Base &base, const DataSpace< DIM3 > &gBlocks) + HINLINE static DataSpace getGridDim(const Base& base, const DataSpace& gBlocks) { const int x = gBlocks.x(); const int x_ = gBlocks.x() - 2 * base.getGuardingSuperCells().x(); @@ -226,19 +204,14 @@ namespace pmacc const int z_yVolume = z_ * y * base.getGuardingSuperCells().x(); const int z_x_Volume = z_ * x_ * base.getGuardingSuperCells().y(); - return DataSpace< DIM3 >( - xyVolume + z_x_Volume + z_yVolume, - 2, - 1 - ); + return DataSpace(xyVolume + z_x_Volume + z_yVolume, 2, 1); } template - HDINLINE static DataSpace< DIM3 > 
getBlockIndex( - const Base &base, - const DataSpace< DIM3 >& gBlocks, - const DataSpace< DIM3 >& _blockIdx - ) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& gBlocks, + const DataSpace& _blockIdx) { const int x = gBlocks.x(); const int x_ = gBlocks.x() - 2 * base.getGuardingSuperCells().x(); @@ -258,9 +231,8 @@ namespace pmacc tmp_x % x, tmp_x / x % y, tmp_x / xyPlane + - // if _blockIdx.y() == 1 means back plane - _blockIdx.y() * (gBlocks.z() - base.getGuardingSuperCells().z()) - ); + // if _blockIdx.y() == 1 means back plane + _blockIdx.y() * (gBlocks.z() - base.getGuardingSuperCells().z())); } else if(_blockIdx.x() >= xyVolume && _blockIdx.x() < xyVolume + z_yVolume) { @@ -268,13 +240,12 @@ namespace pmacc const int z_yPlane = z_ * y; const int tmp_x = _blockIdx.x() - xyVolume; - return DataSpace< DIM3 >( + return DataSpace( tmp_x / z_yPlane + - // if _blockIdx.y() == 1 means right plane - _blockIdx.y() * (gBlocks.x() - base.getGuardingSuperCells().x()), + // if _blockIdx.y() == 1 means right plane + _blockIdx.y() * (gBlocks.x() - base.getGuardingSuperCells().x()), tmp_x % y, - tmp_x / y % z_ + base.getGuardingSuperCells().z() - ); + tmp_x / y % z_ + base.getGuardingSuperCells().z()); } else { @@ -284,10 +255,9 @@ namespace pmacc return DataSpace( (tmp_x % x_) + base.getGuardingSuperCells().x(), tmp_x / x_z_Plane + - // if _blockIdx.y() == 1 means bottom plane - _blockIdx.y() * (gBlocks.y() - base.getGuardingSuperCells().y()), - tmp_x / x_ % z_ + base.getGuardingSuperCells().z() - ); + // if _blockIdx.y() == 1 means bottom plane + _blockIdx.y() * (gBlocks.y() - base.getGuardingSuperCells().y()), + tmp_x / x_ % z_ + base.getGuardingSuperCells().z()); } } }; @@ -296,38 +266,28 @@ namespace pmacc class AreaMappingMethods { public: - template - HINLINE static DataSpace< DIM3 > getGridDim( - const Base &base, - const DataSpace< DIM3 > &gBlocks - ) + HINLINE static DataSpace getGridDim(const Base& base, const DataSpace& gBlocks) { 
// removes the guard, than BORDER is the new GUARD and we can reuse the GUARD mapper - const DataSpace< DIM3 > sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); + const DataSpace sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); return AreaMappingMethods{}.getGridDim(base, sizeWithoutGuard); } template - HDINLINE static DataSpace< DIM3 > getBlockIndex( - const Base &base, - const DataSpace< DIM3 >& gBlocks, - const DataSpace< DIM3 >& _blockIdx - ) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& gBlocks, + const DataSpace& _blockIdx) { // removes the guard, than BORDER is the new GUARD and we can reuse the GUARD mapper const DataSpace sizeWithoutGuard(gBlocks - 2 * base.getGuardingSuperCells()); // use result of the shrinked domain and skip guarding supercells - return - AreaMappingMethods{}.getBlockIndex( - base, - sizeWithoutGuard, - _blockIdx - ) + - base.getGuardingSuperCells(); + return AreaMappingMethods{}.getBlockIndex(base, sizeWithoutGuard, _blockIdx) + + base.getGuardingSuperCells(); } }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/mappings/kernel/BorderMapping.hpp b/include/pmacc/mappings/kernel/BorderMapping.hpp index 76fb498914..7426fbed45 100644 --- a/include/pmacc/mappings/kernel/BorderMapping.hpp +++ b/include/pmacc/mappings/kernel/BorderMapping.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Alexander Grund +/* Copyright 2013-2021 Alexander Grund * * This file is part of PMacc. * @@ -29,7 +29,6 @@ namespace pmacc { - /** * This maps onto the border to 1 exchange direction (e.g. TOP, BOTTOM, TOP + LEFT, ...) * Choosing multiple directions defines an intersection [1] in mathematical set theory. 
@@ -47,12 +46,8 @@ namespace pmacc template class BorderMapping; - template< - template class T_BaseClass, - unsigned T_dim, - class T_SuperCellSize - > - class BorderMapping >: public T_BaseClass + template class T_BaseClass, unsigned T_dim, class T_SuperCellSize> + class BorderMapping> : public T_BaseClass { public: typedef T_BaseClass BaseClass; @@ -72,7 +67,9 @@ namespace pmacc * @param base object of base class baseClass (see template parameters) * @param direction exchange direction to map to */ - HINLINE BorderMapping(const BaseClass& base, pmacc::ExchangeType direction): BaseClass(base), m_direction(direction) + HINLINE BorderMapping(const BaseClass& base, pmacc::ExchangeType direction) + : BaseClass(base) + , m_direction(direction) { PMACC_ASSERT(direction != 0); } @@ -80,8 +77,7 @@ namespace pmacc /** * Returns the exchange direction used by this mapper */ - HDINLINE pmacc::ExchangeType - getDirection() const + HDINLINE pmacc::ExchangeType getDirection() const { return m_direction; } @@ -99,7 +95,7 @@ namespace pmacc for(int i = 0; i < Dim; i++) { - if (directions[i] != 0) + if(directions[i] != 0) result[i] = this->getGuardingSuperCells()[i]; } @@ -120,7 +116,7 @@ namespace pmacc for(int i = 0; i < Dim; i++) { - if (directions[i] == 1) + if(directions[i] == 1) result[i] += this->getGridSuperCells()[i] - 2 * this->getGuardingSuperCells()[i]; else result[i] += this->getGuardingSuperCells()[i]; @@ -128,6 +124,7 @@ namespace pmacc return result; } + private: PMACC_ALIGN(m_direction, const pmacc::ExchangeType); }; diff --git a/include/pmacc/mappings/kernel/ExchangeMapping.hpp b/include/pmacc/mappings/kernel/ExchangeMapping.hpp index 37dd8d5ce7..2bc5ea1d55 100644 --- a/include/pmacc/mappings/kernel/ExchangeMapping.hpp +++ b/include/pmacc/mappings/kernel/ExchangeMapping.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,7 +28,6 @@ namespace pmacc { - template class ExchangeMapping; @@ -39,16 +38,12 @@ namespace pmacc * @tparam areaType are to map to * @tparam baseClass base class for mapping, should be MappingDescription */ - template< - uint32_t areaType, - template class baseClass, - unsigned DIM, - class SuperCellSize_ - > - class ExchangeMapping > : public baseClass + template class baseClass, unsigned DIM, class SuperCellSize_> + class ExchangeMapping> : public baseClass { private: uint32_t exchangeType; + public: typedef baseClass BaseClass; @@ -66,9 +61,7 @@ namespace pmacc * @param base object of base class baseClass (see template parameters) * @param exchangeType exchange type for mapping */ - HINLINE ExchangeMapping(BaseClass base, uint32_t exchangeType) : - BaseClass(base), - exchangeType(exchangeType) + HINLINE ExchangeMapping(BaseClass base, uint32_t exchangeType) : BaseClass(base), exchangeType(exchangeType) { } @@ -98,12 +91,8 @@ namespace pmacc */ HDINLINE DataSpace getSuperCellIndex(const DataSpace& realSuperCellIdx) const { - return ExchangeMappingMethods::getBlockIndex( - *this, - realSuperCellIdx, - exchangeType); + return ExchangeMappingMethods::getBlockIndex(*this, realSuperCellIdx, exchangeType); } - }; } // namespace pmacc diff --git a/include/pmacc/mappings/kernel/ExchangeMappingMethods.hpp b/include/pmacc/mappings/kernel/ExchangeMappingMethods.hpp index f9186bff46..360343b788 100644 --- a/include/pmacc/mappings/kernel/ExchangeMappingMethods.hpp +++ b/include/pmacc/mappings/kernel/ExchangeMappingMethods.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,7 +28,6 @@ namespace pmacc { - /** * Helper class for ExchangeMapping. * Provides methods called by ExchangeMapping using template specialization. 
@@ -40,16 +39,17 @@ namespace pmacc class ExchangeMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base &base, uint32_t exchangeType) + HINLINE static DataSpace getGridDim(const Base& base, uint32_t exchangeType) { return base.getGridSuperCells(); } template - HDINLINE static DataSpace getBlockIndex(const Base &base, - const DataSpace& _blockIdx, uint32_t exchangeType) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& _blockIdx, + uint32_t exchangeType) { return _blockIdx; } @@ -61,40 +61,40 @@ namespace pmacc class ExchangeMappingMethods { public: - template - HINLINE static DataSpace< DIM > getGridDim(const Base &base, uint32_t exchangeType) + HINLINE static DataSpace getGridDim(const Base& base, uint32_t exchangeType) { - const DataSpace< DIM > guardingSupercells = base.getGuardingSuperCells(); - DataSpace< DIM > result(base.getGridSuperCells() - 2 * guardingSupercells); + const DataSpace guardingSupercells = base.getGuardingSuperCells(); + DataSpace result(base.getGridSuperCells() - 2 * guardingSupercells); - const DataSpace< DIM > directions = Mask::getRelativeDirections< DIM > (exchangeType); + const DataSpace directions = Mask::getRelativeDirections(exchangeType); - for( uint32_t d = 0; d < DIM; ++d ) + for(uint32_t d = 0; d < DIM; ++d) { - if (directions[ d ] != 0) - result[ d ] = guardingSupercells[ d ]; + if(directions[d] != 0) + result[d] = guardingSupercells[d]; } return result; } template - HDINLINE static DataSpace< DIM > getBlockIndex(const Base &base, - const DataSpace< DIM >& _blockIdx, uint32_t exchangeType) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& _blockIdx, + uint32_t exchangeType) { - DataSpace< DIM > result(_blockIdx); + DataSpace result(_blockIdx); - const DataSpace< DIM > directions = Mask::getRelativeDirections< DIM > (exchangeType); - const DataSpace< DIM > guardingSupercells = base.getGuardingSuperCells(); + const DataSpace directions = 
Mask::getRelativeDirections(exchangeType); + const DataSpace guardingSupercells = base.getGuardingSuperCells(); - for( uint32_t d = 0; d < DIM; ++d ) + for(uint32_t d = 0; d < DIM; ++d) { - if (directions[ d ] == 0) - result[ d ] += guardingSupercells[ d ]; - else - if (directions[ d ] == 1) - result[ d ] += base.getGridSuperCells()[ d ] - guardingSupercells[ d ]; + if(directions[d] == 0) + result[d] += guardingSupercells[d]; + else if(directions[d] == 1) + result[d] += base.getGridSuperCells()[d] - guardingSupercells[d]; } return result; @@ -104,51 +104,52 @@ namespace pmacc // areaType == BORDER - template< unsigned DIM > + template class ExchangeMappingMethods { public: - template - HINLINE static DataSpace< DIM > getGridDim(const Base &base, uint32_t exchangeType) + HINLINE static DataSpace getGridDim(const Base& base, uint32_t exchangeType) { // skip 2 x (border + guard) == 4 x guard - DataSpace< DIM > result(base.getGridSuperCells() - 4 * base.getGuardingSuperCells()); + DataSpace result(base.getGridSuperCells() - 4 * base.getGuardingSuperCells()); - DataSpace< DIM > directions = Mask::getRelativeDirections< DIM > (exchangeType); + DataSpace directions = Mask::getRelativeDirections(exchangeType); - for( uint32_t d = 0; d < DIM; ++d ) + for(uint32_t d = 0; d < DIM; ++d) { - if (directions[ d ] != 0) - result[ d ] = base.getGuardingSuperCells()[ d ]; + if(directions[d] != 0) + result[d] = base.getGuardingSuperCells()[d]; } return result; } template - HDINLINE static DataSpace< DIM > getBlockIndex(const Base &base, - const DataSpace< DIM >& _blockIdx, uint32_t exchangeType) + HDINLINE static DataSpace getBlockIndex( + const Base& base, + const DataSpace& _blockIdx, + uint32_t exchangeType) { - DataSpace< DIM > result(_blockIdx); + DataSpace result(_blockIdx); - DataSpace< DIM > directions = Mask::getRelativeDirections< DIM > (exchangeType); + DataSpace directions = Mask::getRelativeDirections(exchangeType); - DataSpace< DIM > guardingBlocks = 
base.getGuardingSuperCells(); + DataSpace guardingBlocks = base.getGuardingSuperCells(); - for( uint32_t d = 0; d < DIM; ++d ) + for(uint32_t d = 0; d < DIM; ++d) { - switch (directions[ d ]) + switch(directions[d]) { - case 0: - result[ d ] += 2 * guardingBlocks[ d ]; - break; - case -1: - result[ d ] += guardingBlocks[ d ]; - break; - case 1: - result[ d ] += base.getGridSuperCells()[ d ] - 2 * guardingBlocks[ d ]; - break; + case 0: + result[d] += 2 * guardingBlocks[d]; + break; + case -1: + result[d] += guardingBlocks[d]; + break; + case 1: + result[d] += base.getGridSuperCells()[d] - 2 * guardingBlocks[d]; + break; } } @@ -156,4 +157,4 @@ namespace pmacc } }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/mappings/kernel/MappingDescription.hpp b/include/pmacc/mappings/kernel/MappingDescription.hpp index a490ef8030..bb58e5af25 100644 --- a/include/pmacc/mappings/kernel/MappingDescription.hpp +++ b/include/pmacc/mappings/kernel/MappingDescription.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -31,105 +31,102 @@ namespace pmacc { + /** + * Abstracts logical block information from block variables. + * + * @tparam DIM dimension for grid/blocks + * @tparam SuperCellSize mapper class for logical grid information + */ -/** - * Abstracts logical block information from block variables. 
- * - * @tparam DIM dimension for grid/blocks - * @tparam SuperCellSize mapper class for logical grid information - */ - -template -class MappingDescription -{ -public: - - enum + template + class MappingDescription { - Dim = DIM - }; + public: + enum + { + Dim = DIM + }; - typedef SuperCellSize_ SuperCellSize; + typedef SuperCellSize_ SuperCellSize; - /** constructor - * - * @param localGridCells number of cells in the local value (including guarding cells) - * @param guardingSuperCells number of **supercells** within the guard - */ - MappingDescription( - DataSpace localGridCells, - DataSpace guardingSuperCells = DataSpace::create(0) - ) : - gridSuperCells(localGridCells / SuperCellSize::toRT()), /*block count per dimension*/ - guardingSuperCells(guardingSuperCells) - { - /* each dimension needs at least one supercell for the core and 2 * guardingSuperCells - * (one supercell for the border and one for the guard) or it has no guarding and border - * and contains only a core (this is allowed for local arrays which can not sync the - * outer supercells with there neighbor MPI ranks. 
+ /** constructor + * + * @param localGridCells number of cells in the local value (including guarding cells) + * @param guardingSuperCells number of **supercells** within the guard */ - for( uint32_t d = 0; d < DIM; ++d ) + MappingDescription( + DataSpace localGridCells, + DataSpace guardingSuperCells = DataSpace::create(0)) + : gridSuperCells(localGridCells / SuperCellSize::toRT()) + , /*block count per dimension*/ + guardingSuperCells(guardingSuperCells) { - /*minimal 3 blocks are needed if we have guarding blocks*/ - int minBlock = std::min(gridSuperCells.x(), gridSuperCells.y()); - if (DIM == DIM3) + /* each dimension needs at least one supercell for the core and 2 * guardingSuperCells + * (one supercell for the border and one for the guard) or it has no guarding and border + * and contains only a core (this is allowed for local arrays which can not sync the + * outer supercells with there neighbor MPI ranks. + */ + for(uint32_t d = 0; d < DIM; ++d) { - minBlock = std::min(minBlock, gridSuperCells[2]); + /*minimal 3 blocks are needed if we have guarding blocks*/ + int minBlock = std::min(gridSuperCells.x(), gridSuperCells.y()); + if(DIM == DIM3) + { + minBlock = std::min(minBlock, gridSuperCells[2]); + } + PMACC_VERIFY( + (guardingSuperCells[d] == 0 && gridSuperCells[d] >= 1) + || gridSuperCells[d] >= 2 * guardingSuperCells[d] + 1); } - PMACC_VERIFY( - ( guardingSuperCells[ d ] == 0 && gridSuperCells[ d ] >= 1) || - gridSuperCells[ d ] >= 2 * guardingSuperCells[ d ] + 1 - ); } - } - HDINLINE DataSpace getGridSuperCells() const - { - return this->gridSuperCells; - } - - HDINLINE DataSpace getGuardingSuperCells() const - { - return guardingSuperCells; - } - - HDINLINE void setGridSuperCells(DataSpace superCellsCount) - { - gridSuperCells = superCellsCount; - } + HDINLINE DataSpace getGridSuperCells() const + { + return this->gridSuperCells; + } - /*! 
get the Coordinate of the root supercell in the hole simulation area - * * root supercell in 2D LEFT+TOP | in 3D LEFT+TOP+FRONT - * @param globaOffset cells - * @return global index of the root supercell - */ - HINLINE DataSpace getRootSuperCellCoordinate(const DataSpace globalOffset) const - { - return globalOffset/SuperCellSize::toRT(); - } + HDINLINE DataSpace getGuardingSuperCells() const + { + return guardingSuperCells; + } - HDINLINE DataSpace getSuperCellSize() const - { - return SuperCellSize::toRT(); - } + HDINLINE void setGridSuperCells(DataSpace superCellsCount) + { + gridSuperCells = superCellsCount; + } - HDINLINE GridLayout getGridLayout() const - { - return GridLayout (SuperCellSize::toRT()*(gridSuperCells - 2 * guardingSuperCells), SuperCellSize::toRT() * guardingSuperCells); - } + /*! get the Coordinate of the root supercell in the hole simulation area + * * root supercell in 2D LEFT+TOP | in 3D LEFT+TOP+FRONT + * @param globaOffset cells + * @return global index of the root supercell + */ + HINLINE DataSpace getRootSuperCellCoordinate(const DataSpace globalOffset) const + { + return globalOffset / SuperCellSize::toRT(); + } - HINLINE DataSpace getGlobalSuperCells() const - { - return Environment::get().GridController().getGpuNodes() * (gridSuperCells - 2 * guardingSuperCells); - } + HDINLINE DataSpace getSuperCellSize() const + { + return SuperCellSize::toRT(); + } + HDINLINE GridLayout getGridLayout() const + { + return GridLayout( + SuperCellSize::toRT() * (gridSuperCells - 2 * guardingSuperCells), + SuperCellSize::toRT() * guardingSuperCells); + } -protected: + HINLINE DataSpace getGlobalSuperCells() const + { + return Environment::get().GridController().getGpuNodes() * (gridSuperCells - 2 * guardingSuperCells); + } - //\todo: keine Eigenschaft einer Zelle - PMACC_ALIGN(gridSuperCells, DataSpace); - PMACC_ALIGN(guardingSuperCells, DataSpace); -}; + protected: + //\todo: keine Eigenschaft einer Zelle + PMACC_ALIGN(gridSuperCells, DataSpace); + 
PMACC_ALIGN(guardingSuperCells, DataSpace); + }; } // namespace pmacc diff --git a/include/pmacc/mappings/kernel/StrideMapping.hpp b/include/pmacc/mappings/kernel/StrideMapping.hpp index 38b3e52d00..6b407ea515 100644 --- a/include/pmacc/mappings/kernel/StrideMapping.hpp +++ b/include/pmacc/mappings/kernel/StrideMapping.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -29,82 +29,82 @@ namespace pmacc { - -template -class StrideMapping; - -template< -uint32_t areaType, -uint32_t stride, -template class baseClass, -unsigned DIM, -class SuperCellSize_ -> -class StrideMapping > : public baseClass -{ -public: - typedef baseClass BaseClass; - - enum + template + class StrideMapping; + + template< + uint32_t areaType, + uint32_t stride, + template + class baseClass, + unsigned DIM, + class SuperCellSize_> + class StrideMapping> : public baseClass { - AreaType = areaType, Dim = BaseClass::Dim, Stride = stride + public: + typedef baseClass BaseClass; + + enum + { + AreaType = areaType, + Dim = BaseClass::Dim, + Stride = stride + }; + + + typedef typename BaseClass::SuperCellSize SuperCellSize; + + HINLINE StrideMapping(BaseClass base) : BaseClass(base), offset() + { + } + + /** + * Generate grid dimension information for kernel calls + * + * @return size of the grid + */ + HINLINE DataSpace getGridDim() const + { + return (StrideMappingMethods::getGridDim(*this) - offset + (int) Stride - 1) / (int) Stride; + } + + /** + * Returns index of current logical block + * + * @param realSuperCellIdx current SuperCell index (block index) + * @return mapped SuperCell index + */ + HDINLINE DataSpace getSuperCellIndex(const DataSpace& realSuperCellIdx) const + { + const DataSpace blockId((realSuperCellIdx * (int) Stride) + offset); + return StrideMappingMethods::shift(*this, blockId); + } + + HDINLINE DataSpace getOffset() const + { + return offset; + } 
+ + HDINLINE void setOffset(const DataSpace offset) + { + this->offset = offset; + } + + /** set mapper to next domain + * + * @return true if domain is valid, else false + */ + HINLINE bool next() + { + int linearOffset = DataSpaceOperations::map(DataSpace::create(stride), offset); + linearOffset++; + offset = DataSpaceOperations::map(DataSpace::create(stride), linearOffset); + + return linearOffset < DataSpace::create(stride).productOfComponents(); + } + + private: + PMACC_ALIGN(offset, DataSpace); }; - - typedef typename BaseClass::SuperCellSize SuperCellSize; - - HINLINE StrideMapping(BaseClass base) : BaseClass(base), offset() - { - } - - /** - * Generate grid dimension information for kernel calls - * - * @return size of the grid - */ - HINLINE DataSpace getGridDim() const - { - return (StrideMappingMethods::getGridDim(*this) - offset + (int)Stride - 1) / (int)Stride; - } - - /** - * Returns index of current logical block - * - * @param realSuperCellIdx current SuperCell index (block index) - * @return mapped SuperCell index - */ - HDINLINE DataSpace getSuperCellIndex(const DataSpace& realSuperCellIdx) const - { - const DataSpace blockId((realSuperCellIdx * (int)Stride) + offset); - return StrideMappingMethods::shift(*this, blockId); - } - - HDINLINE DataSpace getOffset() const - { - return offset; - } - - HDINLINE void setOffset(const DataSpace offset) - { - this->offset = offset; - } - - /** set mapper to next domain - * - * @return true if domain is valid, else false - */ - HINLINE bool next() - { - int linearOffset = DataSpaceOperations::map(DataSpace::create(stride), offset); - linearOffset++; - offset = DataSpaceOperations::map(DataSpace::create(stride), linearOffset); - - return linearOffset < DataSpace::create(stride).productOfComponents(); - } - -private: - PMACC_ALIGN(offset, DataSpace); - -}; - } // namespace pmacc diff --git a/include/pmacc/mappings/kernel/StrideMappingMethods.hpp b/include/pmacc/mappings/kernel/StrideMappingMethods.hpp index 
4c6912607f..c42dec3282 100644 --- a/include/pmacc/mappings/kernel/StrideMappingMethods.hpp +++ b/include/pmacc/mappings/kernel/StrideMappingMethods.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,7 +27,6 @@ namespace pmacc { - /** * Helper class for StrideMapping. * Provides methods called by StrideMapping using template specialization. @@ -38,67 +37,63 @@ namespace pmacc template class StrideMappingMethods; - //CORE + BORDER + GUARD + // CORE + BORDER + GUARD template class StrideMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base &base) + HINLINE static DataSpace getGridDim(const Base& base) { return base.getGridSuperCells(); } template - HDINLINE static DataSpace shift(const Base &base, const DataSpace& value) + HDINLINE static DataSpace shift(const Base& base, const DataSpace& value) { return value; } }; - //CORE + // CORE template class StrideMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base &base) + HINLINE static DataSpace getGridDim(const Base& base) { // skip 2 x (border + guard) == 4 x guard return base.getGridSuperCells() - 4 * base.getGuardingSuperCells(); } template - HDINLINE static DataSpace shift(const Base &base, const DataSpace& value) + HDINLINE static DataSpace shift(const Base& base, const DataSpace& value) { // skip guard + border == 2 x guard return value + 2 * base.getGuardingSuperCells(); } - }; - //CORE+BORDER + // CORE+BORDER template class StrideMappingMethods { public: - template - HINLINE static DataSpace getGridDim(const Base &base) + HINLINE static DataSpace getGridDim(const Base& base) { return base.getGridSuperCells() - 2 * base.getGuardingSuperCells(); } template - HDINLINE static DataSpace shift(const Base &base, const DataSpace& value) + HDINLINE static DataSpace shift(const Base& base, const DataSpace& value) { return 
value + base.getGuardingSuperCells(); } }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/EnvironmentController.hpp b/include/pmacc/mappings/simulation/EnvironmentController.hpp index ec652bf7ea..f3a29fef46 100644 --- a/include/pmacc/mappings/simulation/EnvironmentController.hpp +++ b/include/pmacc/mappings/simulation/EnvironmentController.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Wolfgang Hoenig, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Wolfgang Hoenig, Benjamin Worpitz * * This file is part of PMacc. * @@ -28,58 +28,54 @@ namespace pmacc { - -class EnvironmentController -{ -public: - - /*! Get communicator - * @return Communicator for MPI - */ - ICommunicator& getCommunicator() const - { - return *comm; - } - - - - /*! Get Mask with all GPU neighbar - * @return Mask with neighbar - */ - const Mask& getCommunicationMask() const - { - return comm->getCommunicationMask(); - } - - - /*! Set MPI communicator - * @param comm A instance of ICommunicator - */ - void setCommunicator(ICommunicator& comm) + class EnvironmentController { - this->comm = &comm; - } - -private: - - friend struct detail::Environment; - - /*! Default constructor. - */ - EnvironmentController() {} - - static EnvironmentController& getInstance() - { - static EnvironmentController instance; - return instance; - } - -private: - - /*! Pointer to MPI communicator. - */ - ICommunicator* comm; - -}; - -} //namespace pmacc + public: + /*! Get communicator + * @return Communicator for MPI + */ + ICommunicator& getCommunicator() const + { + return *comm; + } + + + /*! Get Mask with all GPU neighbar + * @return Mask with neighbar + */ + const Mask& getCommunicationMask() const + { + return comm->getCommunicationMask(); + } + + + /*! Set MPI communicator + * @param comm A instance of ICommunicator + */ + void setCommunicator(ICommunicator& comm) + { + this->comm = &comm; + } + + private: + friend struct detail::Environment; + + /*! 
Default constructor. + */ + EnvironmentController() + { + } + + static EnvironmentController& getInstance() + { + static EnvironmentController instance; + return instance; + } + + private: + /*! Pointer to MPI communicator. + */ + ICommunicator* comm; + }; + +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/Filesystem.hpp b/include/pmacc/mappings/simulation/Filesystem.hpp index 152d900ecb..282f82d811 100644 --- a/include/pmacc/mappings/simulation/Filesystem.hpp +++ b/include/pmacc/mappings/simulation/Filesystem.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt +/* Copyright 2014-2021 Felix Schmitt * * This file is part of PMacc. * @@ -27,109 +27,96 @@ namespace pmacc { - + /** + * Singleton class providing common filesystem operations. + * + * @tparam DIM number of dimensions of the simulation + */ + template + class Filesystem + { + public: /** - * Singleton class providing common filesystem operations. + * Create directory with default permissions * - * @tparam DIM number of dimensions of the simulation + * @param dir name of directory */ - template - class Filesystem + void createDirectory(const std::string dir) const { - public: - - /** - * Create directory with default permissions - * - * @param dir name of directory - */ - void - createDirectory( const std::string dir ) const - { - /* does not throw if the directory exists or has been created */ - bfs::create_directories(dir); - } + /* does not throw if the directory exists or has been created */ + bfs::create_directories(dir); + } - /** - * Set 755 permissions for a directory - * - * @param dir name of directory - */ - void - setDirectoryPermissions( const std::string dir ) const - { - /* set permissions */ - bfs::permissions(dir, - bfs::owner_all | - bfs::group_read | - bfs::group_exe | - bfs::others_read | - bfs::others_exe); - } - - /** - * Create directory and set 755 permissions by root rank. 
- * - * @param dir name of directory - */ - void - createDirectoryWithPermissions( const std::string dir ) const - { - GridController& gc = Environment::get().GridController(); + /** + * Set 755 permissions for a directory + * + * @param dir name of directory + */ + void setDirectoryPermissions(const std::string dir) const + { + /* set permissions */ + bfs::permissions( + dir, + bfs::owner_all | bfs::group_read | bfs::group_exe | bfs::others_read | bfs::others_exe); + } - createDirectory(dir); + /** + * Create directory and set 755 permissions by root rank. + * + * @param dir name of directory + */ + void createDirectoryWithPermissions(const std::string dir) const + { + GridController& gc = Environment::get().GridController(); - if (gc.getGlobalRank() == 0) - { - /* must be set by only one process to avoid races */ - setDirectoryPermissions(dir); - } - } + createDirectory(dir); - /** - * Strip path from absolute or relative paths to filenames - * - * @param path and filename - */ - std::string - basename( const std::string pathFilename ) const + if(gc.getGlobalRank() == 0) { - return bfs::path( pathFilename ).filename().string(); + /* must be set by only one process to avoid races */ + setDirectoryPermissions(dir); } + } - private: - - friend class Environment; - - /** - * Constructor - */ - Filesystem() - { - - } + /** + * Strip path from absolute or relative paths to filenames + * + * @param path and filename + */ + std::string basename(const std::string pathFilename) const + { + return bfs::path(pathFilename).filename().string(); + } - /** - * Constructor - */ - Filesystem(const Filesystem& fs) - { + private: + friend class Environment; - } + /** + * Constructor + */ + Filesystem() + { + } - /** - * Returns the instance of the filesystem class. - * - * This class is a singleton class. 
- * - * @return a filesystem instance - */ - static Filesystem& getInstance() - { - static Filesystem instance; - return instance; - } - }; + /** + * Constructor + */ + Filesystem(const Filesystem& fs) + { + } -} //namespace pmacc + /** + * Returns the instance of the filesystem class. + * + * This class is a singleton class. + * + * @return a filesystem instance + */ + static Filesystem& getInstance() + { + static Filesystem instance; + return instance; + } + }; +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/GridController.hpp b/include/pmacc/mappings/simulation/GridController.hpp index d5306c421b..15b2a46a73 100644 --- a/include/pmacc/mappings/simulation/GridController.hpp +++ b/include/pmacc/mappings/simulation/GridController.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz * * This file is part of PMacc. @@ -30,272 +30,267 @@ namespace pmacc { - + /** + * GridController manages grid information. + * + * GridController provides information for a DIM-dimensional grid + * such as the number of GPU nodes and the current node's position in the grid + * and manages sliding window. + * GridController is a singleton. + * + * @tparam DIM dimension of the controlled grid + */ + template + class GridController + { + public: /** - * GridController manages grid information. + * Initialisation of the controller. * - * GridController provides information for a DIM-dimensional grid - * such as the number of GPU nodes and the current node's position in the grid - * and manages sliding window. - * GridController is a singleton. + * This methode must be called before any subgrids or buffers are used. 
* - * @tparam DIM dimension of the controlled grid + * @param nodes number of GPU nodes in each dimension + * @param periodic specifying whether the grid is periodic (1) or not (0) in each dimension */ - template - class GridController + void init(DataSpace nodes, DataSpace periodic = DataSpace()) { - public: - - /** - * Initialisation of the controller. - * - * This methode must be called before any subgrids or buffers are used. - * - * @param nodes number of GPU nodes in each dimension - * @param periodic specifying whether the grid is periodic (1) or not (0) in each dimension - */ - void init(DataSpace nodes, DataSpace periodic = DataSpace()) + static bool commIsInit = false; + if(!commIsInit) { - static bool commIsInit = false; - if (!commIsInit) - { - gpuNodes = nodes; + gpuNodes = nodes; - DataSpace tmp; - DataSpace periodicTmp; - tmp[0] = nodes[0]; - periodicTmp[0] = periodic[0]; - if (DIM < DIM2) - { - tmp[1] = 1; - periodicTmp[1] = 1; - } - else - { - tmp[1] = nodes[1]; - periodicTmp[1] = periodic[1]; - } - - if (DIM < DIM3) - { - tmp[2] = 1; - periodicTmp[2] = 1; - } - else - { - tmp[2] = nodes[2]; - periodicTmp[2] = periodic[2]; - } - - comm.init(tmp, periodicTmp); - commIsInit = true; - - Environment::get().EnvironmentController().setCommunicator(comm); + DataSpace tmp; + DataSpace periodicTmp; + tmp[0] = nodes[0]; + periodicTmp[0] = periodic[0]; + if(DIM < DIM2) + { + tmp[1] = 1; + periodicTmp[1] = 1; + } + else + { + tmp[1] = nodes[1]; + periodicTmp[1] = periodic[1]; } - } - /** - * Returns the number of GPU nodes in each dimension. - * - * @return number of nodes - */ - const DataSpace getGpuNodes() const - { - return gpuNodes; - } + if(DIM < DIM3) + { + tmp[2] = 1; + periodicTmp[2] = 1; + } + else + { + tmp[2] = nodes[2]; + periodicTmp[2] = periodic[2]; + } - /** - * Returns the position of the calling process' GPU in the grid. 
- * - * @return current GPU position - * */ - const DataSpace getPosition() const - { - return comm.getCoordinates(); - } + comm.init(tmp, periodicTmp); + commIsInit = true; - /** - * Returns the scalar position (rank) of this GPU, - * depending on its current grid position - * - * @return current grid position as scalar value - */ - uint32_t getScalarPosition() const - { - return DataSpaceOperations::map(getGpuNodes(), getPosition()); + Environment::get().EnvironmentController().setCommunicator(comm); } + } - /** - * Returns the local rank of the caller on the current host. - * - * return local rank on host - */ - uint32_t getHostRank() const - { - return comm.getHostRank(); - } + /** + * Returns the number of GPU nodes in each dimension. + * + * @return number of nodes + */ + const DataSpace getGpuNodes() const + { + return gpuNodes; + } - /** - * Returns the global MPI rank of the caller among all hosts. - * - * @return global MPI rank - */ - uint32_t getGlobalRank() const - { - return comm.getRank(); - } + /** + * Returns the position of the calling process' GPU in the grid. + * + * @return current GPU position + * */ + const DataSpace getPosition() const + { + return comm.getCoordinates(); + } - /** - * Returns the global MPI size. - * - * @return global number of MPI ranks - */ - uint32_t getGlobalSize() const - { - return comm.getSize(); - } + /** + * Returns the scalar position (rank) of this GPU, + * depending on its current grid position + * + * @return current grid position as scalar value + */ + uint32_t getScalarPosition() const + { + return DataSpaceOperations::map(getGpuNodes(), getPosition()); + } - /** - * Initialises a slide of the simulation area. - * - * Starts a slide of the simulation area. In the process, GPU nodes are - * reassigned to new grid positions to enable large simulation areas - * to be computed. - * All nodes in the simulation must call this function at the same iteration. 
- * - * @return true if the position of the calling GPU is switched to the end, false otherwise - */ - bool slide() - { - /* wait that all tasks are finished */ - Environment::get().Manager().waitForAllTasks();// + /** + * Returns the local rank of the caller on the current host. + * + * return local rank on host + */ + uint32_t getHostRank() const + { + return comm.getHostRank(); + } - bool result = comm.slide(); + /** + * Returns the global MPI rank of the caller among all hosts. + * + * @return global MPI rank + */ + uint32_t getGlobalRank() const + { + return comm.getRank(); + } - updateDomainOffset(); + /** + * Returns the global MPI size. + * + * @return global number of MPI ranks + */ + uint32_t getGlobalSize() const + { + return comm.getSize(); + } - return result; - } + /** + * Initialises a slide of the simulation area. + * + * Starts a slide of the simulation area. In the process, GPU nodes are + * reassigned to new grid positions to enable large simulation areas + * to be computed. + * All nodes in the simulation must call this function at the same iteration. + * + * @return true if the position of the calling GPU is switched to the end, false otherwise + */ + bool slide() + { + /* wait that all tasks are finished */ + Environment::get().Manager().waitForAllTasks(); // - /** - * Slides multiple times. - * - * Restores the state of the communicator and the domain offsets as - * if the simulation has been slided for numSlides times. 
- * - * \warning you are not allowed to call this method if moving - * the simulation does not use a moving window, - * else static load balancing will break in y-direction - * - * @param[in] numSlides number of slides to slide - * @return true if the position of gpu is switched to the end, else false - */ - bool setStateAfterSlides(size_t numSlides) - { - // nothing to do, nothing to change - // note: prevents destroying static load balancing in y for - // non-moving window simulations - if( numSlides == 0 ) - return false; + bool result = comm.slide(); - bool result = comm.setStateAfterSlides(numSlides); - updateDomainOffset(numSlides); - return result; - } + updateDomainOffset(); - /** - * Returns a Mask which describes all neighbouring GPU nodes. - * - * @return Mask with all neighbors - */ - const Mask& getCommunicationMask() const - { - return Environment::get().EnvironmentController().getCommunicationMask(); - } + return result; + } - /** - * Returns the MPI communicator class - * - * @return current CommunicatorMPI - */ - CommunicatorMPI& getCommunicator() - { - return comm; - } + /** + * Slides multiple times. + * + * Restores the state of the communicator and the domain offsets as + * if the simulation has been slided for numSlides times. 
+ * + * \warning you are not allowed to call this method if moving + * the simulation does not use a moving window, + * else static load balancing will break in y-direction + * + * @param[in] numSlides number of slides to slide + * @return true if the position of gpu is switched to the end, else false + */ + bool setStateAfterSlides(size_t numSlides) + { + // nothing to do, nothing to change + // note: prevents destroying static load balancing in y for + // non-moving window simulations + if(numSlides == 0) + return false; - private: + bool result = comm.setStateAfterSlides(numSlides); + updateDomainOffset(numSlides); + return result; + } - friend class Environment; - /** - * Constructor - */ - GridController() : gpuNodes(DataSpace()) - { + /** + * Returns a Mask which describes all neighbouring GPU nodes. + * + * @return Mask with all neighbors + */ + const Mask& getCommunicationMask() const + { + return Environment::get().EnvironmentController().getCommunicationMask(); + } - } + /** + * Returns the MPI communicator class + * + * @return current CommunicatorMPI + */ + CommunicatorMPI& getCommunicator() + { + return comm; + } - /** - * Constructor - */ - GridController(const GridController& gc) - { + private: + friend class Environment; + /** + * Constructor + */ + GridController() : gpuNodes(DataSpace()) + { + } - } + /** + * Constructor + */ + GridController(const GridController& gc) + { + } - /** - * Sets globalDomain.offset & localDomain.offset using the current position. - * - * (This function is idempotent) - * - * @param[in] numSlides number of slides to slide - * - * \warning the implementation of this method is not compatible with - * static load balancing in y-direction + /** + * Sets globalDomain.offset & localDomain.offset using the current position. 
+ * + * (This function is idempotent) + * + * @param[in] numSlides number of slides to slide + * + * \warning the implementation of this method is not compatible with + * static load balancing in y-direction + */ + void updateDomainOffset(size_t numSlides = 1) + { + /* if we slide we must change our localDomain.offset of the simulation + * (only change slide direction Y) */ - void updateDomainOffset(size_t numSlides = 1) - { - /* if we slide we must change our localDomain.offset of the simulation - * (only change slide direction Y) - */ - int gpuOffset_y = this->getPosition().y(); - const SubGrid& subGrid = Environment::get().SubGrid(); - DataSpace localDomainOffset(subGrid.getLocalDomain().offset); - DataSpace globalDomainOffset(subGrid.getGlobalDomain().offset); - /* this is allowed in the case that we use sliding window - * because size in Y direction is the same for all gpus domains - */ - localDomainOffset.y() = gpuOffset_y * subGrid.getLocalDomain().size.y(); - globalDomainOffset.y() += numSlides * subGrid.getLocalDomain().size.y(); + int gpuOffset_y = this->getPosition().y(); + const SubGrid& subGrid = Environment::get().SubGrid(); + DataSpace localDomainOffset(subGrid.getLocalDomain().offset); + DataSpace globalDomainOffset(subGrid.getGlobalDomain().offset); + /* this is allowed in the case that we use sliding window + * because size in Y direction is the same for all gpus domains + */ + localDomainOffset.y() = gpuOffset_y * subGrid.getLocalDomain().size.y(); + globalDomainOffset.y() += numSlides * subGrid.getLocalDomain().size.y(); - Environment::get().SubGrid().setLocalDomainOffset(localDomainOffset); - Environment::get().SubGrid().setGlobalDomainOffset(globalDomainOffset); - } + Environment::get().SubGrid().setLocalDomainOffset(localDomainOffset); + Environment::get().SubGrid().setGlobalDomainOffset(globalDomainOffset); + } - /** - * Returns the instance of the controller. - * - * This class is a singleton class. 
- * - * @return a controller instance - */ - static GridController& getInstance() - { - static GridController instance; - return instance; - } + /** + * Returns the instance of the controller. + * + * This class is a singleton class. + * + * @return a controller instance + */ + static GridController& getInstance() + { + static GridController instance; + return instance; + } - /** - * Communicator for MPI - */ - static CommunicatorMPI comm; + /** + * Communicator for MPI + */ + static CommunicatorMPI comm; - /** - * number of GPU nodes for each direction - */ - DataSpace gpuNodes; - }; + /** + * number of GPU nodes for each direction + */ + DataSpace gpuNodes; + }; - template - CommunicatorMPI GridController::comm; + template + CommunicatorMPI GridController::comm; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/ResourceMonitor.hpp b/include/pmacc/mappings/simulation/ResourceMonitor.hpp index e56815eaa3..534ed6e405 100644 --- a/include/pmacc/mappings/simulation/ResourceMonitor.hpp +++ b/include/pmacc/mappings/simulation/ResourceMonitor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Erik Zenker +/* Copyright 2016-2021 Erik Zenker * * This file is part of PMacc. 
* @@ -20,22 +20,20 @@ */ #pragma once -#include /* std::vector */ +#include /* std::vector */ #include /* std::size_t */ namespace pmacc { - /** * Provides ressource information of the current subgrid * * @tparam T_DIM number of dimensions of the simulation */ - template + template class ResourceMonitor { public: - /** * Constructor */ @@ -49,10 +47,8 @@ namespace pmacc /** * Returns the number of particles per species on the device */ - template - std::vector getParticleCounts(T_MappingDesc &cellDescription, T_ParticleFilter & parFilter); - + template + std::vector getParticleCounts(T_MappingDesc& cellDescription, T_ParticleFilter& parFilter); }; -} //namespace pmacc - +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/ResourceMonitor.tpp b/include/pmacc/mappings/simulation/ResourceMonitor.tpp index 73ccc132c6..0fa624715b 100644 --- a/include/pmacc/mappings/simulation/ResourceMonitor.tpp +++ b/include/pmacc/mappings/simulation/ResourceMonitor.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Erik Zenker +/* Copyright 2016-2021 Erik Zenker * * This file is part of PMacc. 
* @@ -34,21 +34,21 @@ namespace pmacc template struct MyCountParticles { - template - void operator()(T_Vector & particleCounts, T_MappingDesc & cellDescription, T_ParticleFilter & parFilter) + template + void operator()(T_Vector& particleCounts, T_MappingDesc& cellDescription, T_ParticleFilter& parFilter) { - DataConnector & dc = Environment<>::get().DataConnector(); + DataConnector& dc = Environment<>::get().DataConnector(); - const SubGrid & subGrid = Environment::get().SubGrid(); + const SubGrid& subGrid = Environment::get().SubGrid(); const DataSpace localSize(subGrid.getLocalDomain().size); uint64_cu totalNumParticles = 0; - totalNumParticles = pmacc::CountParticles::countOnDevice < CORE + BORDER > ( - *dc.get(T_Species::FrameType::getName(), true), - cellDescription, - DataSpace(), - localSize, - parFilter); + totalNumParticles = pmacc::CountParticles::countOnDevice( + *dc.get(T_Species::FrameType::getName(), true), + cellDescription, + DataSpace(), + localSize, + parFilter); particleCounts.push_back(totalNumParticles); } }; @@ -56,7 +56,6 @@ namespace pmacc template ResourceMonitor::ResourceMonitor() { - } template @@ -66,14 +65,16 @@ namespace pmacc } template - template - std::vector ResourceMonitor::getParticleCounts(T_MappingDesc &cellDescription, T_ParticleFilter & parFilter) + template + std::vector ResourceMonitor::getParticleCounts( + T_MappingDesc& cellDescription, + T_ParticleFilter& parFilter) { typedef bmpl::integral_c dim; std::vector particleCounts; - meta::ForEach > countParticles; + meta::ForEach> countParticles; countParticles(particleCounts, cellDescription, parFilter); return particleCounts; } -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/Selection.hpp b/include/pmacc/mappings/simulation/Selection.hpp index 9ab6cf11fe..930d91e38e 100644 --- a/include/pmacc/mappings/simulation/Selection.hpp +++ b/include/pmacc/mappings/simulation/Selection.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt 
+/* Copyright 2014-2021 Felix Schmitt * * This file is part of PMacc. * @@ -28,86 +28,72 @@ namespace pmacc { - -/** - * Any DIM-dimensional selection of a simulation volume with a size and offset. - * - * @tparam DIM number of dimensions - */ -template -class Selection -{ -public: - /** - * Constructor - * Size and offset initialized to 0 (empty selection) + * Any DIM-dimensional selection of a simulation volume with a size and offset. + * + * @tparam DIM number of dimensions */ - Selection(void) + template + class Selection { - for (uint32_t i = 0; i < DIM; ++i) + public: + /** + * Constructor + * Size and offset initialized to 0 (empty selection) + */ + Selection(void) { - size[i] = 0; - offset[i] = 0; + for(uint32_t i = 0; i < DIM; ++i) + { + size[i] = 0; + offset[i] = 0; + } } - } - - /** - * Copy constructor - * - * @param other Selection to copy information from - */ - Selection(const Selection& other) : - size(other.size), - offset(other.offset) - { - } + /** + * Copy constructor + */ + constexpr Selection(const Selection&) = default; - /** - * Constructor - * Offset is initialized to 0. - * - * @param size DataSpace for selection size - */ - Selection(DataSpace size) : - size(size) - { - for (uint32_t i = 0; i < DIM; ++i) + /** + * Constructor + * Offset is initialized to 0. 
+ * + * @param size DataSpace for selection size + */ + Selection(DataSpace size) : size(size) { - offset[i] = 0; + for(uint32_t i = 0; i < DIM; ++i) + { + offset[i] = 0; + } } - } - /** - * Constructor - * - * @param size DataSpace for selection size - * @param offset DataSpace for selection offset - */ - Selection(DataSpace size, DataSpace offset) : - size(size), - offset(offset) - { - - } + /** + * Constructor + * + * @param size DataSpace for selection size + * @param offset DataSpace for selection offset + */ + Selection(DataSpace size, DataSpace offset) : size(size), offset(offset) + { + } - /** - * Return a string representation - * - * @return string representation - */ - HINLINE const std::string toString(void) const - { - std::stringstream str; - str << "{ size = " << size.toString() << - " offset = " << offset.toString() << " }"; - return str.str(); - } + /** + * Return a string representation + * + * @return string representation + */ + HINLINE const std::string toString(void) const + { + std::stringstream str; + str << "{ size = " << size.toString() << " offset = " << offset.toString() << " }"; + return str.str(); + } - DataSpace size; + DataSpace size; - DataSpace offset; -}; + DataSpace offset; + }; -} // namespace picongpu +} // namespace pmacc diff --git a/include/pmacc/mappings/simulation/SubGrid.hpp b/include/pmacc/mappings/simulation/SubGrid.hpp index c5d9cf8dc8..5ceb6372d4 100644 --- a/include/pmacc/mappings/simulation/SubGrid.hpp +++ b/include/pmacc/mappings/simulation/SubGrid.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, Wolfgang Hoenig +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, Wolfgang Hoenig * * This file is part of PMacc. * @@ -29,18 +29,19 @@ namespace pmacc { /** - * Groups local, global and total domain information. 
- * - * For a detailed description of domains, see the PIConGPU wiki page: - * https://github.com/ComputationalRadiationPhysics/picongpu/wiki/PIConGPU-domain-definitions - */ - template + * Groups local, global and total domain information. + * + * For a detailed description of domains, see the PIConGPU wiki page: + * https://github.com/ComputationalRadiationPhysics/picongpu/wiki/PIConGPU-domain-definitions + */ + template class SubGrid { public: - typedef DataSpace Size; + constexpr SubGrid& operator=(const SubGrid&) = default; + /** * Initialize SubGrid instance * @@ -48,9 +49,7 @@ namespace pmacc * @param globalSize global domain size * @param localOffset local domain offset (formerly 'globalOffset') */ - void init(const Size& localSize, - const Size& globalSize, - const Size& localOffset) + void init(const Size& localSize, const Size& globalSize, const Size& localOffset) { totalDomain = Selection(globalSize); globalDomain = Selection(globalSize); @@ -114,7 +113,6 @@ namespace pmacc } private: - friend class Environment; /** total simulation volume, including active and inactive subvolumes */ @@ -131,7 +129,6 @@ namespace pmacc */ SubGrid() { - } static SubGrid& getInstance() @@ -149,12 +146,8 @@ namespace pmacc */ SubGrid(const SubGrid& gc) { - } }; -} //namespace pmacc - - - +} // namespace pmacc diff --git a/include/pmacc/mappings/threads/ForEachIdx.hpp b/include/pmacc/mappings/threads/ForEachIdx.hpp index 8e9d5b8bba..be6fdcf043 100644 --- a/include/pmacc/mappings/threads/ForEachIdx.hpp +++ b/include/pmacc/mappings/threads/ForEachIdx.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -29,100 +29,75 @@ namespace pmacc { -namespace mappings -{ -namespace threads -{ - - /** execute a functor for each index - * - * Distribute the indices even over all worker and execute a user defined functor. - * There is no guarantee in which order the indices will be processed. 
- * - * @tparam T_IdxConfig index domain description - */ - template< - typename T_IdxConfig - > - struct ForEachIdx : public T_IdxConfig + namespace mappings { - using T_IdxConfig::domainSize; - using T_IdxConfig::workerSize; - using T_IdxConfig::simdSize; - using T_IdxConfig::numCollIter; + namespace threads + { + /** execute a functor for each index + * + * Distribute the indices even over all worker and execute a user defined functor. + * There is no guarantee in which order the indices will be processed. + * + * @tparam T_IdxConfig index domain description + */ + template + struct ForEachIdx : public T_IdxConfig + { + using T_IdxConfig::domainSize; + using T_IdxConfig::numCollIter; + using T_IdxConfig::simdSize; + using T_IdxConfig::workerSize; - uint32_t const m_workerIdx; + uint32_t const m_workerIdx; - static constexpr bool outerLoopCondition = - ( domainSize % (simdSize * workerSize) ) == 0u || - ( simdSize * workerSize == 1u ); + static constexpr bool outerLoopCondition + = (domainSize % (simdSize * workerSize)) == 0u || (simdSize * workerSize == 1u); - static constexpr bool innerLoopCondition = - ( domainSize % simdSize ) == 0u || - ( simdSize == 1u ); + static constexpr bool innerLoopCondition = (domainSize % simdSize) == 0u || (simdSize == 1u); - /** constructor - * - * @param workerIdx index of the worker: range [0;workerSize) - */ - HDINLINE - ForEachIdx( uint32_t const workerIdx ) : m_workerIdx( workerIdx ) - { - } + /** constructor + * + * @param workerIdx index of the worker: range [0;workerSize) + */ + HDINLINE + ForEachIdx(uint32_t const workerIdx) : m_workerIdx(workerIdx) + { + } - /** execute a functor - * - * @param functor is called for each index which is mapped to the worker - * - * The functor must fulfill the following interface: - * @code - * template< typename ... T_Args > - * void operator()( uint32_t const linearIdx, uint32_t const idx, T_Args && ... ); - * @endcode - * - * @{ - */ - template< - typename T_Functor, - typename ... 
T_Args - > - HDINLINE void - operator()( - T_Functor && functor, - T_Args && ... args - ) const - { - for( uint32_t i = 0u; i < numCollIter; ++i ) - { - uint32_t const beginWorker = i * simdSize; - uint32_t const beginIdx = beginWorker * workerSize + simdSize * m_workerIdx; - if( - outerLoopCondition || - !innerLoopCondition || - beginIdx < domainSize - ) + /** execute a functor + * + * @param functor is called for each index which is mapped to the worker + * + * The functor must fulfill the following interface: + * @code + * template< typename ... T_Args > + * void operator()( uint32_t const linearIdx, uint32_t const idx, T_Args && ... ); + * @endcode + * + * @{ + */ + template + HDINLINE void operator()(T_Functor&& functor, T_Args&&... args) const { - for( uint32_t j = 0u; j < simdSize; ++j ) + for(uint32_t i = 0u; i < numCollIter; ++i) { - uint32_t const localIdx = beginIdx + j; - if( - innerLoopCondition || - localIdx < domainSize - ) - functor( - localIdx, - beginWorker + j, - std::forward< T_Args >( args ) ... - ); + uint32_t const beginWorker = i * simdSize; + uint32_t const beginIdx = beginWorker * workerSize + simdSize * m_workerIdx; + if(outerLoopCondition || !innerLoopCondition || beginIdx < domainSize) + { + for(uint32_t j = 0u; j < simdSize; ++j) + { + uint32_t const localIdx = beginIdx + j; + if(innerLoopCondition || localIdx < domainSize) + functor(localIdx, beginWorker + j, std::forward(args)...); + } + } } } - } - } - - /** @} */ - }; + /** @} */ + }; -} // namespace threads -} // namespace mappings + } // namespace threads + } // namespace mappings } // namespace pmacc diff --git a/include/pmacc/mappings/threads/IdxConfig.hpp b/include/pmacc/mappings/threads/IdxConfig.hpp index 785b98a251..fc6a600c94 100644 --- a/include/pmacc/mappings/threads/IdxConfig.hpp +++ b/include/pmacc/mappings/threads/IdxConfig.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,38 +26,33 @@ namespace pmacc { -namespace mappings -{ -namespace threads -{ - - /** describe a constant index domain - * - * describe the size of the index domain and the number of workers to operate on the domain - * - * @tparam T_domainSize number of indices in the domain - * @tparam T_workerSize number of worker working on @p T_domainSize - * @tparam T_simdSize SIMD width - */ - template< - uint32_t T_domainSize, - uint32_t T_workerSize, - uint32_t T_simdSize = 1u - > - struct IdxConfig + namespace mappings { - /** number of indices within the domain */ - static constexpr uint32_t domainSize = T_domainSize; - /** number of worker (threads) working on @p domainSize */ - static constexpr uint32_t workerSize = T_workerSize; - /** SIMD width */ - static constexpr uint32_t simdSize = T_simdSize; + namespace threads + { + /** describe a constant index domain + * + * describe the size of the index domain and the number of workers to operate on the domain + * + * @tparam T_domainSize number of indices in the domain + * @tparam T_workerSize number of worker working on @p T_domainSize + * @tparam T_simdSize SIMD width + */ + template + struct IdxConfig + { + /** number of indices within the domain */ + static constexpr uint32_t domainSize = T_domainSize; + /** number of worker (threads) working on @p domainSize */ + static constexpr uint32_t workerSize = T_workerSize; + /** SIMD width */ + static constexpr uint32_t simdSize = T_simdSize; - /** number of collective iterations needed to address all indices */ - static constexpr uint32_t numCollIter = - ( domainSize + simdSize * workerSize - 1u ) / ( simdSize * workerSize); - }; + /** number of collective iterations needed to address all indices */ + static constexpr uint32_t numCollIter + = (domainSize + simdSize * workerSize - 1u) / (simdSize * workerSize); + }; -} // namespace threads -} // namespace mappings + } // namespace threads + } // namespace mappings } // namespace pmacc diff --git 
a/include/pmacc/mappings/threads/ThreadCollective.hpp b/include/pmacc/mappings/threads/ThreadCollective.hpp index e6c1153356..ce56f2bd57 100644 --- a/include/pmacc/mappings/threads/ThreadCollective.hpp +++ b/include/pmacc/mappings/threads/ThreadCollective.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -31,97 +31,68 @@ namespace pmacc { - -/** execute a functor for each cell of a domain - * - * the user functor is executed on each elements of the full domain (GUARD +CORE) - * - * @tparam T_DataDomain pmacc::SuperCellDescription, compile time data domain - * description with a CORE and GUARD - * @tparam T_numWorkers number of workers - */ -template< - typename T_DataDomain, - uint32_t T_numWorkers -> -class ThreadCollective -{ -private: - // size of the CORE (in elements per dimension) - using CoreDomainSize = typename T_DataDomain::SuperCellSize; - // full size of the domain including the GUARD (in elements per dimension) - using DomainSize = typename T_DataDomain::FullSuperCellSize; - // offset (in elements per dimension) from the GUARD origin to the CORE - using OffsetOrigin = typename T_DataDomain::OffsetOrigin; - - static constexpr uint32_t numWorkers = T_numWorkers; - static constexpr uint32_t dim = T_DataDomain::Dim; - - PMACC_ALIGN( - m_workerIdx, - const uint32_t - ); - -public: - - /** constructor + /** execute a functor for each cell of a domain * - * @param workerIdx index of the worker - */ - DINLINE ThreadCollective( uint32_t const workerIdx ) : - m_workerIdx( workerIdx ) - { - } - - /** execute the user functor for each element in the full domain - * - * @tparam T_Functor type of the user functor, must have a `void operator()` - * with as many arguments as args contains - * @tparam T_Args type of the arguments, each type must implement an operator - * `template R operator(T)` + * the user functor is executed on 
each elements of the full domain (GUARD +CORE) * - * @param functor user defined functor - * @param args arguments passed to the functor - * The method `template R operator(T)` - * is called for each argument, the result is passed to the - * functor `functor::operator()`. - * `T` is a N-dimensional vector of an index relative to the origin - * of data domain GUARD + * @tparam T_DataDomain pmacc::SuperCellDescription, compile time data domain + * description with a CORE and GUARD + * @tparam T_numWorkers number of workers */ - template< - typename T_Functor, - typename ... T_Args, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_Functor & functor, - T_Args && ... args - ) + template + class ThreadCollective { - using namespace mappings::threads; - ForEachIdx< - IdxConfig< - math::CT::volume< DomainSize >::type::value, - numWorkers - > - >{ m_workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - /* offset (in elements) of the current processed element relative - * to the origin of the core domain - */ - DataSpace< dim > const offset( - DataSpaceOperations< dim >::template map< DomainSize >( linearIdx ) - - OffsetOrigin::toRT( ) - ); - functor( acc, args( offset ) ... 
); - } - ); - } -}; + private: + // size of the CORE (in elements per dimension) + using CoreDomainSize = typename T_DataDomain::SuperCellSize; + // full size of the domain including the GUARD (in elements per dimension) + using DomainSize = typename T_DataDomain::FullSuperCellSize; + // offset (in elements per dimension) from the GUARD origin to the CORE + using OffsetOrigin = typename T_DataDomain::OffsetOrigin; + + static constexpr uint32_t numWorkers = T_numWorkers; + static constexpr uint32_t dim = T_DataDomain::Dim; + + PMACC_ALIGN(m_workerIdx, const uint32_t); + + public: + /** constructor + * + * @param workerIdx index of the worker + */ + DINLINE ThreadCollective(uint32_t const workerIdx) : m_workerIdx(workerIdx) + { + } + + /** execute the user functor for each element in the full domain + * + * @tparam T_Functor type of the user functor, must have a `void operator()` + * with as many arguments as args contains + * @tparam T_Args type of the arguments, each type must implement an operator + * `template R operator(T)` + * + * @param functor user defined functor + * @param args arguments passed to the functor + * The method `template R operator(T)` + * is called for each argument, the result is passed to the + * functor `functor::operator()`. + * `T` is a N-dimensional vector of an index relative to the origin + * of data domain GUARD + */ + template + DINLINE void operator()(T_Acc const& acc, T_Functor& functor, T_Args&&... 
args) + { + using namespace mappings::threads; + ForEachIdx::type::value, numWorkers>>{m_workerIdx}( + [&](uint32_t const linearIdx, uint32_t const) { + /* offset (in elements) of the current processed element relative + * to the origin of the core domain + */ + DataSpace const offset( + DataSpaceOperations::template map(linearIdx) - OffsetOrigin::toRT()); + functor(acc, args(offset)...); + }); + } + }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/mappings/threads/WorkerCfg.hpp b/include/pmacc/mappings/threads/WorkerCfg.hpp index 73f658e50a..17a2812aa7 100644 --- a/include/pmacc/mappings/threads/WorkerCfg.hpp +++ b/include/pmacc/mappings/threads/WorkerCfg.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -26,58 +26,54 @@ namespace pmacc { -namespace mappings -{ -namespace threads -{ - -/** holds a worker configuration - * - * collection of the compile time number of workers and the runtime worker index - * - * @tparam T_numWorkers number of workers which are used to execute this functor - */ -template< uint32_t T_numWorkers > -class WorkerCfg -{ -private: - - //! index of the worker: range [0;T_numWorkers) */ - PMACC_ALIGN( m_workerIdx, uint32_t const ); - -public: + namespace mappings + { + namespace threads + { + /** holds a worker configuration + * + * collection of the compile time number of workers and the runtime worker index + * + * @tparam T_numWorkers number of workers which are used to execute this functor + */ + template + class WorkerCfg + { + private: + //! index of the worker: range [0;T_numWorkers) */ + PMACC_ALIGN(m_workerIdx, uint32_t const); - //! number of workers - static constexpr uint32_t numWorkers = T_numWorkers; + public: + //! 
number of workers + static constexpr uint32_t numWorkers = T_numWorkers; - /** constructor - * - * @param workerIdx worker index - */ - HDINLINE WorkerCfg( uint32_t const workerIdx ) : - m_workerIdx( workerIdx ) - { } + /** constructor + * + * @param workerIdx worker index + */ + HDINLINE WorkerCfg(uint32_t const workerIdx) : m_workerIdx(workerIdx) + { + } - /** get the worker index - * - * @return index of the worker - */ - HDINLINE uint32_t getWorkerIdx( ) const - { - return m_workerIdx; - } + /** get the worker index + * + * @return index of the worker + */ + HDINLINE uint32_t getWorkerIdx() const + { + return m_workerIdx; + } - /** get the number of workers - * - * @return number of workers - */ - HDINLINE static - constexpr uint32_t getNumWorkers( ) - { - return T_numWorkers; - } -}; + /** get the number of workers + * + * @return number of workers + */ + HDINLINE static constexpr uint32_t getNumWorkers() + { + return T_numWorkers; + } + }; -} // namespace threads -} // namespace mappings + } // namespace threads + } // namespace mappings } // namespace pmacc diff --git a/include/pmacc/math/Complex.hpp b/include/pmacc/math/Complex.hpp index 84fb29cee1..ca457a449c 100644 --- a/include/pmacc/math/Complex.hpp +++ b/include/pmacc/math/Complex.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Debus +/* Copyright 2015-2021 Alexander Debus * * This file is part of PMacc. * diff --git a/include/pmacc/math/ConstVector.hpp b/include/pmacc/math/ConstVector.hpp index 256a2f3afd..4e57c6e043 100644 --- a/include/pmacc/math/ConstVector.hpp +++ b/include/pmacc/math/ConstVector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2014-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -26,21 +26,43 @@ #include "pmacc/types.hpp" /* select namespace depending on __CUDA_ARCH__ compiler flag*/ -#ifdef __CUDA_ARCH__ //we are on gpu -# define PMACC_USING_STATIC_CONST_VECTOR_NAMESPACE(id) using namespace PMACC_JOIN(pmacc_static_const_vector_device,id) +#if(CUPLA_DEVICE_COMPILE == 1) // we are on gpu +# define PMACC_USING_STATIC_CONST_VECTOR_NAMESPACE(id) \ + using namespace PMACC_JOIN(pmacc_static_const_vector_device, id) #else -# define PMACC_USING_STATIC_CONST_VECTOR_NAMESPACE(id) using namespace PMACC_JOIN(pmacc_static_const_vector_host,id) +# define PMACC_USING_STATIC_CONST_VECTOR_NAMESPACE(id) \ + using namespace PMACC_JOIN(pmacc_static_const_vector_host, id) #endif -#ifdef __CUDACC__ -# define PMACC_STATIC_CONST_VECTOR_DIM_DEF_CUDA(id,Name,Type,...) \ - namespace PMACC_JOIN(pmacc_static_const_vector_device,id) \ - { \ - /* store all values in a const C array on device*/ \ - __constant__ const Type PMACC_JOIN(Name, _data)[]={__VA_ARGS__}; \ +#if defined(__CUDACC__) || BOOST_COMP_HIP +# define PMACC_STATIC_CONST_VECTOR_DIM_DEF_CUDA(id, Name, Type, ...) \ + namespace PMACC_JOIN(pmacc_static_const_vector_device, id) \ + { \ + /* store all values in a const C array on device*/ \ + __constant__ const Type PMACC_JOIN(Name, _data)[] = {__VA_ARGS__}; \ } /*namespace pmacc_static_const_vector_device + id */ #else -# define PMACC_STATIC_CONST_VECTOR_DIM_DEF_CUDA(id,Name,Type,...) +# define PMACC_STATIC_CONST_VECTOR_DIM_DEF_CUDA(id, Name, Type, ...) 
+#endif + +#define PMACC_PRAGMA_QUOTE(x) _Pragma(#x) +#define PMACC_PRAGMA_OACC_DECLARE_ARRAY(name, count) +#define PMACC_PRAGMA_OMP_TARGET_BEGIN_DECLARE +#define PMACC_PRAGMA_OMP_TARGET_END_DECLARE +#define PMACC_TARGET_CONSTEXPR constexpr + +#ifdef ALPAKA_ACC_ANY_BT_OACC_ENABLED +# undef PMACC_PRAGMA_OACC_DECLARE_ARRAY(name, count) +# undef PMACC_TARGET_CONSTEXPR +// might need to remove parentheses from macro argument count to clean up copyin clause, but works with NVHPC +# define PMACC_PRAGMA_OACC_DECLARE_ARRAY(name, count) PMACC_PRAGMA_QUOTE(acc declare copyin(name)) +# define PMACC_TARGET_CONSTEXPR +#elif defined ALPAKA_ACC_ANY_BT_OMP5_ENABLED +# undef PMACC_PRAGMA_OMP_TARGET_BEGIN_DECLARE +# undef PMACC_PRAGMA_OMP_TARGET_END_DECLARE +// the single-pragma declare (more like the OpenACC version above) does not work with clang 11 +# define PMACC_PRAGMA_OMP_TARGET_BEGIN_DECLARE _Pragma("omp declare target") +# define PMACC_PRAGMA_OMP_TARGET_END_DECLARE _Pragma("omp end declare target") #endif /** define a const vector @@ -50,75 +72,75 @@ * * @param id unique precompiler id to create unique namespaces */ -#define PMACC_STATIC_CONST_VECTOR_DIM_DEF(id,Name,Type,Dim,count,...) 
\ -namespace PMACC_JOIN(pmacc_static_const_storage,id) \ -{ \ - PMACC_STATIC_CONST_VECTOR_DIM_DEF_CUDA(id,Name,Type,__VA_ARGS__); \ - namespace PMACC_JOIN(pmacc_static_const_vector_host,id) \ - { \ - /* store all values in a const C array on host*/ \ - const Type PMACC_JOIN(Name,_data)[]={__VA_ARGS__}; \ - } /* namespace pmacc_static_const_vector_host + id */ \ - /* select host or device namespace depending on __CUDA_ARCH__ compiler flag*/ \ - PMACC_USING_STATIC_CONST_VECTOR_NAMESPACE(id); \ - template \ - struct ConstArrayStorage \ - { \ - PMACC_CASSERT_MSG( \ - __PMACC_CONST_VECTOR_dimension_needs_to_be_less_than_or_equal_to_the_number_of_arguments__, \ - Dim <= count ); \ - static constexpr bool isConst = true; \ - typedef T_Type type; \ - static constexpr int dim = T_Dim; \ - \ - HDINLINE const type& operator[](const int idx) const \ - { \ - /*access const C array with the name of array*/ \ - return PMACC_JOIN(Name,_data)[idx]; \ - } \ - }; \ - /*define a const vector type, ConstArrayStorage is used as Storage policy*/\ - typedef const pmacc::math::Vector< \ - Type, \ - Dim, \ - pmacc::math::StandardAccessor, \ - pmacc::math::StandardNavigator, \ - ConstArrayStorage > PMACC_JOIN(Name,_t); \ -} /* namespace pmacc_static_const_storage + id */ \ -using namespace PMACC_JOIN(pmacc_static_const_storage,id) +#define PMACC_STATIC_CONST_VECTOR_DIM_DEF(id, Name, Type, Dim, count, ...) 
\ + namespace PMACC_JOIN(pmacc_static_const_storage, id) \ + { \ + PMACC_STATIC_CONST_VECTOR_DIM_DEF_CUDA(id, Name, Type, __VA_ARGS__); \ + namespace PMACC_JOIN(pmacc_static_const_vector_host, id) \ + { \ + /* store all values in a const C array on host*/ \ + PMACC_PRAGMA_OMP_TARGET_BEGIN_DECLARE \ + PMACC_TARGET_CONSTEXPR Type PMACC_JOIN(Name, _data)[] = {__VA_ARGS__}; \ + PMACC_PRAGMA_OMP_TARGET_END_DECLARE \ + PMACC_PRAGMA_OACC_DECLARE_ARRAY(PMACC_JOIN(Name, _data), count) \ + } /* namespace pmacc_static_const_vector_host + id */ \ + /* select host or device namespace depending on __CUDA_ARCH__ compiler flag*/ \ + PMACC_USING_STATIC_CONST_VECTOR_NAMESPACE(id); \ + template \ + struct ConstArrayStorage \ + { \ + PMACC_CASSERT_MSG( \ + __PMACC_CONST_VECTOR_dimension_needs_to_be_less_than_or_equal_to_the_number_of_arguments__, \ + Dim <= count); \ + static constexpr bool isConst = true; \ + typedef T_Type type; \ + static constexpr int dim = T_Dim; \ + \ + HDINLINE const type& operator[](const int idx) const \ + { \ + /*access const C array with the name of array*/ \ + return PMACC_JOIN(Name, _data)[idx]; \ + } \ + }; \ + /*define a const vector type, ConstArrayStorage is used as Storage policy*/ \ + typedef const pmacc::math:: \ + Vector \ + PMACC_JOIN(Name, _t); \ + } /* namespace pmacc_static_const_storage + id */ \ + using namespace PMACC_JOIN(pmacc_static_const_storage, id) -#ifdef __CUDACC__ -# define PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE_CUDA(Name,id) \ - namespace PMACC_JOIN(pmacc_static_const_vector_device,id) \ - { \ - /* create const instance on device */ \ - __constant__ const PMACC_JOIN(Name,_t) Name; \ +#if defined(__CUDACC__) || BOOST_COMP_HIP +# define PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE_CUDA(Name, id) \ + namespace PMACC_JOIN(pmacc_static_const_vector_device, id) \ + { \ + /* create const instance on device */ \ + __constant__ const PMACC_JOIN(Name, _t) Name; \ } /* namespace pmacc_static_const_vector_device + id */ #else -# define 
PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE_CUDA(Name,id) +# define PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE_CUDA(Name, id) #endif /** create a instance of type `Name_t` with the name `Name` */ -#define PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE(id,Name,Type,Dim,count,...) \ -namespace PMACC_JOIN(pmacc_static_const_storage,id) \ -{ \ - /* Conditionally define the instance on CUDA devices */ \ - PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE_CUDA(Name,id) \ - namespace PMACC_JOIN(pmacc_static_const_vector_host,id) \ - { \ - /* create const instance on host*/ \ - const PMACC_JOIN(Name,_t) Name; \ - } /* namespace pmacc_static_const_vector_host + id */ \ -} /* namespace pmacc_static_const_storage + id */ +#define PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE(id, Name, Type, Dim, count, ...) \ + namespace PMACC_JOIN(pmacc_static_const_storage, id) \ + { \ + /* Conditionally define the instance on CUDA devices */ \ + PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE_CUDA(Name, id) \ + namespace PMACC_JOIN(pmacc_static_const_vector_host, id) \ + { \ + /* create const instance on host*/ \ + constexpr PMACC_JOIN(Name, _t) Name; \ + } /* namespace pmacc_static_const_vector_host + id */ \ + } /* namespace pmacc_static_const_storage + id */ /** @see PMACC_CONST_VECTOR documentation, only unique "id" is added * * @param id unique precompiler id to create unique namespaces */ -#define PMACC_STATIC_CONST_VECTOR_DIM(id,Name,Type,Dim,count,...) \ - PMACC_STATIC_CONST_VECTOR_DIM_DEF(id,Name,Type,Dim,count,__VA_ARGS__); \ - PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE(id,Name,Type,Dim,count,__VA_ARGS__) +#define PMACC_STATIC_CONST_VECTOR_DIM(id, Name, Type, Dim, count, ...) \ + PMACC_STATIC_CONST_VECTOR_DIM_DEF(id, Name, Type, Dim, count, __VA_ARGS__); \ + PMACC_STATIC_CONST_VECTOR_DIM_INSTANCE(id, Name, Type, Dim, count, __VA_ARGS__) /** define a const vector @@ -127,8 +149,8 @@ namespace PMACC_JOIN(pmacc_static_const_storage,id) \ * * create type definition `name_t` */ -#define PMACC_CONST_VECTOR_DEF(type,dim,name,...) 
\ - PMACC_STATIC_CONST_VECTOR_DIM_DEF(__COUNTER__,name,type,dim,PMACC_COUNT_ARGS(type,__VA_ARGS__),__VA_ARGS__) +#define PMACC_CONST_VECTOR_DEF(type, dim, name, ...) \ + PMACC_STATIC_CONST_VECTOR_DIM_DEF(__COUNTER__, name, type, dim, PMACC_COUNT_ARGS(type, __VA_ARGS__), __VA_ARGS__) /** Create global constant math::Vector with compile time values which can be * used on device and host @@ -146,5 +168,5 @@ namespace PMACC_JOIN(pmacc_static_const_storage,id) \ * create math:Vector myVector(2.1,4.2); //as global const vector * The type of the created vector is "name_t" -> in this case "myVector_t" */ -#define PMACC_CONST_VECTOR(type,dim,name,...) \ - PMACC_STATIC_CONST_VECTOR_DIM(__COUNTER__,name,type,dim,PMACC_COUNT_ARGS(type,__VA_ARGS__),__VA_ARGS__) +#define PMACC_CONST_VECTOR(type, dim, name, ...) \ + PMACC_STATIC_CONST_VECTOR_DIM(__COUNTER__, name, type, dim, PMACC_COUNT_ARGS(type, __VA_ARGS__), __VA_ARGS__) diff --git a/include/pmacc/math/MapTuple.hpp b/include/pmacc/math/MapTuple.hpp index e085e66ea8..644427fdd0 100644 --- a/include/pmacc/math/MapTuple.hpp +++ b/include/pmacc/math/MapTuple.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -32,220 +32,138 @@ namespace pmacc { -namespace math -{ - - namespace bmpl = boost::mpl; - - /** wrap a datum - * - * align the data structure with `PMACC_ALIGN` - * - * @tparam T_Pair boost mpl pair< key, type of the value > - */ - template< typename T_Pair > - struct AlignedData + namespace math { - typedef typename T_Pair::first Key; - typedef typename T_Pair::second ValueType; - - PMACC_ALIGN( value, ValueType ); + namespace bmpl = boost::mpl; - HDINLINE AlignedData( ) + /** wrap a datum + * + * align the data structure with `PMACC_ALIGN` + * + * @tparam T_Pair boost mpl pair< key, type of the value > + */ + template + struct AlignedData { - } + typedef typename T_Pair::first Key; + typedef typename T_Pair::second ValueType; - HDINLINE AlignedData( const ValueType& value ) : value( value ) - { - } + PMACC_ALIGN(value, ValueType); - HDINLINE ValueType& operator[]( const Key& ) - { - return value; - } + HDINLINE AlignedData() + { + } - HDINLINE const ValueType& operator[]( const Key& ) const - { - return value; - } - }; - - /** wrap a datum - * - * @tparam T_Pair boost mpl pair< key, type of the value > - */ - template< typename T_Pair > - struct NativeData - { - typedef typename T_Pair::first Key; - typedef typename T_Pair::second ValueType; + HDINLINE AlignedData(const ValueType& value) : value(value) + { + } - ValueType value; + HDINLINE ValueType& operator[](const Key&) + { + return value; + } - HDINLINE NativeData( ) - { - } + HDINLINE const ValueType& operator[](const Key&) const + { + return value; + } + }; - HDINLINE NativeData( const ValueType& value ) : value( value ) + /** wrap a datum + * + * @tparam T_Pair boost mpl pair< key, type of the value > + */ + template + struct NativeData { - } + typedef typename T_Pair::first Key; + typedef typename T_Pair::second ValueType; - HDINLINE ValueType& operator[]( const Key& ) - { - return value; - } + ValueType value; - HDINLINE const ValueType& operator[]( const Key& ) const - { - return value; - } - }; - 
- template< - typename T_Map, - template< typename > class T_PodType = NativeData - > - struct MapTuple : - protected InheritLinearly< - T_Map, - T_PodType - > - { + HDINLINE NativeData() + { + } - typedef T_Map Map; - static constexpr int dim = bmpl::size< Map >::type::value; - typedef InheritLinearly< - T_Map, - T_PodType - > Base; + HDINLINE NativeData(const ValueType& value) : value(value) + { + } - template< class > struct result; + HDINLINE ValueType& operator[](const Key&) + { + return value; + } - template< - class T_F, - class T_Key - > - struct result< T_F( T_Key ) > - { - typedef typename bmpl::at< - Map, - T_Key - >::type& type; + HDINLINE const ValueType& operator[](const Key&) const + { + return value; + } }; - template< - class T_F, - class T_Key - > - struct result< const T_F( T_Key ) > + template class T_PodType = NativeData> + struct MapTuple : protected InheritLinearly { - typedef const typename bmpl::at< - Map, - T_Key - >::type& type; + typedef T_Map Map; + static constexpr int dim = bmpl::size::type::value; + typedef InheritLinearly Base; + + template + struct result; + + template + struct result + { + typedef typename bmpl::at::type& type; + }; + + template + struct result + { + typedef const typename bmpl::at::type& type; + }; + + /** access a datum with a key + * + * @tparam T_Key key type + * + * @{ + */ + template + HDINLINE typename boost::result_of::type operator[](const T_Key& key) + { + return (*(static_cast::type>>*>(this)))[key]; + } + + template + HDINLINE typename boost::result_of::type operator[](const T_Key& key) const + { + return (*( + static_cast::type>>*>(this)))[key]; + } + /** @} */ + + /** access a datum with an index + * + * @tparam T_i the index of tuple's i-th element + * + * @{ + */ + template + HDINLINE typename boost::result_of>::type::first)>::type + at() + { + return (*this)[typename bmpl::at>::type::first()]; + } + + template + HDINLINE + typename boost::result_of>::type::first)>::type + at() const + { + return 
(*this)[typename bmpl::at>::type::first()]; + } + /** @} */ }; - /** access a datum with a key - * - * @tparam T_Key key type - * - * @{ - */ - template< typename T_Key > - HDINLINE - typename boost::result_of< - MapTuple( T_Key ) - >::type - operator[]( const T_Key& key ) - { - return - ( - *( static_cast< - T_PodType< - bmpl::pair< - T_Key, - typename bmpl::at< - Map, - T_Key - >::type - > - >* - >( this ) ) - )[key]; - } - - template< typename T_Key > - HDINLINE - typename boost::result_of< - const MapTuple( T_Key ) - >::type - operator[]( const T_Key& key ) const - { - return ( - *( - static_cast< - const T_PodType< - bmpl::pair< - T_Key, - typename bmpl::at< - Map, - T_Key - >::type - > - >* - >( this ) - ) - )[key]; - } - /** @} */ - - /** access a datum with an index - * - * @tparam T_i the index of tuple's i-th element - * - * @{ - */ - template< int T_i > - HDINLINE - typename boost::result_of< - MapTuple( - typename bmpl::at< - Map, - bmpl::int_< T_i > - >::type::first - ) - >::type - at( ) - { - return ( *this )[ - typename bmpl::at< - Map, - bmpl::int_< T_i > - >::type::first( ) - ]; - } - - template< int T_i > - HDINLINE - typename boost::result_of< - const MapTuple( - typename bmpl::at< - Map, - bmpl::int_< T_i > - >::type::first - ) - >::type - at( ) const - { - return ( *this )[ - typename bmpl::at< - Map, - bmpl::int_< T_i > - >::type::first( ) - ]; - } - /** @} */ - }; - -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/RungeKutta.hpp b/include/pmacc/math/RungeKutta.hpp index a7e59e2035..bf5764778e 100644 --- a/include/pmacc/math/RungeKutta.hpp +++ b/include/pmacc/math/RungeKutta.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Richard Pausch +/* Copyright 2015-2021 Richard Pausch * * This file is part of PMacc. 
* diff --git a/include/pmacc/math/RungeKutta/RungeKutta4.hpp b/include/pmacc/math/RungeKutta/RungeKutta4.hpp index 373876ad0b..99836f44d3 100644 --- a/include/pmacc/math/RungeKutta/RungeKutta4.hpp +++ b/include/pmacc/math/RungeKutta/RungeKutta4.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Richard Pausch +/* Copyright 2015-2021 Richard Pausch * * This file is part of PMacc. * @@ -23,55 +23,48 @@ namespace pmacc { -namespace math -{ - -struct RungeKutta4 -{ - /** Runge Kutta solver 4th order - * - * Calculate next time step based on the Runge Kutta - * algorithm and return next variable - * - * @param diffEq functor with first argument time and second variables - * @param var variables of type T_Variable (can be vector type) - * @param time current time - * @param deltaTime time step - * @return var for the consecutive time step - */ - template - HDINLINE T_Variable operator()(const T_Functor diffEq, - const T_Variable var, - const T_Time time, - const T_Time deltaTime) + namespace math { - // use typenames instead of template types - typedef T_Functor FunctorType; - typedef T_Variable VariableType; - typedef T_Time TimeType; + struct RungeKutta4 + { + /** Runge Kutta solver 4th order + * + * Calculate next time step based on the Runge Kutta + * algorithm and return next variable + * + * @param diffEq functor with first argument time and second variables + * @param var variables of type T_Variable (can be vector type) + * @param time current time + * @param deltaTime time step + * @return var for the consecutive time step + */ + template + HDINLINE T_Variable + operator()(const T_Functor diffEq, const T_Variable var, const T_Time time, const T_Time deltaTime) + { + // use typenames instead of template types + typedef T_Functor FunctorType; + typedef T_Variable VariableType; + typedef T_Time TimeType; - // calculate all 4 steps of the Runge Kutta 4th order - const VariableType k_1 = diffEq(time, - var); - const VariableType k_2 = diffEq(time + TimeType(0.5) * 
deltaTime, - var + (TimeType(0.5) * deltaTime) * k_1); - const VariableType k_3 = diffEq(time + TimeType(0.5) * deltaTime, - var + (TimeType(0.5) * deltaTime) * k_2); - const VariableType k_4 = diffEq(time + deltaTime, - var + deltaTime * k_3); + // calculate all 4 steps of the Runge Kutta 4th order + const VariableType k_1 = diffEq(time, var); + const VariableType k_2 + = diffEq(time + TimeType(0.5) * deltaTime, var + (TimeType(0.5) * deltaTime) * k_1); + const VariableType k_3 + = diffEq(time + TimeType(0.5) * deltaTime, var + (TimeType(0.5) * deltaTime) * k_2); + const VariableType k_4 = diffEq(time + deltaTime, var + deltaTime * k_3); - // combine all 4 steps - const VariableType diff = deltaTime/TimeType(6.) * (k_1 - + TimeType(2.) * k_2 - + TimeType(2.) * k_3 - + k_4); + // combine all 4 steps + const VariableType diff + = deltaTime / TimeType(6.) * (k_1 + TimeType(2.) * k_2 + TimeType(2.) * k_3 + k_4); - // current var + difference = new var - const VariableType out = var + diff; - return out; - } -}; + // current var + difference = new var + const VariableType out = var + diff; + return out; + } + }; -} //namespace math -} //namespace pmacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/Tuple.hpp b/include/pmacc/math/Tuple.hpp index 808d8235f5..d6751e9627 100644 --- a/include/pmacc/math/Tuple.hpp +++ b/include/pmacc/math/Tuple.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -40,129 +40,122 @@ namespace pmacc { -namespace math -{ - + namespace math + { #ifndef TUPLE_MAX_DIM -#define TUPLE_MAX_DIM 8 +# define TUPLE_MAX_DIM 8 #endif -#define CONSTRUCTOR(Z, N, _) \ - template \ - HDINLINE \ - Tuple(BOOST_PP_ENUM_BINARY_PARAMS(N, const Arg, &arg)) \ - : value(arg0), \ - base(BOOST_PP_ENUM_SHIFTED_PARAMS(N, arg)) \ - { \ - BOOST_STATIC_ASSERT(dim == N); \ +#define CONSTRUCTOR(Z, N, _) \ + template \ + HDINLINE Tuple(BOOST_PP_ENUM_BINARY_PARAMS(N, const Arg, &arg)) \ + : value(arg0) \ + , base(BOOST_PP_ENUM_SHIFTED_PARAMS(N, arg)) \ + { \ + BOOST_STATIC_ASSERT(dim == N); \ } -namespace mpl = boost::mpl; - -template::type::value> -class Tuple; - -template -class Tuple {}; - -template -class Tuple - : public Tuple::type> -{ -public: - static constexpr int dim = mpl::size::type::value; - typedef TypeList TypeList_; -private: - typedef Tuple::type> base; - - typedef typename mpl::front::type Value; - typedef typename boost::remove_reference::type pureValue; - - Value value; -public: - HDINLINE Tuple() {} - - HDINLINE Tuple(Value arg0) : value(arg0) - { - BOOST_STATIC_ASSERT(dim == 1); - } - - BOOST_PP_REPEAT_FROM_TO(2, BOOST_PP_INC(TUPLE_MAX_DIM), CONSTRUCTOR, _) - - template - HDINLINE - typename mpl::at_c::type& - at_c() - { - return this->at(mpl::int_()); - } - template - HDINLINE - const typename mpl::at_c::type& - at_c() const - { - return this->at(mpl::int_()); - } - - HDINLINE Value& at(mpl::int_<0>) - { - return value; - } - HDINLINE Value& at(mpl::integral_c) - { - return value; - } - - HDINLINE const Value& at(mpl::int_<0>) const - { - return value; - } - HDINLINE const Value& at(mpl::integral_c) const - { - return value; - } - - template - HDINLINE - typename mpl::at::type& - at(Idx) - { - return base::at(typename mpl::minus >::type()); - } - - template - HDINLINE - const typename mpl::at::type& - at(Idx) const - { - return base::at(typename mpl::minus >::type()); - } -}; + namespace mpl = boost::mpl; + + template::type::value> + 
class Tuple; + + template + class Tuple + { + }; + + template + class Tuple : public Tuple::type> + { + public: + static constexpr int dim = mpl::size::type::value; + typedef TypeList TypeList_; + + private: + typedef Tuple::type> base; + + typedef typename mpl::front::type Value; + typedef typename boost::remove_reference::type pureValue; + + Value value; + + public: + HDINLINE Tuple() + { + } + + HDINLINE Tuple(Value arg0) : value(arg0) + { + BOOST_STATIC_ASSERT(dim == 1); + } + + BOOST_PP_REPEAT_FROM_TO(2, BOOST_PP_INC(TUPLE_MAX_DIM), CONSTRUCTOR, _) + + template + HDINLINE typename mpl::at_c::type& at_c() + { + return this->at(mpl::int_()); + } + template + HDINLINE const typename mpl::at_c::type& at_c() const + { + return this->at(mpl::int_()); + } + + HDINLINE Value& at(mpl::int_<0>) + { + return value; + } + HDINLINE Value& at(mpl::integral_c) + { + return value; + } + + HDINLINE const Value& at(mpl::int_<0>) const + { + return value; + } + HDINLINE const Value& at(mpl::integral_c) const + { + return value; + } + + template + HDINLINE typename mpl::at::type& at(Idx) + { + return base::at(typename mpl::minus>::type()); + } + + template + HDINLINE const typename mpl::at::type& at(Idx) const + { + return base::at(typename mpl::minus>::type()); + } + }; #undef CONSTRUCTOR -#define MAKE_TUPLE(Z, N, _) \ - template \ - HDINLINE \ - Tuple > \ - make_Tuple(BOOST_PP_ENUM_BINARY_PARAMS(N, Value, value)) \ - { \ - return Tuple > \ - (BOOST_PP_ENUM_PARAMS(N, value)); \ +#define MAKE_TUPLE(Z, N, _) \ + template \ + HDINLINE Tuple> make_Tuple( \ + BOOST_PP_ENUM_BINARY_PARAMS(N, Value, value)) \ + { \ + return Tuple>(BOOST_PP_ENUM_PARAMS(N, value)); \ } -BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(TUPLE_MAX_DIM), MAKE_TUPLE, _) + BOOST_PP_REPEAT_FROM_TO(1, BOOST_PP_INC(TUPLE_MAX_DIM), MAKE_TUPLE, _) #undef MAKE_TUPLE -namespace result_of -{ -template -struct at_c -{ - typedef typename mpl::at_c::type type; -}; -} // result_of - -} // math -} // PMacc + namespace result_of + { 
+ template + struct at_c + { + typedef typename mpl::at_c::type type; + }; + } // namespace result_of + + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/Vector.hpp b/include/pmacc/math/Vector.hpp index 6580efb57f..78f0310902 100644 --- a/include/pmacc/math/Vector.hpp +++ b/include/pmacc/math/Vector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * diff --git a/include/pmacc/math/VectorOperations.hpp b/include/pmacc/math/VectorOperations.hpp index e14797c7b8..445858b442 100644 --- a/include/pmacc/math/VectorOperations.hpp +++ b/include/pmacc/math/VectorOperations.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PMacc. * @@ -26,64 +26,56 @@ namespace pmacc { -namespace math -{ - /** Map a runtime linear index to a N dimensional position - * - * The size of the space to map the index to must be know at compile time - * - * \tparam T_Dim dimension of the position to map to - */ - template - struct MapToPos; - - template<> - struct MapToPos<3> + namespace math { - /** Functor + /** Map a runtime linear index to a N dimensional position + * + * The size of the space to map the index to must be know at compile time * - * \tparam T_ctVec math::CT::vector type like \see math::CT::Int - * \param math::CT::vector with spatial size to map the index to - * \param linearIndex linear index to be mapped - * \return runtime math::vector of dimension T_Dim + * \tparam T_Dim dimension of the position to map to */ - template - DINLINE - typename T_ctVec::RT_type - operator()( T_ctVec, const int linearIndex ) + template + struct MapToPos; + + template<> + struct MapToPos<3> { - return typename T_ctVec::RT_type( - (linearIndex % T_ctVec::x::value), - ((linearIndex % (T_ctVec::x::value * T_ctVec::y::value)) / T_ctVec::x::value), - (linearIndex / (T_ctVec::x::value * T_ctVec::y::value))); - } - }; 
+ /** Functor + * + * \tparam T_ctVec math::CT::vector type like \see math::CT::Int + * \param math::CT::vector with spatial size to map the index to + * \param linearIndex linear index to be mapped + * \return runtime math::vector of dimension T_Dim + */ + template + DINLINE typename T_ctVec::RT_type operator()(T_ctVec, const int linearIndex) + { + return typename T_ctVec::RT_type( + (linearIndex % T_ctVec::x::value), + ((linearIndex % (T_ctVec::x::value * T_ctVec::y::value)) / T_ctVec::x::value), + (linearIndex / (T_ctVec::x::value * T_ctVec::y::value))); + } + }; - template<> - struct MapToPos<2> - { - template - DINLINE - typename T_ctVec::RT_type - operator()( T_ctVec, const int linearIndex ) + template<> + struct MapToPos<2> { - return typename T_ctVec::RT_type( - (linearIndex % T_ctVec::x::value), - (linearIndex / T_ctVec::x::value)); - } - }; + template + DINLINE typename T_ctVec::RT_type operator()(T_ctVec, const int linearIndex) + { + return typename T_ctVec::RT_type((linearIndex % T_ctVec::x::value), (linearIndex / T_ctVec::x::value)); + } + }; - template<> - struct MapToPos<1> - { - template - DINLINE - typename T_ctVec::RT_type - operator()( T_ctVec, const int linearIndex ) + template<> + struct MapToPos<1> { - return typename T_ctVec::RT_type( linearIndex ); - } - }; + template + DINLINE typename T_ctVec::RT_type operator()(T_ctVec, const int linearIndex) + { + return typename T_ctVec::RT_type(linearIndex); + } + }; -} /* namespace math */ + } /* namespace math */ } /* namespace pmacc */ diff --git a/include/pmacc/math/complex/Bessel.hpp b/include/pmacc/math/complex/Bessel.hpp index 95b16eb27d..c9b691539b 100644 --- a/include/pmacc/math/complex/Bessel.hpp +++ b/include/pmacc/math/complex/Bessel.hpp @@ -1,4 +1,4 @@ -/* Copyright 2003-2020 Alexander Debus, C. Bond +/* Copyright 2003-2021 Alexander Debus, C. Bond * * This file is part of PMacc. 
* @@ -67,180 +67,179 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ -namespace bessel -{ - template< - typename T_Type, - typename T_TableA, - typename T_TableB, - typename T_TableA1, - typename T_TableB1 - > - struct Cbesselj0Base; - - template< typename T_Type > - HDINLINE typename J0< pmacc::math::Complex< T_Type > >::result - j0( pmacc::math::Complex< T_Type > const & z ) - { - return J0< pmacc::math::Complex< T_Type > >( )( z ); - } - - template< - typename T_Type, - typename T_TableA, - typename T_TableB, - typename T_TableA1, - typename T_TableB1 - > - struct Cbesselj1Base; - - template< typename T_Type > - HDINLINE typename J1< pmacc::math::Complex< T_Type > >::result - j1( pmacc::math::Complex< T_Type > const & z) + namespace math { - return J1< pmacc::math::Complex< T_Type > >( )( z ); - } - - PMACC_CONST_VECTOR( double, 14, aDouble, - -7.03125e-2, - 0.112152099609375, - -0.5725014209747314, - 6.074042001273483, - -1.100171402692467e2, - 3.038090510922384e3, - -1.188384262567832e5, - 6.252951493434797e6, - -4.259392165047669e8, - 3.646840080706556e10, - -3.833534661393944e12, - 4.854014686852901e14, - -7.286857349377656e16, - 1.279721941975975e19 - ); - - PMACC_CONST_VECTOR( double, 14, bDouble, - 7.32421875e-2, - -0.2271080017089844, - 1.727727502584457, - -2.438052969955606e1, - 5.513358961220206e2, - -1.825775547429318e4, - 8.328593040162893e5, - -5.006958953198893e7, - 3.836255180230433e9, - -3.649010818849833e11, - 4.218971570284096e13, - -5.827244631566907e15, - 9.476288099260110e17, - -1.792162323051699e20 - ); - - PMACC_CONST_VECTOR( double, 14, a1Double, - 0.1171875, - -0.1441955566406250, - 0.6765925884246826, - -6.883914268109947, - 1.215978918765359e2, - -3.302272294480852e3, - 1.276412726461746e5, - -6.656367718817688e6, - 4.502786003050393e8, - -3.833857520742790e10, - 4.011838599133198e12, - -5.060568503314727e14, - 7.572616461117958e16, - -1.326257285320556e19 - ); - - PMACC_CONST_VECTOR( double, 14, b1Double, - 
-0.1025390625, - 0.2775764465332031, - -1.993531733751297, - 2.724882731126854e1, - -6.038440767050702e2, - 1.971837591223663e4, - -8.902978767070678e5, - 5.310411010968522e7, - -4.043620325107754e9, - 3.827011346598605e11, - -4.406481417852278e13, - 6.065091351222699e15, - -9.833883876590679e17, - 1.855045211579828e20 - ); - - PMACC_CONST_VECTOR( float, 14, aFloat, - -7.03125e-2, - 0.112152099609375, - -0.5725014209747314, - 6.074042001273483, - -1.100171402692467e2, - 3.038090510922384e3, - -1.188384262567832e5, - 6.252951493434797e6, - -4.259392165047669e8, - 3.646840080706556e10, - -3.833534661393944e12, - 4.854014686852901e14, - -7.286857349377656e16, - 1.279721941975975e19 - ); - - PMACC_CONST_VECTOR( float, 14, bFloat, - 7.32421875e-2, - -0.2271080017089844, - 1.727727502584457, - -2.438052969955606e1, - 5.513358961220206e2, - -1.825775547429318e4, - 8.328593040162893e5, - -5.006958953198893e7, - 3.836255180230433e9, - -3.649010818849833e11, - 4.218971570284096e13, - -5.827244631566907e15, - 9.476288099260110e17, - -1.792162323051699e20 - ); - - PMACC_CONST_VECTOR(float, 14, a1Float, - 0.1171875, - -0.1441955566406250, - 0.6765925884246826, - -6.883914268109947, - 1.215978918765359e2, - -3.302272294480852e3, - 1.276412726461746e5, - -6.656367718817688e6, - 4.502786003050393e8, - -3.833857520742790e10, - 4.011838599133198e12, - -5.060568503314727e14, - 7.572616461117958e16, - -1.326257285320556e19 - ); - - PMACC_CONST_VECTOR( float, 14, b1Float, - -0.1025390625, - 0.2775764465332031, - -1.993531733751297, - 2.724882731126854e1, - -6.038440767050702e2, - 1.971837591223663e4, - -8.902978767070678e5, - 5.310411010968522e7, - -4.043620325107754e9, - 3.827011346598605e11, - -4.406481417852278e13, - 6.065091351222699e15, - -9.833883876590679e17, - 1.855045211579828e20 - ); -} //namespace bessel -} //namespace math -} //namespace algorithms -} //namespace pmacc + namespace bessel + { + template + struct Cbesselj0Base; + + template + HDINLINE typename J0>::result 
j0(pmacc::math::Complex const& z) + { + return J0>()(z); + } + + template + struct Cbesselj1Base; + + template + HDINLINE typename J1>::result j1(pmacc::math::Complex const& z) + { + return J1>()(z); + } + + PMACC_CONST_VECTOR( + double, + 14, + aDouble, + -7.03125e-2, + 0.112152099609375, + -0.5725014209747314, + 6.074042001273483, + -1.100171402692467e2, + 3.038090510922384e3, + -1.188384262567832e5, + 6.252951493434797e6, + -4.259392165047669e8, + 3.646840080706556e10, + -3.833534661393944e12, + 4.854014686852901e14, + -7.286857349377656e16, + 1.279721941975975e19); + + PMACC_CONST_VECTOR( + double, + 14, + bDouble, + 7.32421875e-2, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1, + 5.513358961220206e2, + -1.825775547429318e4, + 8.328593040162893e5, + -5.006958953198893e7, + 3.836255180230433e9, + -3.649010818849833e11, + 4.218971570284096e13, + -5.827244631566907e15, + 9.476288099260110e17, + -1.792162323051699e20); + + PMACC_CONST_VECTOR( + double, + 14, + a1Double, + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947, + 1.215978918765359e2, + -3.302272294480852e3, + 1.276412726461746e5, + -6.656367718817688e6, + 4.502786003050393e8, + -3.833857520742790e10, + 4.011838599133198e12, + -5.060568503314727e14, + 7.572616461117958e16, + -1.326257285320556e19); + + PMACC_CONST_VECTOR( + double, + 14, + b1Double, + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1, + -6.038440767050702e2, + 1.971837591223663e4, + -8.902978767070678e5, + 5.310411010968522e7, + -4.043620325107754e9, + 3.827011346598605e11, + -4.406481417852278e13, + 6.065091351222699e15, + -9.833883876590679e17, + 1.855045211579828e20); + + PMACC_CONST_VECTOR( + float, + 14, + aFloat, + -7.03125e-2, + 0.112152099609375, + -0.5725014209747314, + 6.074042001273483, + -1.100171402692467e2, + 3.038090510922384e3, + -1.188384262567832e5, + 6.252951493434797e6, + -4.259392165047669e8, + 3.646840080706556e10, + -3.833534661393944e12, 
+ 4.854014686852901e14, + -7.286857349377656e16, + 1.279721941975975e19); + + PMACC_CONST_VECTOR( + float, + 14, + bFloat, + 7.32421875e-2, + -0.2271080017089844, + 1.727727502584457, + -2.438052969955606e1, + 5.513358961220206e2, + -1.825775547429318e4, + 8.328593040162893e5, + -5.006958953198893e7, + 3.836255180230433e9, + -3.649010818849833e11, + 4.218971570284096e13, + -5.827244631566907e15, + 9.476288099260110e17, + -1.792162323051699e20); + + PMACC_CONST_VECTOR( + float, + 14, + a1Float, + 0.1171875, + -0.1441955566406250, + 0.6765925884246826, + -6.883914268109947, + 1.215978918765359e2, + -3.302272294480852e3, + 1.276412726461746e5, + -6.656367718817688e6, + 4.502786003050393e8, + -3.833857520742790e10, + 4.011838599133198e12, + -5.060568503314727e14, + 7.572616461117958e16, + -1.326257285320556e19); + + PMACC_CONST_VECTOR( + float, + 14, + b1Float, + -0.1025390625, + 0.2775764465332031, + -1.993531733751297, + 2.724882731126854e1, + -6.038440767050702e2, + 1.971837591223663e4, + -8.902978767070678e5, + 5.310411010968522e7, + -4.043620325107754e9, + 3.827011346598605e11, + -4.406481417852278e13, + 6.065091351222699e15, + -9.833883876590679e17, + 1.855045211579828e20); + } // namespace bessel + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/complex/Bessel.tpp b/include/pmacc/math/complex/Bessel.tpp index 9437a50a25..449c714ec6 100644 --- a/include/pmacc/math/complex/Bessel.tpp +++ b/include/pmacc/math/complex/Bessel.tpp @@ -1,4 +1,4 @@ -/* Copyright 2003-2020 Alexander Debus, C. Bond +/* Copyright 2003-2021 Alexander Debus, C. Bond * * This file is part of PMacc. 
* @@ -69,218 +69,207 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ -namespace bessel -{ - namespace pmMath = pmacc::algorithms::math; - - template< - typename T_Type, - typename T_TableA, - typename T_TableB, - typename T_TableA1, - typename T_TableB1 - > - struct Cbesselj0Base + namespace math { - using Result = pmacc::math::Complex< T_Type >; - using complex_T = pmacc::math::Complex< T_Type >; - using float_T = T_Type; - - HDINLINE Result operator( )( complex_T const & z ) + namespace bessel { - T_TableA a; - T_TableB b; - T_TableA1 a1; - T_TableB1 b1; - Result cj0; - /* The target rel. accuracy goal eps is chosen according to the original implementation - * of C. Bond, where for double-precision the accuracy goal is 1.0e-15. Here the accuracy - * goal value is the same 4.5 * DBL_EPSILON = 1.0e-15 for double-precision, but is similarly - * defined for float-precision. - */ - float_T const eps = float_T( 4.5 ) * std::numeric_limits< float_T >::epsilon( ); - - complex_T const cii = complex_T( 0, 1 ); - complex_T const cone = complex_T( 1, 0 ); - complex_T const czero = complex_T( 0, 0 ); - - float_T const a0 = pmMath::abs( z ); - complex_T const z2 = z * z; - complex_T z1 = z; - if( a0 == float_T( 0.0 ) ) - { - cj0 = cone; - return cj0; - } - if( z.get_real() < float_T( 0.0 ) ) - z1 = float_T( -1.0 ) * z; - if( a0 <= float_T( 12.0 ) ) + template + struct Cbesselj0Base { - cj0 = cone; - complex_T cr = cone; - for ( uint32_t k = 1u; k <= 40u; k++ ) - { - cr *= float_T( -0.25 ) * z2 / float_T( k * k ); - cj0 += cr; - if( pmMath::abs( cr ) < pmMath::abs( cj0 ) * eps ) break; - } - } - else { - uint32_t kz; - if( a0 >= float_T( 50.0 ) ) kz = 8u; // can be changed to 10 - else if( a0 >= float_T( 35.0 ) ) kz = 10u; // " " " 12 - else kz = 12u; // " " " 14 - complex_T ct1 = z1 - Pi< float_T >::quarterValue; - complex_T cp0 = cone; - for ( uint32_t k = 0u; k < kz; k++ ) - { - cp0 += a[ k ] * pow( - z1, - float_T( -2.0 ) * k - float_T( 2.0 ) - ); - } - 
complex_T cq0 = float_T( -0.125 ) / z1; - for ( uint32_t k = 0; k < kz; k++ ) + using Result = pmacc::math::Complex; + using complex_T = pmacc::math::Complex; + using float_T = T_Type; + + HDINLINE Result operator()(complex_T const& z) { - cq0 += b[ k ] * pmMath::pow( - z1, - float_T( -2.0 ) * k - float_T( 3.0 ) - ); - } - complex_T const cu = pmMath::sqrt( Pi< float_T >::doubleReciprocalValue / z1 ); - cj0 = cu * ( cp0 * pmMath::cos( ct1 ) - cq0 * pmMath::sin( ct1 ) ); - } - return cj0; - } - }; + T_TableA a; + T_TableB b; + T_TableA1 a1; + T_TableB1 b1; + Result cj0; + /* The target rel. accuracy goal eps is chosen according to the original implementation + * of C. Bond, where for double-precision the accuracy goal is 1.0e-15. Here the accuracy + * goal value is the same 4.5 * DBL_EPSILON = 1.0e-15 for double-precision, but is similarly + * defined for float-precision. + */ + float_T const eps = float_T(4.5) * std::numeric_limits::epsilon(); - template< - typename T_Type, - typename T_TableA, - typename T_TableB, - typename T_TableA1, - typename T_TableB1 - > - struct Cbesselj1Base - { - using Result = pmacc::math::Complex< T_Type >; - using complex_T = pmacc::math::Complex< T_Type >; - using float_T = T_Type; + complex_T const cii = complex_T(0, 1); + complex_T const cone = complex_T(1, 0); + complex_T const czero = complex_T(0, 0); - HDINLINE Result operator( )( complex_T const & z ) - { - T_TableA a; - T_TableB b; - T_TableA1 a1; - T_TableB1 b1; - Result cj1; - /* The target rel. accuracy goal eps is chosen according to the original implementation - * of C. Bond, where for double-precision the accuracy goal is 1.0e-15. Here the accuracy - * goal value is the same 4.5 * DBL_EPSILON = 1.0e-15 for double-precision, but is similarly - * defined for float-precision. 
- */ - float_T const eps = float_T( 4.5 ) * std::numeric_limits< float_T >::epsilon( ); - - complex_T const cii = complex_T( 0, 1 ); - complex_T const cone = complex_T( 1, 0 ); - complex_T const czero = complex_T( 0, 0 ); + float_T const a0 = cupla::math::abs(z); + complex_T const z2 = z * z; + complex_T z1 = z; + if(a0 == float_T(0.0)) + { + cj0 = cone; + return cj0; + } + if(z.get_real() < float_T(0.0)) + z1 = float_T(-1.0) * z; + if(a0 <= float_T(12.0)) + { + cj0 = cone; + complex_T cr = cone; + for(uint32_t k = 1u; k <= 40u; k++) + { + cr *= float_T(-0.25) * z2 / float_T(k * k); + cj0 += cr; + if(cupla::math::abs(cr) < cupla::math::abs(cj0) * eps) + break; + } + } + else + { + uint32_t kz; + if(a0 >= float_T(50.0)) + kz = 8u; // can be changed to 10 + else if(a0 >= float_T(35.0)) + kz = 10u; // " " " 12 + else + kz = 12u; // " " " 14 + complex_T ct1 = z1 - Pi::quarterValue; + complex_T cp0 = cone; + for(uint32_t k = 0u; k < kz; k++) + { + cp0 += a[k] * pow(z1, float_T(-2.0) * k - float_T(2.0)); + } + complex_T cq0 = float_T(-0.125) / z1; + for(uint32_t k = 0; k < kz; k++) + { + cq0 += b[k] * cupla::pow(z1, float_T(-2.0) * k - float_T(3.0)); + } + complex_T const cu = cupla::math::sqrt(Pi::doubleReciprocalValue / z1); + cj0 = cu * (cp0 * cupla::math::cos(ct1) - cq0 * cupla::math::sin(ct1)); + } + return cj0; + } + }; - float_T const a0 = pmMath::abs( z ); - complex_T const z2 = z * z; - complex_T z1 = z; - if( a0 == float_T( 0.0 ) ) + template + struct Cbesselj1Base { - cj1 = czero; - return cj1; - } - if( z.get_real() < float_T( 0.0 ) ) - z1 = float_T( -1.0 ) * z; - if( a0 <= float_T( 12.0 ) ) - { - cj1 = cone; - complex_T cr = cone; - for ( uint32_t k = 1u; k <= 40u; k++ ) - { - cr *= float_T( -0.25 ) * z2 / ( k * ( k + float_T( 1.0 ) ) ); - cj1 += cr; - if ( pmMath::abs( cr ) < pmMath::abs( cj1 ) * eps ) break; - } - cj1 *= float_T( 0.5 ) * z1; - } - else { - uint32_t kz; - if( a0 >= float_T( 50.0 ) ) kz = 8u; // can be changed to 10 - else if ( a0 >= 
float_T( 35.0 ) ) kz = 10u; // " " " 12 - else kz = 12u; // " " " 14 - complex_T const cu = pmMath::sqrt( Pi< float_T >::doubleReciprocalValue / z1 ); - complex_T const ct2 = z1 - float_T( 0.75 ) * Pi< float_T >::value; - complex_T cp1 = cone; - for ( uint32_t k = 0u; k < kz; k++ ) - { - cp1 += a1[ k ] * pmMath::pow( - z1, - float_T( -2.0 ) * k - float_T( 2.0 ) - ); - } - complex_T cq1 = float_T( 0.375 ) / z1; - for ( uint32_t k = 0u; k < kz; k++ ) + using Result = pmacc::math::Complex; + using complex_T = pmacc::math::Complex; + using float_T = T_Type; + + HDINLINE Result operator()(complex_T const& z) { - cq1 += b1[ k ] * pmMath::pow( - z1, - float_T( -2.0 ) * k - float_T( 3.0 ) - ); + T_TableA a; + T_TableB b; + T_TableA1 a1; + T_TableB1 b1; + Result cj1; + /* The target rel. accuracy goal eps is chosen according to the original implementation + * of C. Bond, where for double-precision the accuracy goal is 1.0e-15. Here the accuracy + * goal value is the same 4.5 * DBL_EPSILON = 1.0e-15 for double-precision, but is similarly + * defined for float-precision. 
+ */ + float_T const eps = float_T(4.5) * std::numeric_limits::epsilon(); + + complex_T const cii = complex_T(0, 1); + complex_T const cone = complex_T(1, 0); + complex_T const czero = complex_T(0, 0); + + float_T const a0 = cupla::math::abs(z); + complex_T const z2 = z * z; + complex_T z1 = z; + if(a0 == float_T(0.0)) + { + cj1 = czero; + return cj1; + } + if(z.get_real() < float_T(0.0)) + z1 = float_T(-1.0) * z; + if(a0 <= float_T(12.0)) + { + cj1 = cone; + complex_T cr = cone; + for(uint32_t k = 1u; k <= 40u; k++) + { + cr *= float_T(-0.25) * z2 / (k * (k + float_T(1.0))); + cj1 += cr; + if(cupla::math::abs(cr) < cupla::math::abs(cj1) * eps) + break; + } + cj1 *= float_T(0.5) * z1; + } + else + { + uint32_t kz; + if(a0 >= float_T(50.0)) + kz = 8u; // can be changed to 10 + else if(a0 >= float_T(35.0)) + kz = 10u; // " " " 12 + else + kz = 12u; // " " " 14 + complex_T const cu = cupla::math::sqrt(Pi::doubleReciprocalValue / z1); + complex_T const ct2 = z1 - float_T(0.75) * Pi::value; + complex_T cp1 = cone; + for(uint32_t k = 0u; k < kz; k++) + { + cp1 += a1[k] * cupla::pow(z1, float_T(-2.0) * k - float_T(2.0)); + } + complex_T cq1 = float_T(0.375) / z1; + for(uint32_t k = 0u; k < kz; k++) + { + cq1 += b1[k] * cupla::pow(z1, float_T(-2.0) * k - float_T(3.0)); + } + cj1 = cu * (cp1 * cupla::math::cos(ct2) - cq1 * cupla::math::sin(ct2)); + } + if(z.get_real() < float_T(0.0)) + { + cj1 = float_T(-1.0) * cj1; + } + return cj1; } - cj1 = cu * ( cp1 * pmMath::cos( ct2 ) - cq1 * pmMath::sin( ct2 ) ); - } - if( z.get_real( ) < float_T( 0.0 ) ) - { - cj1 = float_T( -1.0 ) * cj1; - } - return cj1; - } - }; + }; - template< > - struct J0< pmacc::math::Complex< double > > : public Cbesselj0Base< - double, - pmacc::algorithms::math::bessel::aDouble_t, - pmacc::algorithms::math::bessel::bDouble_t, - pmacc::algorithms::math::bessel::a1Double_t, - pmacc::algorithms::math::bessel::b1Double_t - >{ }; + template<> + struct J0> + : public Cbesselj0Base< + double, + 
pmacc::math::bessel::aDouble_t, + pmacc::math::bessel::bDouble_t, + pmacc::math::bessel::a1Double_t, + pmacc::math::bessel::b1Double_t> + { + }; - template< > - struct J0< pmacc::math::Complex< float > > : public Cbesselj0Base< - float, - pmacc::algorithms::math::bessel::aFloat_t, - pmacc::algorithms::math::bessel::bFloat_t, - pmacc::algorithms::math::bessel::a1Float_t, - pmacc::algorithms::math::bessel::b1Float_t - >{ }; + template<> + struct J0> + : public Cbesselj0Base< + float, + pmacc::math::bessel::aFloat_t, + pmacc::math::bessel::bFloat_t, + pmacc::math::bessel::a1Float_t, + pmacc::math::bessel::b1Float_t> + { + }; - template< > - struct J1< pmacc::math::Complex< double > > : public Cbesselj1Base< - double, - pmacc::algorithms::math::bessel::aDouble_t, - pmacc::algorithms::math::bessel::bDouble_t, - pmacc::algorithms::math::bessel::a1Double_t, - pmacc::algorithms::math::bessel::b1Double_t - >{ }; + template<> + struct J1> + : public Cbesselj1Base< + double, + pmacc::math::bessel::aDouble_t, + pmacc::math::bessel::bDouble_t, + pmacc::math::bessel::a1Double_t, + pmacc::math::bessel::b1Double_t> + { + }; - template< > - struct J1< pmacc::math::Complex< float > > : public Cbesselj1Base< - float, - pmacc::algorithms::math::bessel::aFloat_t, - pmacc::algorithms::math::bessel::bFloat_t, - pmacc::algorithms::math::bessel::a1Float_t, - pmacc::algorithms::math::bessel::b1Float_t - >{ }; + template<> + struct J1> + : public Cbesselj1Base< + float, + pmacc::math::bessel::aFloat_t, + pmacc::math::bessel::bFloat_t, + pmacc::math::bessel::a1Float_t, + pmacc::math::bessel::b1Float_t> + { + }; -} //namespace bessel -} //namespace math -} //namespace algorithms -} //namespace pmacc + } // namespace bessel + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/complex/Complex.hpp b/include/pmacc/math/complex/Complex.hpp index 0aec12bf56..292de6bbb3 100644 --- a/include/pmacc/math/complex/Complex.hpp +++ b/include/pmacc/math/complex/Complex.hpp @@ -1,4 +1,4 
@@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch, Alexander Debus +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, Alexander Debus * * This file is part of PMacc. * @@ -24,199 +24,195 @@ namespace pmacc { -namespace math -{ - -/** A complex number class */ -template -struct Complex -{ - -public: - - typedef T_Type type; - - // constructor (real, imaginary) - HDINLINE Complex(T_Type real, T_Type imaginary = type(0.0) ) : real(real), imaginary(imaginary) { } - - // constructor (Complex) - template - HDINLINE explicit Complex(const Complex& other) : - real( static_cast (other.get_real()) ), - imaginary( static_cast (other.get_imag()) ) { } - - // default constructor ( ! no initialization of data ! ) - HDINLINE Complex(void) { } - - // Conversion from scalar (assignment) - HDINLINE Complex& operator=(const T_Type& other) - { - real = other; - imaginary = type(0.0); - return *this; - } - - // Assignment operator - HDINLINE Complex& operator=(const Complex& other) - { - real = other.get_real(); - imaginary = other.get_imag(); - return *this; - } - - // assign addition - HDINLINE Complex& operator+=(const Complex& other) - { - real += other.get_real(); - imaginary += other.get_imag(); - return *this; - } - - // assign difference - HDINLINE Complex& operator-=(const Complex& other) + namespace math { - real -= other.get_real(); - imaginary -= other.get_imag(); - return *this; - } - - // assign multiplication - HDINLINE Complex& operator *=(const Complex& other) - { - *this = *this * other; - return *this; - } - - // real part - HDINLINE T_Type& get_real() - { - return real; - } - - // real part - HDINLINE T_Type get_real(void) const - { - return real; - } - - // imaginary part - HDINLINE T_Type& get_imag() - { - return imaginary; - } - - // imaginary part - HDINLINE T_Type get_imag(void) const - { - return imaginary; - } - - // complex zero - HDINLINE static Complex zero(void) - { - return Complex( type(0.0) , type(0.0) ); - } - 
-private: - PMACC_ALIGN(real,T_Type); // real part - PMACC_ALIGN(imaginary,T_Type); // imaginary part - -}; - -/** Addition operators */ - -template -HDINLINE Complex -operator+(const Complex& lhs, const Complex& rhs) -{ - return Complex(lhs.get_real() + rhs.get_real(), lhs.get_imag() + rhs.get_imag()); -} - -template -HDINLINE Complex -operator+(const Complex& lhs, const T_Type& rhs) -{ - return Complex(lhs.get_real() + rhs, lhs.get_imag()); -} - -template -HDINLINE Complex -operator+(const T_Type& lhs, const Complex& rhs) -{ - return Complex(lhs + rhs.get_real(), rhs.get_imag()); -} - -/** Subtraction operators */ - -template -HDINLINE Complex -operator-(const Complex& lhs, const Complex& rhs) -{ - return Complex(lhs.get_real() - rhs.get_real(), lhs.get_imag() - rhs.get_imag()); -} - -template -HDINLINE Complex -operator-(const Complex& lhs, const T_Type& rhs) -{ - return Complex(lhs.get_real() - rhs, lhs.get_imag()); -} - -template -HDINLINE Complex -operator-(const T_Type& lhs, const Complex& rhs) -{ - return Complex(lhs - rhs.get_real(), -rhs.get_imag()); -} - -/** Multiplication operators */ - -template -HDINLINE Complex -operator*(const Complex& lhs, const Complex& rhs) -{ - return Complex(lhs.get_real() * rhs.get_real() - lhs.get_imag() * rhs.get_imag(), - lhs.get_imag() * rhs.get_real() + lhs.get_real() * rhs.get_imag()); -} - -template -HDINLINE Complex -operator*(const Complex& lhs, const T_Type& rhs) -{ - return Complex(lhs.get_real() * rhs, lhs.get_imag() * rhs); -} - -template -HDINLINE Complex -operator*(const T_Type& lhs, const Complex& rhs) -{ - return Complex(lhs * rhs.get_real(), lhs * rhs.get_imag()); -} - -/** Division operators */ - -template -HDINLINE Complex -operator/(const Complex& lhs, const T_Type& rhs) -{ - return Complex(lhs.get_real() / rhs, lhs.get_imag() / rhs); -} - -template -HDINLINE Complex -operator/(const T_Type& lhs, const Complex& rhs) -{ - return Complex(lhs * 
rhs.get_real()/(rhs.get_real()*rhs.get_real()+rhs.get_imag()*rhs.get_imag()), - -lhs * rhs.get_imag()/( rhs.get_real()*rhs.get_real()+rhs.get_imag()*rhs.get_imag() )); -} - -template -HDINLINE Complex -operator/(const Complex& lhs, const Complex& rhs) -{ - return lhs*Complex(rhs.get_real()/(rhs.get_real()*rhs.get_real()+rhs.get_imag()*rhs.get_imag()), - -rhs.get_imag()/( rhs.get_real()*rhs.get_real()+rhs.get_imag()*rhs.get_imag() )); -} - -} //namespace math -} //namespace pmacc + /** A complex number class */ + template + struct Complex + { + public: + typedef T_Type type; + + // constructor (real, imaginary) + HDINLINE Complex(T_Type real, T_Type imaginary = type(0.0)) : real(real), imaginary(imaginary) + { + } + + constexpr HDINLINE Complex(const Complex& other) = default; + + // constructor (Complex) + template + HDINLINE explicit Complex(const Complex& other) + : real(static_cast(other.get_real())) + , imaginary(static_cast(other.get_imag())) + { + } + + // default constructor ( ! no initialization of data ! 
) + HDINLINE Complex(void) + { + } + + // Conversion from scalar (assignment) + HDINLINE Complex& operator=(const T_Type& other) + { + real = other; + imaginary = type(0.0); + return *this; + } + + // Assignment operator + HDINLINE Complex& operator=(const Complex& other) + { + real = other.get_real(); + imaginary = other.get_imag(); + return *this; + } + + // assign addition + HDINLINE Complex& operator+=(const Complex& other) + { + real += other.get_real(); + imaginary += other.get_imag(); + return *this; + } + + // assign difference + HDINLINE Complex& operator-=(const Complex& other) + { + real -= other.get_real(); + imaginary -= other.get_imag(); + return *this; + } + + // assign multiplication + HDINLINE Complex& operator*=(const Complex& other) + { + *this = *this * other; + return *this; + } + + // real part + HDINLINE T_Type& get_real() + { + return real; + } + + // real part + HDINLINE T_Type get_real(void) const + { + return real; + } + + // imaginary part + HDINLINE T_Type& get_imag() + { + return imaginary; + } + + // imaginary part + HDINLINE T_Type get_imag(void) const + { + return imaginary; + } + + // complex zero + HDINLINE static Complex zero(void) + { + return Complex(type(0.0), type(0.0)); + } + + private: + PMACC_ALIGN(real, T_Type); // real part + PMACC_ALIGN(imaginary, T_Type); // imaginary part + }; + + /** Addition operators */ + + template + HDINLINE Complex operator+(const Complex& lhs, const Complex& rhs) + { + return Complex(lhs.get_real() + rhs.get_real(), lhs.get_imag() + rhs.get_imag()); + } + + template + HDINLINE Complex operator+(const Complex& lhs, const T_Type& rhs) + { + return Complex(lhs.get_real() + rhs, lhs.get_imag()); + } + + template + HDINLINE Complex operator+(const T_Type& lhs, const Complex& rhs) + { + return Complex(lhs + rhs.get_real(), rhs.get_imag()); + } + + /** Subtraction operators */ + + template + HDINLINE Complex operator-(const Complex& lhs, const Complex& rhs) + { + return Complex(lhs.get_real() - 
rhs.get_real(), lhs.get_imag() - rhs.get_imag()); + } + + template + HDINLINE Complex operator-(const Complex& lhs, const T_Type& rhs) + { + return Complex(lhs.get_real() - rhs, lhs.get_imag()); + } + + template + HDINLINE Complex operator-(const T_Type& lhs, const Complex& rhs) + { + return Complex(lhs - rhs.get_real(), -rhs.get_imag()); + } + + /** Multiplication operators */ + + template + HDINLINE Complex operator*(const Complex& lhs, const Complex& rhs) + { + return Complex( + lhs.get_real() * rhs.get_real() - lhs.get_imag() * rhs.get_imag(), + lhs.get_imag() * rhs.get_real() + lhs.get_real() * rhs.get_imag()); + } + + template + HDINLINE Complex operator*(const Complex& lhs, const T_Type& rhs) + { + return Complex(lhs.get_real() * rhs, lhs.get_imag() * rhs); + } + + template + HDINLINE Complex operator*(const T_Type& lhs, const Complex& rhs) + { + return Complex(lhs * rhs.get_real(), lhs * rhs.get_imag()); + } + + /** Division operators */ + + template + HDINLINE Complex operator/(const Complex& lhs, const T_Type& rhs) + { + return Complex(lhs.get_real() / rhs, lhs.get_imag() / rhs); + } + + template + HDINLINE Complex operator/(const T_Type& lhs, const Complex& rhs) + { + return Complex( + lhs * rhs.get_real() / (rhs.get_real() * rhs.get_real() + rhs.get_imag() * rhs.get_imag()), + -lhs * rhs.get_imag() / (rhs.get_real() * rhs.get_real() + rhs.get_imag() * rhs.get_imag())); + } + + template + HDINLINE Complex operator/(const Complex& lhs, const Complex& rhs) + { + return lhs + * Complex( + rhs.get_real() / (rhs.get_real() * rhs.get_real() + rhs.get_imag() * rhs.get_imag()), + -rhs.get_imag() / (rhs.get_real() * rhs.get_real() + rhs.get_imag() * rhs.get_imag())); + } + + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/complex/Complex.tpp b/include/pmacc/math/complex/Complex.tpp index 5034c116d2..47d781d7ae 100644 --- a/include/pmacc/math/complex/Complex.tpp +++ b/include/pmacc/math/complex/Complex.tpp @@ -1,4 +1,4 @@ -/* Copyright 
2013-2020 Heiko Burau, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch, * Alexander Debus, Benjamin Worpitz, Finn-Ole Carstens * * This file is part of PMacc. @@ -33,254 +33,261 @@ namespace pmacc { -namespace algorithms -{ -namespace math -{ - -namespace pmMath = pmacc::algorithms::math; - -/* Set primary template and subsequent specialization for returning a complex number - by using Euler's formula. */ - -template -struct Euler; - -template -HDINLINE typename Euler< T_Type >::result euler(const T_Type& magnitude, const T_Type& phase) -{ - return Euler< T_Type > ()(magnitude, phase); -} - -template -HDINLINE typename Euler< T_Type >::result euler(const T_Type& magnitude, const T_Type& sinValue, - const T_Type& cosValue) -{ - return Euler< T_Type > ()(magnitude, sinValue, cosValue); -} - -template -struct Euler -{ - typedef typename ::pmacc::math::Complex result; - - HDINLINE result operator( )(const T_Type &magnitude, const T_Type &phase) - { - return result(magnitude * pmMath::cos(phase),magnitude * pmMath::sin(phase)); - } - - HDINLINE result operator( )(const T_Type &magnitude, - const T_Type &sinValue, const T_Type &cosValue) + namespace math { - return result(magnitude * cosValue, magnitude * sinValue); - } -}; + /* Set primary template and subsequent specialization for returning a complex number + by using Euler's formula. */ -/* Specialize sqrt() for complex numbers. 
*/ + template + struct Euler; -template -struct Sqrt< ::pmacc::math::Complex > -{ - typedef typename ::pmacc::math::Complex result; - typedef T_Type type; - - HDINLINE result operator( )(const ::pmacc::math::Complex& other) - { - if (other.get_real()<=type(0.0) && other.get_imag()==type(0.0) ) { - return ::pmacc::math::Complex(type(0.0), pmMath::sqrt( -other.get_real() ) ); - } - else { - return pmMath::sqrt( pmMath::abs(other) )*(other+pmMath::abs(other)) - /pmMath::abs(other+pmMath::abs(other)); - } - } -}; - -/* Specialize exp() for complex numbers. */ - -template -struct Exp< ::pmacc::math::Complex > -{ - typedef typename ::pmacc::math::Complex result; - typedef T_Type type; - - HDINLINE result operator( )(const ::pmacc::math::Complex& other) - { - return pmMath::euler(type(1.0),other.get_imag())*pmMath::exp(other.get_real()); - } -}; - -/* Set primary template and subsequent specialization of arg() for retrieving - * the phase of a complex number (Note: Branchcut running from -infinity to 0). - */ -template -struct Arg; - -template -HDINLINE typename Arg< T_Type >::result arg(const T_Type& val) -{ - return Arg< T_Type > ()(val); -} - -template -struct Arg< ::pmacc::math::Complex > -{ - typedef typename ::pmacc::math::Complex::type result; - typedef T_Type type; - - HDINLINE result operator( )(const ::pmacc::math::Complex& other) - { - if ( other.get_real()==type(0.0) && other.get_imag()==type(0.0) ) - return type(0.0); - else if ( other.get_real()==type(0.0) && other.get_imag()>type(0.0) ) - return Pi< type >::halfValue; - else if ( other.get_real()==type(0.0) && other.get_imag()::halfValue; - else if ( other.get_real()::value; - else - return pmMath::atan2(other.get_imag(),other.get_real()); - } -}; - -/* Specialize pow() for complex numbers. 
*/ -template -struct Pow< ::pmacc::math::Complex, T_Type > -{ - typedef typename ::pmacc::math::Complex result; - typedef T_Type type; - - HDINLINE result operator( )(const ::pmacc::math::Complex& other, - const T_Type& exponent) - { - return pmMath::pow( pmMath::abs(other),exponent ) - *pmMath::exp( ::pmacc::math::Complex(type(0.),type(1.) ) - *pmMath::arg(other)*exponent ); - } -}; - -/* Specialize abs() for complex numbers. */ -template -struct Abs< ::pmacc::math::Complex > -{ - typedef typename ::pmacc::math::Complex::type result; - - HDINLINE result operator( )(const ::pmacc::math::Complex& other) - { - return pmMath::sqrt( pmMath::abs2(other.get_real()) + pmMath::abs2(other.get_imag()) ); - } -}; - -/* Specialize abs2() for complex numbers. */ -template -struct Abs2< ::pmacc::math::Complex > -{ - typedef typename ::pmacc::math::Complex::type result; - - HDINLINE result operator( )(const ::pmacc::math::Complex& other) - { - return pmMath::abs2(other.get_real()) + pmMath::abs2(other.get_imag()); - } -}; - - /* Specialize log() for complex numbers. */ - template< typename T_Type > - struct Log< ::pmacc::math::Complex< T_Type > > - { - using type = T_Type; - using result = typename ::pmacc::math::Complex< type >::type; - - HDINLINE result operator( )( ::pmacc::math::Complex< T_Type > const & other ) + template + HDINLINE typename Euler::result euler(const T_Type& magnitude, const T_Type& phase) { - return pmMath::log( pmMath::abs( other ) ) + - ::pmacc::math::Complex< T_Type >( - type( 0. ), - type( 1. ) - ) * pmMath::arg( other ); + return Euler()(magnitude, phase); } - }; - - /* Specialize sin( ) for complex numbers. 
*/ - template< typename T_Type > - struct Sin< ::pmacc::math::Complex< T_Type > > - { - using result = typename ::pmacc::math::Complex< T_Type >; - using type = T_Type; - HDINLINE result operator( )( const ::pmacc::math::Complex< T_Type > & other ) + template + HDINLINE typename Euler::result euler( + const T_Type& magnitude, + const T_Type& sinValue, + const T_Type& cosValue) { - return ( pmMath::exp( ::pmacc::math::Complex< T_Type >( type( 0. ), type( 1. ) ) * other ) - - pmMath::exp( ::pmacc::math::Complex< T_Type >( type( 0. ), type( -1. ) ) * other ) ) / - ::pmacc::math::Complex< T_Type >( type( 0. ), type( 2. ) ); + return Euler()(magnitude, sinValue, cosValue); } - }; - - /* Specialize cos( ) for complex numbers. */ - template< typename T_Type > - struct Cos< ::pmacc::math::Complex< T_Type > > - { - using result = typename ::pmacc::math::Complex< T_Type >; - using type = T_Type; - HDINLINE result operator( )( const ::pmacc::math::Complex< T_Type >& other ) + template + struct Euler + { + typedef typename ::pmacc::math::Complex result; + + HDINLINE result operator()(const T_Type& magnitude, const T_Type& phase) + { + return result(magnitude * cupla::math::cos(phase), magnitude * cupla::math::sin(phase)); + } + + HDINLINE result operator()(const T_Type& magnitude, const T_Type& sinValue, const T_Type& cosValue) + { + return result(magnitude * cosValue, magnitude * sinValue); + } + }; + + /* Set primary template and subsequent specialization of arg() for retrieving + * the phase of a complex number (Note: Branchcut running from -infinity to 0). + */ + template + struct Arg; + + template + HDINLINE typename Arg::result arg(const T_Type& val) { - return ( pmMath::exp( ::pmacc::math::Complex< T_Type >( type( 0. ), type( 1. ) ) * other ) + - pmMath::exp( ::pmacc::math::Complex< T_Type >( type( 0. ), type( -1. 
) ) * other ) ) / - type( 2.0 ); + return Arg()(val); } - }; - -} //namespace math -} //namespace algorithms -} //namespace pmacc - -namespace pmacc -{ -namespace algorithms -{ -namespace precisionCast -{ -/* Specialize precisionCast-operators for complex numbers. */ + template + struct Arg<::pmacc::math::Complex> + { + typedef typename ::pmacc::math::Complex::type result; + typedef T_Type type; + + HDINLINE result operator()(const ::pmacc::math::Complex& other) + { + if(other.get_real() == type(0.0) && other.get_imag() == type(0.0)) + return type(0.0); + else if(other.get_real() == type(0.0) && other.get_imag() > type(0.0)) + return Pi::halfValue; + else if(other.get_real() == type(0.0) && other.get_imag() < type(0.0)) + return -Pi::halfValue; + else if(other.get_real() < type(0.0) && other.get_imag() == type(0.0)) + return Pi::value; + else + return cupla::math::atan2(other.get_imag(), other.get_real()); + } + }; + + /** Specialize abs2() for complex numbers. + * + * Note: Abs is specialized in alpaka::math below + */ + template + struct Abs2<::pmacc::math::Complex> + { + typedef typename ::pmacc::math::Complex::type result; -template -struct TypeCast > -{ - typedef const ::pmacc::math::Complex& result; + HDINLINE result operator()(const ::pmacc::math::Complex& other) + { + return pmacc::math::abs2(other.get_real()) + pmacc::math::abs2(other.get_imag()); + } + }; - HDINLINE result operator( )(const ::pmacc::math::Complex& complexNumber ) const - { - return complexNumber; - } -}; + } // namespace math +} // namespace pmacc -template -struct TypeCast > +namespace alpaka { - typedef ::pmacc::math::Complex result; - - HDINLINE result operator( )(const ::pmacc::math::Complex& complexNumber ) const + namespace math { - return result( complexNumber ); - } -}; + namespace traits + { + template + struct Pow, T_Type, void> + { + ALPAKA_FN_HOST_ACC static auto pow( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other, + T_Type const& exponent) -> 
::pmacc::math::Complex + { + return cupla::pow(cupla::math::abs(other), exponent) + * cupla::math::exp( + ::pmacc::math::Complex(T_Type(0.), T_Type(1.)) * pmacc::math::arg(other) + * exponent); + } + }; + + template + struct Sqrt, void> + { + ALPAKA_FN_HOST_ACC static auto sqrt( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other) -> ::pmacc::math::Complex + { + using type = T_Type; + if(other.get_real() <= type(0.0) && other.get_imag() == type(0.0)) + { + return ::pmacc::math::Complex( + type(0.0), + alpaka::math::sqrt(mathConcept, -other.get_real())); + } + else + { + return alpaka::math::sqrt(mathConcept, cupla::math::abs(other)) + * (other + cupla::math::abs(other)) / cupla::math::abs(other + cupla::math::abs(other)); + } + } + }; + + template + struct Exp, void> + { + ALPAKA_FN_HOST_ACC static auto exp( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other) -> ::pmacc::math::Complex + { + using type = T_Type; + return pmacc::math::euler(type(1.0), other.get_imag()) + * alpaka::math::exp(mathConcept, other.get_real()); + } + }; + + template + struct Abs, void> + { + ALPAKA_FN_HOST_ACC static auto abs( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other) -> T_Type + { + /* It is not possible to use alpaka::math::sqrt( mathConcept, ... 
) + * here, as the mathConcept would not match, so go around via cupla + */ + return cupla::math::sqrt(pmacc::math::abs2(other)); + } + }; + + template + struct Log, void> + { + ALPAKA_FN_HOST_ACC static auto log( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other) -> ::pmacc::math::Complex + { + using type = T_Type; + return alpaka::math::log(mathConcept, cupla::math::abs(other)) + + ::pmacc::math::Complex(type(0.), type(1.)) * pmacc::math::arg(other); + } + }; + + template + struct Cos, void> + { + ALPAKA_FN_HOST_ACC static auto cos( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other) -> ::pmacc::math::Complex + { + using type = T_Type; + return (alpaka::math::exp(mathConcept, ::pmacc::math::Complex(type(0.), type(1.)) * other) + + alpaka::math::exp( + mathConcept, + ::pmacc::math::Complex(type(0.), type(-1.)) * other)) + / type(2.0); + } + }; + + template + struct Sin, void> + { + ALPAKA_FN_HOST_ACC static auto sin( + T_Ctx const& mathConcept, + ::pmacc::math::Complex const& other) -> ::pmacc::math::Complex + { + using type = T_Type; + + return (alpaka::math::exp(mathConcept, ::pmacc::math::Complex(type(0.), type(1.)) * other) + - alpaka::math::exp( + mathConcept, + ::pmacc::math::Complex(type(0.), type(-1.)) * other)) + / ::pmacc::math::Complex(type(0.), type(2.)); + } + }; + } // namespace traits + } // namespace math +} // namespace alpaka -} //namespace typecast -} //namespace algorithms -namespace mpi +namespace pmacc { - - using complex_X = pmacc::math::Complex< picongpu::float_X >; - - // Specialize complex type grid buffer for MPI - template<> - MPI_StructAsArray getMPI_StructAsArray< pmacc::math::Complex >() + namespace algorithms { - MPI_StructAsArray result = getMPI_StructAsArray< complex_X::type > (); - result.sizeMultiplier *= uint32_t(sizeof(complex_X) / sizeof(typename complex_X::type)); - return result; - }; + namespace precisionCast + { + /* Specialize precisionCast-operators for complex numbers. 
*/ + + template + struct TypeCast> + { + typedef const ::pmacc::math::Complex& result; + + HDINLINE result operator()(const ::pmacc::math::Complex& complexNumber) const + { + return complexNumber; + } + }; + + template + struct TypeCast> + { + typedef ::pmacc::math::Complex result; + + HDINLINE result operator()(const ::pmacc::math::Complex& complexNumber) const + { + return result(complexNumber); + } + }; + + } // namespace precisionCast + } // namespace algorithms + + namespace mpi + { + using complex_X = pmacc::math::Complex; + + // Specialize complex type grid buffer for MPI + template<> + MPI_StructAsArray getMPI_StructAsArray>() + { + MPI_StructAsArray result = getMPI_StructAsArray(); + result.sizeMultiplier *= uint32_t(sizeof(complex_X) / sizeof(typename complex_X::type)); + return result; + }; -} //namespace mpi -} //namespace pmacc + } // namespace mpi +} // namespace pmacc diff --git a/include/pmacc/math/vector/Float.hpp b/include/pmacc/math/vector/Float.hpp index 5e29beb0f4..b807091e6b 100644 --- a/include/pmacc/math/vector/Float.hpp +++ b/include/pmacc/math/vector/Float.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -25,46 +25,46 @@ namespace pmacc { -namespace math -{ - -template -struct Float : public Vector -{ - using BaseType = Vector; - - HDINLINE Float() + namespace math { - } + template + struct Float : public Vector + { + using BaseType = Vector; - HDINLINE Float(float x) : BaseType(x) - { - } + HDINLINE Float() + { + } - HDINLINE Float(float x, float y) : BaseType(x, y) - { - } + HDINLINE Float(float x) : BaseType(x) + { + } - HDINLINE Float(float x, float y, float z) : BaseType(x, y, z) - { - } + HDINLINE Float(float x, float y) : BaseType(x, y) + { + } - /*! 
only allow explicit cast*/ - template< - typename T_OtherType, - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE explicit Float(const Vector& vec) : - BaseType(vec) - { - } + HDINLINE Float(float x, float y, float z) : BaseType(x, y, z) + { + } - HDINLINE Float(const BaseType& vec) : - BaseType(vec) - { - } -}; + /*! only allow explicit cast*/ + template< + typename T_OtherType, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE explicit Float( + const Vector& vec) + : BaseType(vec) + { + } + + HDINLINE Float(const BaseType& vec) : BaseType(vec) + { + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/Int.hpp b/include/pmacc/math/vector/Int.hpp index dc502c333f..8b4aca725c 100644 --- a/include/pmacc/math/vector/Int.hpp +++ b/include/pmacc/math/vector/Int.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -25,46 +25,46 @@ namespace pmacc { -namespace math -{ - -template -struct Int : public Vector -{ - using BaseType = Vector; - - HDINLINE Int() + namespace math { - } + template + struct Int : public Vector + { + using BaseType = Vector; - HDINLINE Int(int x) : BaseType(x) - { - } + HDINLINE Int() + { + } - HDINLINE Int(int x, int y) : BaseType(x, y) - { - } + HDINLINE Int(int x) : BaseType(x) + { + } - HDINLINE Int(int x, int y, int z) : BaseType(x, y, z) - { - } + HDINLINE Int(int x, int y) : BaseType(x, y) + { + } - /*! only allow explicit cast*/ - template< - typename T_OtherType, - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE explicit Int(const Vector& vec) : - BaseType(vec) - { - } + HDINLINE Int(int x, int y, int z) : BaseType(x, y, z) + { + } - HDINLINE Int(const BaseType& vec) : - BaseType(vec) - { - } -}; + /*! 
only allow explicit cast*/ + template< + typename T_OtherType, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE explicit Int( + const Vector& vec) + : BaseType(vec) + { + } + + HDINLINE Int(const BaseType& vec) : BaseType(vec) + { + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/Size_t.hpp b/include/pmacc/math/vector/Size_t.hpp index c59872e5e5..9d5625cbf5 100644 --- a/include/pmacc/math/vector/Size_t.hpp +++ b/include/pmacc/math/vector/Size_t.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -25,46 +25,46 @@ namespace pmacc { -namespace math -{ - -template -struct Size_t : public Vector -{ - using BaseType = Vector; - - HDINLINE Size_t() + namespace math { - } + template + struct Size_t : public Vector + { + using BaseType = Vector; - HDINLINE Size_t(size_t x) : BaseType(x) - { - } + HDINLINE Size_t() + { + } - HDINLINE Size_t(size_t x, size_t y) : BaseType(x, y) - { - } + HDINLINE Size_t(size_t x) : BaseType(x) + { + } - HDINLINE Size_t(size_t x, size_t y, size_t z) : BaseType(x, y, z) - { - } + HDINLINE Size_t(size_t x, size_t y) : BaseType(x, y) + { + } - /*! only allow explicit cast*/ - template< - typename T_OtherType, - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE explicit Size_t(const Vector& vec) : - BaseType(vec) - { - } + HDINLINE Size_t(size_t x, size_t y, size_t z) : BaseType(x, y, z) + { + } - HDINLINE Size_t(const BaseType& vec) : - BaseType(vec) - { - } -}; + /*! 
only allow explicit cast*/ + template< + typename T_OtherType, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE explicit Size_t( + const Vector& vec) + : BaseType(vec) + { + } + + HDINLINE Size_t(const BaseType& vec) : BaseType(vec) + { + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/TwistComponents.hpp b/include/pmacc/math/vector/TwistComponents.hpp index 3dcb454d23..f471eb7275 100644 --- a/include/pmacc/math/vector/TwistComponents.hpp +++ b/include/pmacc/math/vector/TwistComponents.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -26,65 +26,56 @@ namespace pmacc { -namespace math -{ - -namespace result_of -{ - -template -struct TwistComponents -{ - using type = typename TwistComponents< - T_Axes, - typename T_Vector::This - >::type; -}; + namespace math + { + namespace result_of + { + template + struct TwistComponents + { + using type = typename TwistComponents::type; + }; -template class T_Storage> -struct TwistComponents > -{ - using type = math::Vector< - T_Type, - T_Dim, - T_Accessor, - math::StackedNavigator< - T_Navigator, - math::PermutedNavigator - >, - T_Storage - >&; -}; + template< + typename T_Axes, + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + struct TwistComponents> + { + using type = math::Vector< + T_Type, + T_Dim, + T_Accessor, + math::StackedNavigator>, + T_Storage>&; + }; -} // result_of + } // namespace result_of -/** Returns a reference of vector with twisted axes. - * - * The axes twist operation is done in place. This means that the result refers to the - * memory of the input vector. The input vector's navigator policy is replaced by - * a new navigator which merely consists of the old navigator plus a twisting navigator. 
- * This new navigator does not use any real memory. - * - * \tparam T_Axes Mapped indices - * \tparam T_Vector type of vector to be twisted - * \param vector vector to be twisted - * \return reference of the input vector with twisted axes. - */ -template -HDINLINE -typename result_of::TwistComponents::type -twistComponents(T_Vector& vector) -{ - /* The reinterpret_cast is valid because the target type is the same as the - * input type except its navigator policy which does not occupy any memory though. - */ - return reinterpret_cast::type>(vector); -} + /** Returns a reference of vector with twisted axes. + * + * The axes twist operation is done in place. This means that the result refers to the + * memory of the input vector. The input vector's navigator policy is replaced by + * a new navigator which merely consists of the old navigator plus a twisting navigator. + * This new navigator does not use any real memory. + * + * \tparam T_Axes Mapped indices + * \tparam T_Vector type of vector to be twisted + * \param vector vector to be twisted + * \return reference of the input vector with twisted axes. + */ + template + HDINLINE typename result_of::TwistComponents::type twistComponents(T_Vector& vector) + { + /* The reinterpret_cast is valid because the target type is the same as the + * input type except its navigator policy which does not occupy any memory though. + */ + return reinterpret_cast::type>(vector); + } -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/UInt32.hpp b/include/pmacc/math/vector/UInt32.hpp index b224a00497..5b333d9e8b 100644 --- a/include/pmacc/math/vector/UInt32.hpp +++ b/include/pmacc/math/vector/UInt32.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -25,46 +25,46 @@ namespace pmacc { -namespace math -{ - -template -struct UInt32 : public Vector -{ - using BaseType = Vector; - - HDINLINE UInt32() + namespace math { - } + template + struct UInt32 : public Vector + { + using BaseType = Vector; - HDINLINE UInt32(uint32_t x) : BaseType(x) - { - } + HDINLINE UInt32() + { + } - HDINLINE UInt32(uint32_t x, uint32_t y) : BaseType(x, y) - { - } + HDINLINE UInt32(uint32_t x) : BaseType(x) + { + } - HDINLINE UInt32(uint32_t x, uint32_t y, uint32_t z) : BaseType(x, y, z) - { - } + HDINLINE UInt32(uint32_t x, uint32_t y) : BaseType(x, y) + { + } - /*! only allow explicit cast*/ - template< - typename T_OtherType, - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE explicit UInt32(const Vector& vec) : - BaseType(vec) - { - } + HDINLINE UInt32(uint32_t x, uint32_t y, uint32_t z) : BaseType(x, y, z) + { + } - HDINLINE UInt32(const BaseType& vec) : - BaseType(vec) - { - } -}; + /*! only allow explicit cast*/ + template< + typename T_OtherType, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE explicit UInt32( + const Vector& vec) + : BaseType(vec) + { + } + + HDINLINE UInt32(const BaseType& vec) : BaseType(vec) + { + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/UInt64.hpp b/include/pmacc/math/vector/UInt64.hpp index fff31a4168..31e7543353 100644 --- a/include/pmacc/math/vector/UInt64.hpp +++ b/include/pmacc/math/vector/UInt64.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl * * This file is part of PMacc. 
* @@ -25,46 +25,46 @@ namespace pmacc { -namespace math -{ - -template -struct UInt64 : public Vector -{ - using BaseType = Vector; - - HDINLINE UInt64() + namespace math { - } + template + struct UInt64 : public Vector + { + using BaseType = Vector; - HDINLINE UInt64(uint64_t x) : BaseType(x) - { - } + HDINLINE UInt64() + { + } - HDINLINE UInt64(uint64_t x, uint64_t y) : BaseType(x, y) - { - } + HDINLINE UInt64(uint64_t x) : BaseType(x) + { + } - HDINLINE UInt64(uint64_t x, uint64_t y, uint64_t z) : BaseType(x, y, z) - { - } + HDINLINE UInt64(uint64_t x, uint64_t y) : BaseType(x, y) + { + } - /*! only allow explicit cast*/ - template< - typename T_OtherType, - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE explicit UInt64(const Vector& vec) : - BaseType(vec) - { - } + HDINLINE UInt64(uint64_t x, uint64_t y, uint64_t z) : BaseType(x, y, z) + { + } - HDINLINE UInt64(const BaseType& vec) : - BaseType(vec) - { - } -}; + /*! only allow explicit cast*/ + template< + typename T_OtherType, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE explicit UInt64( + const Vector& vec) + : BaseType(vec) + { + } + + HDINLINE UInt64(const BaseType& vec) : BaseType(vec) + { + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/Vector.hpp b/include/pmacc/math/vector/Vector.hpp index 0f816d34eb..115ee23b5b 100644 --- a/include/pmacc/math/vector/Vector.hpp +++ b/include/pmacc/math/vector/Vector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund, Axel Huebl * * This file is part of PMacc. 
@@ -36,830 +36,822 @@ namespace pmacc { -namespace math -{ -namespace detail -{ - -template -struct Vector_components -{ - static constexpr bool isConst = false; - static constexpr int dim = T_Dim; - using type = T_Type; - - /*align full vector*/ - PMACC_ALIGN(v[dim], type); - - HDINLINE - type& operator[](const int idx) - { - return v[idx]; - } - - HDINLINE - const type& operator[](const int idx) const - { - return v[idx]; - } -}; - - -/** functor to copy a object element-wise - * - * @tparam isDestConst define if destination is const (not copyable) object - */ -template -struct CopyElementWise -{ - /** copy object element-wise - * - * @tparam T_Dest destination object type - * @tparam T_Src source object type - */ - template - HDINLINE void operator()(T_Dest& dest,const T_Src& src) const - { - PMACC_CASSERT_MSG(CopyElementWise_destination_and_source_had_different_dimension, - T_Dest::dim == T_Src::dim); - for (int d = 0; d < T_Dest::dim; d++) - dest[d] = src[d]; - } -}; - -/** specialization for constant destination - * - * the constant storage is already available and set in the destination - */ -template<> -struct CopyElementWise -{ - template - HDINLINE void operator()(T_Dest& dest,const T_Src& src) const - { - } -}; - -} //namespace detail - -namespace tag -{ - struct Vector; -} - -template class T_Storage = detail::Vector_components> -struct Vector : private T_Storage, protected T_Accessor, protected T_Navigator -{ - using Storage = T_Storage; - using type = typename Storage::type; - static constexpr int dim = Storage::dim; - using tag = tag::Vector; - using Accessor = T_Accessor; - using Navigator = T_Navigator; - using This = Vector; - using ParamType = typename boost::call_traits::param_type; - - /*Vectors without elements are not allowed*/ - PMACC_CASSERT_MSG(math_Vector__with_DIM_0_is_not_allowed,dim > 0); - - template struct result; - - template - struct result < F(T)> - { - using type = typename F::type&; - }; - - template - struct result < const 
F(T)> - { - using type = const typename F::type&; - }; - - HDINLINE Vector() - {} - - HDINLINE - Vector(const type x) - { - PMACC_CASSERT_MSG(math_Vector__constructor_is_only_allowed_for_DIM1,dim == 1); - (*this)[0] = x; - } - - HDINLINE - Vector(const type x, const type y) - { - PMACC_CASSERT_MSG(math_Vector__constructor_is_only_allowed_for_DIM2,dim == 2); - (*this)[0] = x; - (*this)[1] = y; - } - - HDINLINE - Vector(const type x, const type y, const type z) - { - PMACC_CASSERT_MSG(math_Vector__constructor_is_only_allowed_for_DIM3,dim == 3); - (*this)[0] = x; - (*this)[1] = y; - (*this)[2] = z; - } - - HDINLINE Vector(const This& other) - { - detail::CopyElementWise()(*this,other); - } - - template< - typename T_OtherType, - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE explicit Vector(const Vector< - T_OtherType, - dim, - T_OtherAccessor, - T_OtherNavigator, - T_OtherStorage - >& - other) - { - for (int i = 0; i < dim; i++) - (*this)[i] = static_cast (other[i]); - } - - /** Allow static_cast / explicit cast to member type for 1D vector */ - template< - int T_deferDim = T_dim, - typename = typename std::enable_if< T_deferDim == 1 >::type - > - HDINLINE - explicit - operator type() + namespace math { - return (*this)[0]; - } - - /** - * Creates a Vector where all dimensions are set to the same value - * - * @param value Value which is set for all dimensions - * @return new Vector<...> - */ - HDINLINE - static This create(ParamType value) - { - This result; - for (int i = 0; i < dim; i++) - result[i] = value; - - return result; - } - - HDINLINE const This& toRT() const - { - return *this; - } - - HDINLINE This& toRT() - { - return *this; - } - - HDINLINE This revert() - { - This invertedVector; - for (int i = 0; i < dim; i++) - invertedVector[dim-1-i] = (*this)[i]; - - return invertedVector; - } - - template< - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE This& 
- operator=(const Vector& rhs) - { - for (int i = 0; i < dim; i++) - (*this)[i] = rhs[i]; - return *this; - } - - HDINLINE - type& operator[](const int idx) - { - return Accessor::operator()(Storage::operator[](Navigator::operator()(idx))); - } - - HDINLINE - const type& operator[](const int idx) const - { - return Accessor::operator()(Storage::operator[](Navigator::operator()(idx))); - } - - HDINLINE type & x() - { - return (*this)[0]; - } - - HDINLINE type & y() - { - PMACC_CASSERT_MSG(math_Vector__access_to_y_is_not_allowed_for_DIM_lesser_than_2,dim >= 2); - return (*this)[1]; - } - - HDINLINE type & z() - { - PMACC_CASSERT_MSG(math_Vector__access_to_z_is_not_allowed_for_DIM_lesser_than_3,dim >= 3); - return (*this)[2]; - } - - HDINLINE const type & x() const - { - return (*this)[0]; - } - - HDINLINE const type & y() const - { - PMACC_CASSERT_MSG(math_Vector__access_to_y_is_not_allowed_for_DIM_lesser_than_2,dim >= 2); - return (*this)[1]; - } + namespace detail + { + template + struct Vector_components + { + static constexpr bool isConst = false; + static constexpr int dim = T_Dim; + using type = T_Type; + + HDINLINE + constexpr Vector_components() + { + } + + constexpr Vector_components& operator=(const Vector_components&) = default; + + /*align full vector*/ + PMACC_ALIGN(v[dim], type); + + HDINLINE + type& operator[](const int idx) + { + return v[idx]; + } + + HDINLINE + const type& operator[](const int idx) const + { + return v[idx]; + } + }; + + + /** functor to copy a object element-wise + * + * @tparam isDestConst define if destination is const (not copyable) object + */ + template + struct CopyElementWise + { + /** copy object element-wise + * + * @tparam T_Dest destination object type + * @tparam T_Src source object type + */ + template + HDINLINE void operator()(T_Dest& dest, const T_Src& src) const + { + PMACC_CASSERT_MSG( + CopyElementWise_destination_and_source_had_different_dimension, + T_Dest::dim == T_Src::dim); + for(int d = 0; d < T_Dest::dim; 
d++) + dest[d] = src[d]; + } + }; + + /** specialization for constant destination + * + * the constant storage is already available and set in the destination + */ + template<> + struct CopyElementWise + { + template + HDINLINE void operator()(T_Dest& dest, const T_Src& src) const + { + } + }; + + } // namespace detail + + namespace tag + { + struct Vector; + } - HDINLINE const type & z() const - { - PMACC_CASSERT_MSG(math_Vector__access_to_z_is_not_allowed_for_DIM_lesser_than_3,dim >= 3); - return (*this)[2]; - } + template< + typename T_Type, + int T_dim, + typename T_Accessor = StandardAccessor, + typename T_Navigator = StandardNavigator, + template class T_Storage = detail::Vector_components> + struct Vector + : private T_Storage + , protected T_Accessor + , protected T_Navigator + { + using Storage = T_Storage; + using type = typename Storage::type; + static constexpr int dim = Storage::dim; + using tag = tag::Vector; + using Accessor = T_Accessor; + using Navigator = T_Navigator; + using This = Vector; + using ParamType = typename boost::call_traits::param_type; + + /*Vectors without elements are not allowed*/ + PMACC_CASSERT_MSG(math_Vector__with_DIM_0_is_not_allowed, dim > 0); + + template + struct result; + + template + struct result + { + using type = typename F::type&; + }; + + template + struct result + { + using type = const typename F::type&; + }; + + HDINLINE + constexpr Vector() + { + } + + HDINLINE + constexpr Vector(const type x) + { + PMACC_CASSERT_MSG(math_Vector__constructor_is_only_allowed_for_DIM1, dim == 1); + (*this)[0] = x; + } + + HDINLINE + constexpr Vector(const type x, const type y) + { + PMACC_CASSERT_MSG(math_Vector__constructor_is_only_allowed_for_DIM2, dim == 2); + (*this)[0] = x; + (*this)[1] = y; + } + + HDINLINE + constexpr Vector(const type x, const type y, const type z) + { + PMACC_CASSERT_MSG(math_Vector__constructor_is_only_allowed_for_DIM3, dim == 3); + (*this)[0] = x; + (*this)[1] = y; + (*this)[2] = z; + } + + HDINLINE + 
constexpr Vector(const This& other) + { + detail::CopyElementWise()(*this, other); + } + + template< + typename T_OtherType, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE explicit Vector( + const Vector& other) + { + for(int i = 0; i < dim; i++) + (*this)[i] = static_cast(other[i]); + } + + /** Allow static_cast / explicit cast to member type for 1D vector */ + template::type> + HDINLINE explicit operator type() + { + return (*this)[0]; + } + + /** + * Creates a Vector where all dimensions are set to the same value + * + * @param value Value which is set for all dimensions + * @return new Vector<...> + */ + HDINLINE + static This create(ParamType value) + { + This result; + for(int i = 0; i < dim; i++) + result[i] = value; + + return result; + } + + HDINLINE const This& toRT() const + { + return *this; + } + + HDINLINE This& toRT() + { + return *this; + } + + HDINLINE This revert() + { + This invertedVector; + for(int i = 0; i < dim; i++) + invertedVector[dim - 1 - i] = (*this)[i]; + + return invertedVector; + } + + constexpr HDINLINE Vector& operator=(const Vector&) = default; + + template class T_OtherStorage> + HDINLINE This& operator=(const Vector& rhs) + { + for(int i = 0; i < dim; i++) + (*this)[i] = rhs[i]; + return *this; + } + + HDINLINE + type& operator[](const int idx) + { + return Accessor::operator()(Storage::operator[](Navigator::operator()(idx))); + } + + HDINLINE + const type& operator[](const int idx) const + { + return Accessor::operator()(Storage::operator[](Navigator::operator()(idx))); + } + + HDINLINE type& x() + { + return (*this)[0]; + } + + HDINLINE type& y() + { + PMACC_CASSERT_MSG(math_Vector__access_to_y_is_not_allowed_for_DIM_lesser_than_2, dim >= 2); + return (*this)[1]; + } + + HDINLINE type& z() + { + PMACC_CASSERT_MSG(math_Vector__access_to_z_is_not_allowed_for_DIM_lesser_than_3, dim >= 3); + return (*this)[2]; + } + + HDINLINE const type& x() const + { + return (*this)[0]; + 
} + + HDINLINE const type& y() const + { + PMACC_CASSERT_MSG(math_Vector__access_to_y_is_not_allowed_for_DIM_lesser_than_2, dim >= 2); + return (*this)[1]; + } + + HDINLINE const type& z() const + { + PMACC_CASSERT_MSG(math_Vector__access_to_z_is_not_allowed_for_DIM_lesser_than_3, dim >= 3); + return (*this)[2]; + } + + template + HDINLINE Vector shrink(const int startIdx = 0) const + { + PMACC_CASSERT_MSG( + math_Vector__shrinkedDim_DIM_must_be_lesser_or_equal_to_Vector_DIM, + shrinkedDim <= dim); + Vector result; + for(int i = 0; i < shrinkedDim; i++) + result[i] = (*this)[(startIdx + i) % dim]; + return result; + } + + /** Removes a component + * + * It is not allowed to call this method on a vector with the dimensionality of one. + * + * @tparam dimToRemove index which shall be removed; range: [ 0; dim - 1 ] + * @return vector with `dim - 1` elements + */ + template + HDINLINE Vector remove() const + { + PMACC_CASSERT_MSG(__math_Vector__dim_must_be_greater_than_1__, dim > 1); + PMACC_CASSERT_MSG(__math_Vector__dimToRemove_must_be_lesser_than_dim__, dimToRemove < dim); + Vector result; + for(int i = 0; i < dim - 1; ++i) + { + // skip component which must be deleted + const int sourceIdx = i >= dimToRemove ? i + 1 : i; + result[i] = (*this)[sourceIdx]; + } + return result; + } + + /** Returns product of all components. + * + * @return product of components + */ + HDINLINE type productOfComponents() const + { + type result = (*this)[0]; + for(int i = 1; i < dim; i++) + result *= (*this)[i]; + return result; + } + + /** Returns sum of all components. + * + * @return sum of components + */ + HDINLINE type sumOfComponents() const + { + type result = (*this)[0]; + for(int i = 1; i < dim; i++) + result += (*this)[i]; + return result; + } + + /*! 
+= operator + * @param other instance with same type and dimension like the left instance + * @return reference to manipulated left instance + */ + template class T_OtherStorage> + HDINLINE This& operator+=( + const Vector& other) + { + for(int i = 0; i < dim; i++) + (*this)[i] += other[i]; + return *this; + } + + /*! -= operator + * @param other instance with same type and dimension like the left instance + * @return reference to manipulated left instance + */ + template class T_OtherStorage> + HDINLINE This& operator-=( + const Vector& other) + { + for(int i = 0; i < dim; i++) + (*this)[i] -= other[i]; + return *this; + } + + /*! *= operator + * @param other instance with same type and dimension like the left instance + * @return reference to manipulated left instance + */ + template class T_OtherStorage> + HDINLINE This& operator*=( + const Vector& other) + { + for(int i = 0; i < dim; i++) + (*this)[i] *= other[i]; + return *this; + } + + /*! /= operator + * @param other instance with same type and dimension like the left instance + * @return reference to manipulated left instance + */ + template class T_OtherStorage> + HDINLINE This& operator/=( + const Vector& other) + { + for(int i = 0; i < dim; i++) + (*this)[i] /= other[i]; + return *this; + } + + HDINLINE This& operator+=(ParamType other) + { + for(int i = 0; i < dim; i++) + (*this)[i] += other; + return *this; + } + + HDINLINE This& operator-=(ParamType other) + { + for(int i = 0; i < dim; i++) + (*this)[i] -= other; + return *this; + } + + HDINLINE This& operator*=(ParamType other) + { + for(int i = 0; i < dim; i++) + (*this)[i] *= other; + return *this; + } + + HDINLINE This& operator/=(ParamType other) + { + for(int i = 0; i < dim; i++) + (*this)[i] /= other; + return *this; + } + + /** + * == comparison operator. + * + * Compares sizes of two DataSpaces. 
+ * + * @param other Vector to compare to + * @return true if all components in both vectors are equal, else false + */ + HDINLINE bool operator==(const This& rhs) const + { + bool result = true; + for(int i = 0; i < dim; i++) + result = result && ((*this)[i] == rhs[i]); + return result; + } + + /** + * != comparison operator. + * + * Compares sizes of two DataSpaces. + * + * @param other Vector to compare to + * @return true if one component in both vectors are not equal, else false + */ + HDINLINE bool operator!=(const This& rhs) const + { + return !((*this) == rhs); + } + + /** create string out of the vector + * + * @param separator string to separate components of the vector + * @param enclosings string with size 2 to enclose vector + * size == 0 ? no enclose symbols + * size == 1 ? means enclose symbol begin and end are equal + * size >= 2 ? letter[0] = begin enclose symbol + * letter[1] = end enclose symbol + * + * example: + * .toString(";","|") -> |x;...;z| + * .toString(",","[]") -> [x,...,z] + */ + std::string toString(const std::string separator = ",", const std::string enclosings = "{}") const + { + std::string locale_enclosing_begin; + std::string locale_enclosing_end; + size_t enclosing_size = enclosings.size(); + + if(enclosing_size > 0) + { + /* % avoid out of memory access */ + locale_enclosing_begin = enclosings[0 % enclosing_size]; + locale_enclosing_end = enclosings[1 % enclosing_size]; + } + + std::stringstream stream; + stream << locale_enclosing_begin << (*this)[0]; + + for(int i = 1; i < dim; ++i) + stream << separator << (*this)[i]; + stream << locale_enclosing_end; + return stream.str(); + } + + HDINLINE cupla::dim3 toDim3() const + { + cupla::dim3 result; + unsigned int* ptr = &result.x; + for(int d = 0; d < dim; ++d) + ptr[d] = (*this)[d]; + return result; + } + }; + + template + struct Vector + { + using type = Type; + static constexpr int dim = 0; + + template + HDINLINE operator Vector() const + { + return Vector(); + } + + /** + * 
== comparison operator. + * + * Returns always true + */ + HDINLINE bool operator==(const Vector& rhs) const + { + return true; + } + + /** + * != comparison operator. + * + * Returns always false + */ + HDINLINE bool operator!=(const Vector& rhs) const + { + return false; + } + + HDINLINE + static Vector create(Type) + { + /* this method should never be actually called, + * it exists only for Visual Studio to handle pmacc::math::Size_t< 0 > + */ + PMACC_CASSERT_MSG(Vector_dim_0_create_cannot_be_called, sizeof(Type) != 0 && false); + } + }; + + template + std::ostream& operator<<(std::ostream& s, const Vector& vec) + { + return s << vec.toString(); + } - template - HDINLINE Vector shrink(const int startIdx = 0) const - { - PMACC_CASSERT_MSG(math_Vector__shrinkedDim_DIM_must_be_lesser_or_equal_to_Vector_DIM,shrinkedDim <= dim); - Vector result; - for (int i = 0; i < shrinkedDim; i++) - result[i] = (*this)[(startIdx + i) % dim]; - return result; - } - - /** Removes a component - * - * It is not allowed to call this method on a vector with the dimensionality of one. - * - * @tparam dimToRemove index which shall be removed; range: [ 0; dim - 1 ] - * @return vector with `dim - 1` elements - */ - template - HDINLINE Vector remove() const - { - PMACC_CASSERT_MSG(__math_Vector__dim_must_be_greater_than_1__, dim > 1); - PMACC_CASSERT_MSG(__math_Vector__dimToRemove_must_be_lesser_than_dim__, dimToRemove < dim); - Vector result; - for (int i = 0; i < dim - 1; ++i) + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE Vector operator+( + const Vector& lhs, + const Vector& rhs) { - // skip component which must be deleted - const int sourceIdx = i >= dimToRemove ? 
i + 1 : i; - result[i] = (*this)[sourceIdx]; + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result += rhs; + return result; } - return result; - } - - /** Returns product of all components. - * - * @return product of components - */ - HDINLINE type productOfComponents() const - { - type result = (*this)[0]; - for (int i = 1; i < dim; i++) - result *= (*this)[i]; - return result; - } - - /** Returns sum of all components. - * - * @return sum of components - */ - HDINLINE type sumOfComponents() const - { - type result = (*this)[0]; - for (int i = 1; i < dim; i++) - result += (*this)[i]; - return result; - } - - /*! += operator - * @param other instance with same type and dimension like the left instance - * @return reference to manipulated left instance - */ - template< - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE This& - operator+=(const Vector< - type, dim, - T_OtherAccessor, T_OtherNavigator, T_OtherStorage>& - other) - { - for (int i = 0; i < dim; i++) - (*this)[i] += other[i]; - return *this; - } - - /*! -= operator - * @param other instance with same type and dimension like the left instance - * @return reference to manipulated left instance - */ - template< - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE This& - operator-=(const Vector< - type, dim, - T_OtherAccessor, T_OtherNavigator, T_OtherStorage>& - other) - { - for (int i = 0; i < dim; i++) - (*this)[i] -= other[i]; - return *this; - } - - /*! 
*= operator - * @param other instance with same type and dimension like the left instance - * @return reference to manipulated left instance - */ - template< - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE This& - operator*=(const Vector< - type, dim, - T_OtherAccessor, T_OtherNavigator, T_OtherStorage>& - other) - { - for (int i = 0; i < dim; i++) - (*this)[i] *= other[i]; - return *this; - } - - /*! /= operator - * @param other instance with same type and dimension like the left instance - * @return reference to manipulated left instance - */ - template< - typename T_OtherAccessor, - typename T_OtherNavigator, - template class T_OtherStorage> - HDINLINE This& - operator/=(const Vector< - type, dim, - T_OtherAccessor, T_OtherNavigator, T_OtherStorage>& - other) - { + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + HDINLINE Vector operator+( + const Vector& lhs, + typename Vector::ParamType rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result += rhs; + return result; + } - for (int i = 0; i < dim; i++) - (*this)[i] /= other[i]; - return *this; - } + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE Vector operator-( + const Vector& lhs, + const Vector& rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result -= rhs; + return result; + } - HDINLINE This& operator+=(ParamType other) - { + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + HDINLINE Vector operator-( + const Vector& lhs, + typename Vector::ParamType rhs) + { + /* to avoid allocation 
side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result -= rhs; + return result; + } - for (int i = 0; i < dim; i++) - (*this)[i] += other; - return *this; - } + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE Vector operator*( + const Vector& lhs, + const Vector& rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result *= rhs; + return result; + } - HDINLINE This& operator-=(ParamType other) - { + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE Vector operator/( + const Vector& lhs, + const Vector& rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result /= rhs; + return result; + } - for (int i = 0; i < dim; i++) - (*this)[i] -= other; - return *this; - } + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + HDINLINE Vector operator*( + const Vector& lhs, + typename Vector::ParamType rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result *= rhs; + return result; + } - HDINLINE This& operator*=(ParamType other) - { + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + HDINLINE Vector operator*( + typename boost::call_traits::param_type lhs, + const Vector& rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(rhs); + result *= lhs; + return 
result; + } - for (int i = 0; i < dim; i++) - (*this)[i] *= other; - return *this; - } + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + HDINLINE Vector operator/( + const Vector& lhs, + typename Vector::ParamType rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(lhs); + result /= rhs; + return result; + } - HDINLINE This& operator/=(ParamType other) - { + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + HDINLINE Vector operator-(const Vector& vec) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result(vec); - for (int i = 0; i < dim; i++) - (*this)[i] /= other; - return *this; - } - - /** - * == comparison operator. - * - * Compares sizes of two DataSpaces. - * - * @param other Vector to compare to - * @return true if all components in both vectors are equal, else false - */ - HDINLINE bool operator==(const This& rhs) const - { - bool result = true; - for (int i = 0; i < dim; i++) - result = result && ((*this)[i] == rhs[i]); - return result; - } - - /** - * != comparison operator. - * - * Compares sizes of two DataSpaces. - * - * @param other Vector to compare to - * @return true if one component in both vectors are not equal, else false - */ - HDINLINE bool operator!=(const This& rhs) const - { + for(int i = 0; i < T_Dim; i++) + result[i] = -result[i]; + return result; + } - return !((*this) == rhs); - } - - /** create string out of the vector - * - * @param separator string to separate components of the vector - * @param enclosings string with size 2 to enclose vector - * size == 0 ? no enclose symbols - * size == 1 ? means enclose symbol begin and end are equal - * size >= 2 ? 
letter[0] = begin enclose symbol - * letter[1] = end enclose symbol - * - * example: - * .toString(";","|") -> |x;...;z| - * .toString(",","[]") -> [x,...,z] - */ - std::string toString(const std::string separator = ",", const std::string enclosings = "{}") const - { - std::string locale_enclosing_begin; - std::string locale_enclosing_end; - size_t enclosing_size=enclosings.size(); + template< + typename T_Type, + int T_Dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE Vector operator>=( + const Vector& lhs, + const Vector& rhs) + { + /* to avoid allocation side effects the result is always a vector + * with default policies*/ + Vector result; + for(int i = 0; i < T_Dim; ++i) + result[i] = (lhs[i] >= rhs[i]); + return result; + } - if(enclosing_size > 0) + template< + typename T_Type, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE T_Type linearize( + const Vector& size, + const Vector& pos) { - /* % avoid out of memory access */ - locale_enclosing_begin=enclosings[0%enclosing_size]; - locale_enclosing_end=enclosings[1%enclosing_size]; + return pos.y() * size.x() + pos.x(); } - std::stringstream stream; - stream << locale_enclosing_begin << (*this)[0]; + template< + typename T_Type, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage, + typename T_OtherAccessor, + typename T_OtherNavigator, + template + class T_OtherStorage> + HDINLINE T_Type linearize( + const Vector& size, + const Vector& pos) + { + return pos.z() * size.x() * size.y() + pos.y() * size.x() + pos.x(); + } - for (int i = 1; i < dim; ++i) - stream << separator << (*this)[i]; - stream << locale_enclosing_end; - return stream.str(); - } - HDINLINE dim3 toDim3() const - { - dim3 result; - unsigned int* ptr = 
&result.x; - for (int d = 0; d < dim; ++d) - ptr[d] = (*this)[d]; - return result; - } -}; - -template -struct Vector -{ - using type = Type; - static constexpr int dim = 0; + template + HDINLINE Lhs operator%(const Lhs& lhs, const Rhs& rhs) + { + Lhs result; - template - HDINLINE operator Vector () const - { - return Vector (); - } - - /** - * == comparison operator. - * - * Returns always true - */ - HDINLINE bool operator==(const Vector& rhs) const - { - return true; - } - - /** - * != comparison operator. - * - * Returns always false - */ - HDINLINE bool operator!=(const Vector& rhs) const - { - return false; - } + for(int i = 0; i < Lhs::dim; i++) + result[i] = lhs[i] % rhs[i]; + return result; + } - HDINLINE - static Vector create(Type) - { - /* this method should never be actually called, - * it exists only for Visual Studio to handle pmacc::math::Size_t< 0 > + struct Abs + { + template + HDINLINE Type operator()(const Vector& vec) + { + return cupla::math::abs(vec); + } + }; + + /** Get the unit basis vector of the given type along the given direction + * + * In case 0 <= T_direction < T_Vector::dim, return the basis vector with value + * 1 in component T_direction and 0 in other components, otherwise return the + * zero vector. 
+ * + * @tparam T_Vector result type + * @tparam T_direction index of the basis vector direction */ - PMACC_CASSERT_MSG(Vector_dim_0_create_cannot_be_called, false); - } -}; - -template -std::ostream& operator<<(std::ostream& s, const Vector& vec) -{ - - return s << vec.toString(); -} - -template class T_Storage, -typename T_OtherAccessor, -typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE Vector -operator+(const Vector& lhs, - const Vector& rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result += rhs; - return result; -} - -template class T_Storage -> -HDINLINE Vector -operator+(const Vector& lhs, - typename Vector::ParamType rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result += rhs; - return result; -} - -template class T_Storage, -typename T_OtherAccessor, -typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE Vector -operator-(const Vector& lhs, - const Vector& rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result -= rhs; - return result; -} - -template class T_Storage -> -HDINLINE Vector -operator-(const Vector& lhs, - typename Vector::ParamType rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result -= rhs; - return result; -} - -template class T_Storage, -typename T_OtherAccessor, -typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE Vector -operator*(const Vector& lhs, - const Vector& rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result *= rhs; - return result; -} - -template< -typename T_Type, int T_Dim, -typename T_Accessor, -typename T_Navigator, -template class T_Storage, -typename T_OtherAccessor, 
-typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE Vector -operator/(const Vector& lhs, - const Vector& rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result /= rhs; - return result; -} - -template< -typename T_Type, int T_Dim, -typename T_Accessor, -typename T_Navigator, -template class T_Storage -> -HDINLINE Vector -operator*(const Vector& lhs, - typename Vector::ParamType rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result *= rhs; - return result; -} - -template< -typename T_Type, int T_Dim, -typename T_Accessor, -typename T_Navigator, -template class T_Storage -> -HDINLINE Vector -operator*(typename boost::call_traits::param_type lhs, - const Vector& rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(rhs); - result *= lhs; - return result; -} - -template< -typename T_Type, int T_Dim, -typename T_Accessor, -typename T_Navigator, -template class T_Storage -> -HDINLINE Vector -operator/(const Vector& lhs, - typename Vector::ParamType rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(lhs); - result /= rhs; - return result; -} - -template< -typename T_Type, int T_Dim, -typename T_Accessor, -typename T_Navigator, -template class T_Storage -> -HDINLINE Vector -operator-(const Vector& vec) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result(vec); - - for (int i = 0; i < T_Dim; i++) - result[i] = -result[i]; - return result; -} - -template< -typename T_Type, int T_Dim, -typename T_Accessor, -typename T_Navigator, -template class T_Storage, -typename T_OtherAccessor, -typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE Vector -operator>=(const Vector& lhs, - const Vector& 
rhs) -{ - /* to avoid allocation side effects the result is always a vector - * with default policies*/ - Vector result; - for (int i = 0; i < T_Dim; ++i) - result[i] = (lhs[i] >= rhs[i]); - return result; -} - -template< -typename T_Type, -typename T_Accessor, -typename T_Navigator, -template class T_Storage, -typename T_OtherAccessor, -typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE T_Type -linearize(const Vector& size, - const Vector& pos) -{ - return pos.y() * size.x() + pos.x(); -} - -template< -typename T_Type, -typename T_Accessor, -typename T_Navigator, -template class T_Storage, -typename T_OtherAccessor, -typename T_OtherNavigator, -template class T_OtherStorage -> -HDINLINE T_Type -linearize(const Vector& size, - const Vector& pos) -{ - return pos.z() * size.x() * size.y() + pos.y() * size.x() + pos.x(); -} + template + HDINLINE T_Vector basisVector(); + } // namespace math -template -HDINLINE Lhs operator%(const Lhs& lhs, const Rhs& rhs) -{ - Lhs result; - - for (int i = 0; i < Lhs::dim; i++) - result[i] = lhs[i] % rhs[i]; - return result; -} - -struct Abs2 -{ - template - HDINLINE Type operator()(const Vector& vec) - { - return pmacc::algorithms::math::abs2(vec); - } -}; - -struct Abs -{ - template - HDINLINE Type operator()(const Vector& vec) + namespace result_of { - return pmacc::algorithms::math::abs(vec); - } -}; - -} //namespace math - -namespace result_of -{ - -template -struct Functor -{ - using type = typename TVector::type; -}; - -template -struct Functor -{ - using type = typename TVector::type; -}; + template + struct Functor + { + using type = typename TVector::type; + }; -} //namespace result_of -} //namespace pmacc + } // namespace result_of +} // namespace pmacc diff --git a/include/pmacc/math/vector/Vector.tpp b/include/pmacc/math/vector/Vector.tpp index c22243fabd..70a059f943 100644 --- a/include/pmacc/math/vector/Vector.tpp +++ b/include/pmacc/math/vector/Vector.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel 
Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, * Sergei Bastrakov * * This file is part of PMacc. @@ -25,6 +25,7 @@ #include "pmacc/math/Vector.hpp" +#include "pmacc/math/vector/compile-time/Vector.hpp" #include "pmacc/algorithms/math.hpp" #include "pmacc/algorithms/TypeCast.hpp" #include "pmacc/algorithms/PromoteType.hpp" @@ -33,332 +34,329 @@ #include "pmacc/traits/GetNComponents.hpp" #include "pmacc/traits/GetInitializedInstance.hpp" -namespace pmacc -{ -namespace traits -{ - -template -struct GetComponentsType, false > -{ - using type = typename pmacc::math::Vector::type; -}; - -template -struct GetNComponents,false > -{ - static constexpr uint32_t value = (uint32_t) pmacc::math::Vector::dim; -}; +#include -template class T_Storage> -struct GetInitializedInstance > +namespace pmacc { - using Type = math::Vector; - using ValueType = typename Type::type; - - HDINLINE Type operator()(const ValueType value) const + namespace traits { - return Type::create(value); - } -}; - -} // namespace traits + template + struct GetComponentsType, false> + { + using type = typename pmacc::math::Vector::type; + }; + + template + struct GetNComponents, false> + { + static constexpr uint32_t value = (uint32_t) pmacc::math::Vector::dim; + }; + + template< + typename T_Type, + int T_dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + struct GetInitializedInstance> + { + using Type = math::Vector; + using ValueType = typename Type::type; + + HDINLINE Type operator()(const ValueType value) const + { + return Type::create(value); + } + }; + + } // namespace traits } // namespace pmacc namespace pmacc { -namespace algorithms -{ -namespace math -{ - -/*#### comparison ############################################################*/ - -/*specialize max algorithm*/ -template -struct Max< ::pmacc::math::Vector, ::pmacc::math::Vector > -{ - using result = ::pmacc::math::Vector; - 
- HDINLINE result operator( )(const ::pmacc::math::Vector &vector1, const ::pmacc::math::Vector &vector2 ) + namespace math { - result tmp; - for ( int i = 0; i < dim; ++i ) - tmp[i] = pmacc::algorithms::math::max( vector1[i], vector2[i] ); - return tmp; - } -}; - -/*specialize max algorithm*/ -template -struct Min< ::pmacc::math::Vector, ::pmacc::math::Vector > -{ - using result = ::pmacc::math::Vector; - - HDINLINE result operator( )(const ::pmacc::math::Vector &vector1, const ::pmacc::math::Vector &vector2 ) - { - result tmp; - for ( int i = 0; i < dim; ++i ) - tmp[i] = pmacc::algorithms::math::min( vector1[i], vector2[i] ); - return tmp; - } -}; - -/*#### abs ###################################################################*/ - -/*specialize abs2 algorithm*/ -template -struct Abs2< ::pmacc::math::Vector > -{ - using result = typename ::pmacc::math::Vector::type; - - HDINLINE result operator( )(const ::pmacc::math::Vector &vector ) - { - result tmp = pmacc::algorithms::math::abs2( vector.x( ) ); - for ( int i = 1; i < dim; ++i ) - tmp += pmacc::algorithms::math::abs2( vector[i] ); - return tmp; - } -}; - -/*specialize abs algorithm*/ -template -struct Abs< ::pmacc::math::Vector > -{ - using result = typename ::pmacc::math::Vector::type; - - HDINLINE result operator( )( ::pmacc::math::Vector vector ) - { - const result tmp = pmacc::algorithms::math::abs2( vector ); - return pmacc::algorithms::math::sqrt( tmp ); - } -}; - -/*#### cross #################################################################*/ - -template -struct Cross< ::pmacc::math::Vector, ::pmacc::math::Vector > -{ - using myType = ::pmacc::math::Vector; - using result = myType; - - HDINLINE myType operator( )(const myType& lhs, const myType & rhs ) - { - return myType( lhs.y( ) * rhs.z( ) - lhs.z( ) * rhs.y( ), - lhs.z( ) * rhs.x( ) - lhs.x( ) * rhs.z( ), - lhs.x( ) * rhs.y( ) - lhs.y( ) * rhs.x( ) ); - } -}; - -/*#### dot ###################################################################*/ - 
-template -struct Dot< ::pmacc::math::Vector, ::pmacc::math::Vector > -{ - using myType = ::pmacc::math::Vector; - using result = Type; - - HDINLINE result operator( )(const myType& a, const myType & b ) - { - BOOST_STATIC_ASSERT( dim > 0 ); - result tmp = a.x( ) * b.x( ); - for ( int i = 1; i < dim; i++ ) - tmp += a[i] * b[i]; - return tmp; - } -}; - -/*#### exp ###################################################################*/ - -/*! Specialization of exp where power is a vector - * - * Compute exp separately for every component of the vector. - * - * @param power vector with power values - */ -template -struct Exp< ::pmacc::math::Vector > -{ - using Vector1 = ::pmacc::math::Vector; - using result = Vector1; - - HDINLINE result operator( )(const Vector1& power ) - { - BOOST_STATIC_ASSERT( dim > 0 ); - result tmp; - for ( int i = 0; i < dim; ++i ) - tmp[i] = pmacc::algorithms::math::exp( power[i] ); - return tmp; - } -}; + /*specialize max algorithm*/ + template + struct Max<::pmacc::math::Vector, ::pmacc::math::Vector> + { + using result = ::pmacc::math::Vector; + + HDINLINE result operator()( + const ::pmacc::math::Vector& vector1, + const ::pmacc::math::Vector& vector2) + { + result tmp; + for(int i = 0; i < dim; ++i) + tmp[i] = pmacc::math::max(vector1[i], vector2[i]); + return tmp; + } + }; + + /*specialize min algorithm*/ + template + struct Min<::pmacc::math::Vector, ::pmacc::math::Vector> + { + using result = ::pmacc::math::Vector; + + HDINLINE result operator()( + const ::pmacc::math::Vector& vector1, + const ::pmacc::math::Vector& vector2) + { + result tmp; + for(int i = 0; i < dim; ++i) + tmp[i] = pmacc::math::min(vector1[i], vector2[i]); + return tmp; + } + }; + + /*! 
Specialisation of cross where base is a vector with three components */ + template + struct Cross<::pmacc::math::Vector, ::pmacc::math::Vector> + { + using myType = ::pmacc::math::Vector; + using result = myType; + + HDINLINE myType operator()(const myType& lhs, const myType& rhs) + { + return myType( + lhs.y() * rhs.z() - lhs.z() * rhs.y(), + lhs.z() * rhs.x() - lhs.x() * rhs.z(), + lhs.x() * rhs.y() - lhs.y() * rhs.x()); + } + }; + + /*! Specialisation of Dot where base is a vector */ + template + struct Dot<::pmacc::math::Vector, ::pmacc::math::Vector> + { + using myType = ::pmacc::math::Vector; + using result = Type; + + HDINLINE result operator()(const myType& a, const myType& b) + { + PMACC_CASSERT(dim > 0); + result tmp = a.x() * b.x(); + for(int i = 1; i < dim; i++) + tmp += a[i] * b[i]; + return tmp; + } + }; + + /*specialize abs2 algorithm*/ + template + struct Abs2<::pmacc::math::Vector> + { + using result = typename ::pmacc::math::Vector::type; + + HDINLINE result operator()(const ::pmacc::math::Vector& vector) + { + result tmp = pmacc::math::abs2(vector.x()); + for(int i = 1; i < dim; ++i) + tmp += pmacc::math::abs2(vector[i]); + return tmp; + } + }; + + template + HDINLINE T_Vector basisVector() + { + using Result = typename CT::make_BasisVector::type; + return Result::toRT(); + } + + } // namespace math +} // namespace pmacc -/*#### pow ###################################################################*/ -/*! Specialisation of pow where base is a vector and exponent is a scalar - * - * Create pow separatley for every component of the vector. - * - * @prama base vector with base values - * @param exponent scalar with exponent value +/* Using the free alpaka functions `alpaka::math::*` will result into `__host__ __device__` + * errors, therefore the alpaka math trait must be used. 
*/ -template -struct Pow< ::pmacc::math::Vector, T2 > -{ - using Vector1 = ::pmacc::math::Vector; - using result = Vector1; - - HDINLINE result operator( )(const Vector1& base, const T2 & exponent ) - { - BOOST_STATIC_ASSERT( dim > 0 ); - result tmp; - for ( int i = 0; i < dim; ++i ) - tmp[i] = pmacc::algorithms::math::pow( base[i], exponent ); - return tmp; +#define PMACC_UNARY_APAKA_MATH_SPECIALIZATION(functionName, alpakaMathTrait) \ + template \ + struct alpakaMathTrait, void> \ + { \ + using ResultType = ::pmacc::math::Vector; \ + \ + ALPAKA_FN_ACC static auto functionName( \ + T_Ctx const& mathConcept, \ + ::pmacc::math::Vector const& vector) -> ResultType \ + { \ + PMACC_CASSERT(T_dim > 0); \ + \ + ResultType tmp; \ + for(int i = 0; i < T_dim; ++i) \ + tmp[i] = alpaka::math::functionName(mathConcept, vector[i]); \ + return tmp; \ + } \ } -}; - -/*#### floor #################################################################*/ -/*specialize floor algorithm*/ -template -struct Floor< ::pmacc::math::Vector > +namespace alpaka { - using result = ::pmacc::math::Vector; - - HDINLINE result operator( )( ::pmacc::math::Vector &vector ) + namespace math { - result tmp; - for ( int i = 0; i < dim; ++i ) - tmp[i] = pmacc::algorithms::math::floor( vector[i] ); - return tmp; - } -}; - - -} // namespace math -} // namespace algorithms -} // namespace pmacc + namespace traits + { + /*! Specialisation of pow where base is a vector and exponent is a scalar + * + * Create pow separatley for every component of the vector. 
+ */ + template + struct Pow, T_ScalarType, void> + { + using ResultType = typename ::pmacc::math::Vector::type; + + ALPAKA_FN_HOST_ACC static auto pow( + T_Ctx const& mathConcept, + ::pmacc::math::Vector const& vector, + T_ScalarType const& exponent) -> ResultType + { + PMACC_CASSERT(T_dim > 0); + ResultType tmp; + for(int i = 0; i < T_dim; ++i) + tmp[i] = cupla::pow(vector[i], exponent); + return tmp; + } + }; + + // Exp specialization + PMACC_UNARY_APAKA_MATH_SPECIALIZATION(exp, Exp); + + // Floor specialization + PMACC_UNARY_APAKA_MATH_SPECIALIZATION(floor, Floor); + + /* Abs specialization + * + * Returns the length of the vector to fit the old implementation. + * @todo implement a math function magnitude instead of using abs to get the length of the vector. + */ + template + struct Abs, void> + { + using ResultType = typename ::pmacc::math::Vector::type; + + ALPAKA_FN_HOST_ACC static auto abs( + T_Ctx const& mathConcept, + ::pmacc::math::Vector const& vector) -> ResultType + { + PMACC_CASSERT(T_dim > 0); + + ResultType const tmp = pmacc::math::abs2(vector); + return cupla::math::sqrt(tmp); + } + }; + + } // namespace traits + } // namespace math +} // namespace alpaka namespace pmacc { -namespace algorithms -{ -namespace precisionCast -{ - -template class T_Storage> -struct TypeCast< - CastToType, - ::pmacc::math::Vector -> -{ - using result = const ::pmacc::math::Vector< - CastToType, - dim, - T_Accessor, - T_Navigator, - T_Storage>&; - - HDINLINE result operator( )( result vector ) const + namespace algorithms { - return vector; - } -}; - -template class T_Storage> -struct TypeCast< - CastToType, - ::pmacc::math::Vector -> -{ - using result = ::pmacc::math::Vector; - using ParamType = ::pmacc::math::Vector; - - HDINLINE result operator( )(const ParamType& vector ) const - { - return result( vector ); - } -}; - -} // namespace typecast -} // namespace algorithms + namespace precisionCast + { + template< + typename CastToType, + int dim, + typename 
T_Accessor, + typename T_Navigator, + template + class T_Storage> + struct TypeCast> + { + using result = const ::pmacc::math::Vector&; + + HDINLINE result operator()(result vector) const + { + return vector; + } + }; + + template< + typename CastToType, + typename OldType, + int dim, + typename T_Accessor, + typename T_Navigator, + template + class T_Storage> + struct TypeCast> + { + using result = ::pmacc::math::Vector; + using ParamType = ::pmacc::math::Vector; + + HDINLINE result operator()(const ParamType& vector) const + { + return result(vector); + } + }; + + } // namespace precisionCast + } // namespace algorithms } // namespace pmacc namespace pmacc { -namespace algorithms -{ -namespace promoteType -{ - -template -struct promoteType > -{ - using PartType = typename promoteType::type; - using type = ::pmacc::math::Vector; -}; - -} // namespace promoteType -} // namespace algorithms + namespace algorithms + { + namespace promoteType + { + template + struct promoteType> + { + using PartType = typename promoteType::type; + using type = ::pmacc::math::Vector; + }; + + } // namespace promoteType + } // namespace algorithms } // namespace pmacc namespace pmacc { -namespace mpi -{ -namespace def -{ - -template -struct GetMPI_StructAsArray< ::pmacc::math::Vector > -{ - - MPI_StructAsArray operator( )( ) const - { - return MPI_StructAsArray( MPI_FLOAT, T_dim ); - } -}; - -template -struct GetMPI_StructAsArray< ::pmacc::math::Vector[T_N] > -{ - - MPI_StructAsArray operator( )( ) const + namespace mpi { - return MPI_StructAsArray( MPI_FLOAT, T_dim * T_N ); - } -}; - -template -struct GetMPI_StructAsArray< ::pmacc::math::Vector > -{ - - MPI_StructAsArray operator( )( ) const - { - return MPI_StructAsArray( MPI_DOUBLE, T_dim ); - } -}; - -template -struct GetMPI_StructAsArray< ::pmacc::math::Vector[T_N] > -{ - - MPI_StructAsArray operator( )( ) const - { - return MPI_StructAsArray( MPI_DOUBLE, T_dim * T_N ); - } -}; - -} // namespace def -} // namespace mpi + namespace 
def + { + template + struct GetMPI_StructAsArray<::pmacc::math::Vector> + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_FLOAT, T_dim); + } + }; + + template + struct GetMPI_StructAsArray<::pmacc::math::Vector[T_N]> + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_FLOAT, T_dim * T_N); + } + }; + + template + struct GetMPI_StructAsArray<::pmacc::math::Vector> + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_DOUBLE, T_dim); + } + }; + + template + struct GetMPI_StructAsArray<::pmacc::math::Vector[T_N]> + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_DOUBLE, T_dim * T_N); + } + }; + + } // namespace def + } // namespace mpi } // namespace pmacc diff --git a/include/pmacc/math/vector/accessor/StandardAccessor.hpp b/include/pmacc/math/vector/accessor/StandardAccessor.hpp index 81484f330a..1030b50b87 100644 --- a/include/pmacc/math/vector/accessor/StandardAccessor.hpp +++ b/include/pmacc/math/vector/accessor/StandardAccessor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -25,25 +25,23 @@ namespace pmacc { -namespace math -{ - -/** \todo rename this class to AccessorIdentity*/ -struct StandardAccessor -{ -template -HDINLINE Data& operator()(Data& data) const -{ - return data; -} - -template -HDINLINE const Data& operator()(const Data& data) const -{ - return data; -} + namespace math + { + /** \todo rename this class to AccessorIdentity*/ + struct StandardAccessor + { + template + HDINLINE Data& operator()(Data& data) const + { + return data; + } -}; + template + HDINLINE const Data& operator()(const Data& data) const + { + return data; + } + }; -} // math -} // pmacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/Float.hpp b/include/pmacc/math/vector/compile-time/Float.hpp index 74edca93f7..3278d7ad27 100644 --- a/include/pmacc/math/vector/compile-time/Float.hpp +++ b/include/pmacc/math/vector/compile-time/Float.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,43 +28,42 @@ namespace mpl = boost::mpl; namespace pmacc { -namespace math -{ -namespace CT -{ - -template -struct Float -{ - using x = X; - using y = Y; - using z = Z; + namespace math + { + namespace CT + { + template + struct Float + { + using x = X; + using y = Y; + using z = Z; - static constexpr int dim = 3; -}; + static constexpr int dim = 3; + }; -template<> -struct Float<> {}; + template<> + struct Float<> + { + }; -template -struct Float -{ - using x = X; + template + struct Float + { + using x = X; - static constexpr int dim = 1; -}; + static constexpr int dim = 1; + }; -template -struct Float -{ - using x = X; - using y = Y; + template + struct Float + { + using x = X; + using y = Y; - static constexpr int dim = 2u; -}; + static constexpr int dim = 2u; + }; -} // CT -} // math -} // pmacc + } // namespace CT + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/Int.hpp b/include/pmacc/math/vector/compile-time/Int.hpp index 4334ff2366..324e9ee611 100644 --- a/include/pmacc/math/vector/compile-time/Int.hpp +++ b/include/pmacc/math/vector/compile-time/Int.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,65 +28,65 @@ namespace pmacc { -namespace math -{ -namespace CT -{ + namespace math + { + namespace CT + { + /** Compile time int vector + * + * + * @tparam x value for x allowed range [INT_MIN;INT_MAX-1] + * @tparam y value for y allowed range [INT_MIN;INT_MAX-1] + * @tparam z value for z allowed range [INT_MIN;INT_MAX-1] + * + * default parameter is used to distinguish between values given by + * the user and unset values. 
+ */ + template< + int x = traits::limits::Max::value, + int y = traits::limits::Max::value, + int z = traits::limits::Max::value> + struct Int : public CT::Vector, mpl::integral_c, mpl::integral_c> + { + }; -/** Compile time int vector - * - * - * @tparam x value for x allowed range [INT_MIN;INT_MAX-1] - * @tparam y value for y allowed range [INT_MIN;INT_MAX-1] - * @tparam z value for z allowed range [INT_MIN;INT_MAX-1] - * - * default parameter is used to distinguish between values given by - * the user and unset values. - */ -template::value, - int y = traits::limits::Max::value, - int z = traits::limits::Max::value> -struct Int: public CT::Vector, - mpl::integral_c, - mpl::integral_c > -{}; + template<> + struct Int<> : public CT::Vector<> + { + }; -template<> -struct Int<> : public CT::Vector<> -{}; + template + struct Int : public CT::Vector> + { + }; -template -struct Int : public CT::Vector > -{}; + template + struct Int : public CT::Vector, mpl::integral_c> + { + }; -template -struct Int : public CT::Vector, - mpl::integral_c > -{}; + template + struct make_Int; + template + struct make_Int<1, val> + { + using type = Int; + }; -template -struct make_Int; + template + struct make_Int<2, val> + { + using type = Int; + }; -template -struct make_Int<1, val> -{ - using type = Int; -}; - -template -struct make_Int<2, val> -{ - using type = Int; -}; - -template -struct make_Int<3, val> -{ - using type = Int; -}; + template + struct make_Int<3, val> + { + using type = Int; + }; -} // CT -} // math -} // pmacc + } // namespace CT + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/Size_t.hpp b/include/pmacc/math/vector/compile-time/Size_t.hpp index 307ee62e48..bb7d844fcd 100644 --- a/include/pmacc/math/vector/compile-time/Size_t.hpp +++ b/include/pmacc/math/vector/compile-time/Size_t.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part 
of PMacc. * @@ -28,42 +28,44 @@ namespace pmacc { -namespace math -{ -namespace CT -{ - -/** Compile time size_t vector - * - * - * @tparam x value for x allowed range [0;max size_t value -1] - * @tparam y value for y allowed range [0;max size_t value -1] - * @tparam z value for z allowed range [0;max size_t value -1] - * - * default parameter is used to distinguish between values given by - * the user and unset values. - */ -template::value, - size_t y = traits::limits::Max::value, - size_t z = traits::limits::Max::value> -struct Size_t : public CT::Vector, - mpl::integral_c, - mpl::integral_c > -{}; + namespace math + { + namespace CT + { + /** Compile time size_t vector + * + * + * @tparam x value for x allowed range [0;max size_t value -1] + * @tparam y value for y allowed range [0;max size_t value -1] + * @tparam z value for z allowed range [0;max size_t value -1] + * + * default parameter is used to distinguish between values given by + * the user and unset values. + */ + template< + size_t x = traits::limits::Max::value, + size_t y = traits::limits::Max::value, + size_t z = traits::limits::Max::value> + struct Size_t + : public CT::Vector, mpl::integral_c, mpl::integral_c> + { + }; -template<> -struct Size_t<> : public CT::Vector<> -{}; + template<> + struct Size_t<> : public CT::Vector<> + { + }; -template -struct Size_t : public CT::Vector > -{}; + template + struct Size_t : public CT::Vector> + { + }; -template -struct Size_t : public CT::Vector, - mpl::integral_c > -{}; + template + struct Size_t : public CT::Vector, mpl::integral_c> + { + }; -} // CT -} // math -} // pmacc + } // namespace CT + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/TwistComponents.hpp b/include/pmacc/math/vector/compile-time/TwistComponents.hpp index 69a7d1b8d7..00be3d31bd 100644 --- a/include/pmacc/math/vector/compile-time/TwistComponents.hpp +++ b/include/pmacc/math/vector/compile-time/TwistComponents.hpp @@ -1,4 +1,4 @@ -/* 
Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -25,42 +25,41 @@ namespace pmacc { -namespace math -{ -namespace CT -{ - -/** - * @class TwistComponents - * @brief Twists axes of a compile-time vector. - * @tparam Vec compile-time vector to be twisted - * @tparam Axes compile-time vector containing new axes - * - * Example: - * - * using Orientation_Y = pmacc::math::CT::Int<1,2,0>; - * using TwistedBlockDim = typename pmacc::math::CT::TwistComponents::type; - */ -template -struct TwistComponents; + namespace math + { + namespace CT + { + /** + * @class TwistComponents + * @brief Twists axes of a compile-time vector. + * @tparam Vec compile-time vector to be twisted + * @tparam Axes compile-time vector containing new axes + * + * Example: + * + * using Orientation_Y = pmacc::math::CT::Int<1,2,0>; + * using TwistedBlockDim = typename pmacc::math::CT::TwistComponents::type; + */ + template + struct TwistComponents; -template -struct TwistComponents -{ - using type = math::CT::Vector< - typename Vec::template at::type, - typename Vec::template at::type>; -}; + template + struct TwistComponents + { + using type = math::CT::Vector< + typename Vec::template at::type, + typename Vec::template at::type>; + }; -template -struct TwistComponents -{ - using type = math::CT::Vector< - typename Vec::template at::type, - typename Vec::template at::type, - typename Vec::template at::type>; -}; + template + struct TwistComponents + { + using type = math::CT::Vector< + typename Vec::template at::type, + typename Vec::template at::type, + typename Vec::template at::type>; + }; -} // namespace CT -} // namespace math + } // namespace CT + } // namespace math } // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/UInt32.hpp b/include/pmacc/math/vector/compile-time/UInt32.hpp index 0e797aa1e5..dcd813a61f 100644 --- a/include/pmacc/math/vector/compile-time/UInt32.hpp +++ 
b/include/pmacc/math/vector/compile-time/UInt32.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -28,44 +28,46 @@ namespace pmacc { -namespace math -{ -namespace CT -{ - -/** Compile time uint vector - * - * - * @tparam x value for x allowed range [0;max uint32_t value -1] - * @tparam y value for y allowed range [0;max uint32_t value -1] - * @tparam z value for z allowed range [0;max uint32_t value -1] - * - * default parameter is used to distinguish between values given by - * the user and unset values. - */ -template::value, - uint32_t y = traits::limits::Max::value, - uint32_t z = traits::limits::Max::value> -struct UInt32 : public CT::Vector, - mpl::integral_c, - mpl::integral_c > -{}; - -template<> -struct UInt32<> : public CT::Vector<> -{}; + namespace math + { + namespace CT + { + /** Compile time uint32_t vector + * + * + * @tparam x value for x allowed range [0;max uint32_t value -1] + * @tparam y value for y allowed range [0;max uint32_t value -1] + * @tparam z value for z allowed range [0;max uint32_t value -1] + * + * default parameter is used to distinguish between values given by + * the user and unset values. 
+ */ + template< + uint32_t x = traits::limits::Max::value, + uint32_t y = traits::limits::Max::value, + uint32_t z = traits::limits::Max::value> + struct UInt32 + : public CT:: + Vector, mpl::integral_c, mpl::integral_c> + { + }; -template -struct UInt32 : public CT::Vector< mpl::integral_c > -{}; + template<> + struct UInt32<> : public CT::Vector<> + { + }; -template -struct UInt32 : public CT::Vector, - mpl::integral_c > -{}; + template + struct UInt32 : public CT::Vector> + { + }; + template + struct UInt32 : public CT::Vector, mpl::integral_c> + { + }; -} // CT -} // math -} // pmacc + } // namespace CT + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/UInt64.hpp b/include/pmacc/math/vector/compile-time/UInt64.hpp index 23800ae137..193195d2f3 100644 --- a/include/pmacc/math/vector/compile-time/UInt64.hpp +++ b/include/pmacc/math/vector/compile-time/UInt64.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Axel Huebl * * This file is part of PMacc. * @@ -28,44 +28,46 @@ namespace pmacc { -namespace math -{ -namespace CT -{ - -/** Compile time uint vector - * - * - * @tparam x value for x allowed range [0;max uint64_t value -1] - * @tparam y value for y allowed range [0;max uint64_t value -1] - * @tparam z value for z allowed range [0;max uint64_t value -1] - * - * default parameter is used to distinguish between values given by - * the user and unset values. 
- */ -template::value, - uint64_t y = traits::limits::Max::value, - uint64_t z = traits::limits::Max::value> -struct UInt64 : public CT::Vector, - mpl::integral_c, - mpl::integral_c > -{}; - -template<> -struct UInt64<> : public CT::Vector<> -{}; + namespace math + { + namespace CT + { + /** Compile time uint64_t vector + * + * + * @tparam x value for x allowed range [0;max uint64_t value -1] + * @tparam y value for y allowed range [0;max uint64_t value -1] + * @tparam z value for z allowed range [0;max uint64_t value -1] + * + * default parameter is used to distinguish between values given by + * the user and unset values. + */ + template< + uint64_t x = traits::limits::Max::value, + uint64_t y = traits::limits::Max::value, + uint64_t z = traits::limits::Max::value> + struct UInt64 + : public CT:: + Vector, mpl::integral_c, mpl::integral_c> + { + }; -template -struct UInt64 : public CT::Vector< mpl::integral_c > -{}; + template<> + struct UInt64<> : public CT::Vector<> + { + }; -template -struct UInt64 : public CT::Vector, - mpl::integral_c > -{}; + template + struct UInt64 : public CT::Vector> + { + }; + template + struct UInt64 : public CT::Vector, mpl::integral_c> + { + }; -} // CT -} // math -} // pmacc + } // namespace CT + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/compile-time/Vector.hpp b/include/pmacc/math/vector/compile-time/Vector.hpp index d440093187..b2e144ba5a 100644 --- a/include/pmacc/math/vector/compile-time/Vector.hpp +++ b/include/pmacc/math/vector/compile-time/Vector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -40,455 +40,445 @@ namespace pmacc { -namespace math -{ -namespace CT -{ - -namespace mpl = boost::mpl; - -namespace detail -{ -template -struct VectorFromCT; - -template<> -struct VectorFromCT<1> -{ - - template - HDINLINE void operator()(Vec& vec, CTVec) const - { - BOOST_STATIC_ASSERT(Vec::dim == 1); - BOOST_STATIC_ASSERT(CTVec::dim == 1); - vec[0] = (typename Vec::type)CTVec::x::value; - } -}; - -template<> -struct VectorFromCT<2> -{ - - template - HDINLINE void operator()(Vec& vec, CTVec) const - { - BOOST_STATIC_ASSERT(Vec::dim == 2); - BOOST_STATIC_ASSERT(CTVec::dim == 2); - vec[0] = (typename Vec::type)CTVec::x::value; - vec[1] = (typename Vec::type)CTVec::y::value; - } -}; - -template<> -struct VectorFromCT<3> -{ - - template - HDINLINE void operator()(Vec& vec, CTVec) const - { - BOOST_STATIC_ASSERT(Vec::dim == 3); - BOOST_STATIC_ASSERT(CTVec::dim == 3); - vec[0] = (typename Vec::type)CTVec::x::value; - vec[1] = (typename Vec::type)CTVec::y::value; - vec[2] = (typename Vec::type)CTVec::z::value; - } -}; - -template -struct TypeSelector -{ - using type = Arg0; -}; - -/** get integral type*/ -template -struct TypeSelector > -{ - using type = T; -}; - -template<> -struct TypeSelector -{ - using type = mpl::int_<0>; -}; - -} - -namespace mpl = boost::mpl; - -template -struct Vector -{ - using x = Arg0; - using y = Arg1; - using z = Arg2; - - using mplVector = mpl::vector; - - template - struct at - { - using type = typename mpl::at_c::type; - }; - - static constexpr int dim = mpl::size::type::value; - - using type = typename detail::TypeSelector::type; - using This = Vector; - using RT_type = math::Vector; - using vector_type = This; - - template - HDINLINE - operator math::Vector() const - { - math::Vector result; - math::CT::detail::VectorFromCT()(result, *this); - return result; - } - - /** Create a runtime Vector - * - * Creates the corresponding runtime vector object. 
- * - * \return RT_type runtime vector with same value type - */ - static HDINLINE RT_type toRT() + namespace math { - math::Vector result; - math::CT::detail::VectorFromCT()(result, This()); - return result; - } -}; - -//********************************************************* - -//________________________OperatorBase____________________________ - -template -struct applyOperator -{ - using type = typename applyOperator< - typename Lhs::vector_type, - typename Rhs::vector_type, - T_BinaryOperator - >::type; -}; - -template -struct applyOperator, CT::Vector, T_BinaryOperator> -{ - using OpResult = typename mpl::apply::type; - using type = CT::Vector; -}; - -template -struct applyOperator, -CT::Vector, -T_BinaryOperator> -{ - using OpResult0 = typename mpl::apply::type; - using OpResult1 = typename mpl::apply::type; - using type = CT::Vector; -}; - -template -struct applyOperator, -CT::Vector, -T_BinaryOperator> -{ - using OpResult0 = typename mpl::apply::type; - using OpResult1 = typename mpl::apply::type; - using OpResult2 = typename mpl::apply::type; - using type = CT::Vector; -}; - -//________________________A D D____________________________ - -template -struct add -{ - using type = typename applyOperator< - typename Lhs::vector_type, - typename Rhs::vector_type, - mpl::plus - >::type; -}; - -//________________________M U L____________________________ - -template -struct mul -{ - using type = typename applyOperator< - typename Lhs::vector_type, - typename Rhs::vector_type, - mpl::times - >::type; -}; - -//________________________M A X____________________________ - -/** maximum value - * - * @tparam Lhs input vector - * @tparam Rhs input vector - * @return ::type if Rhs is not given - maximum value in elements of Lhs else - * vector with point-wise maximum value per component - */ -template -struct max -{ - using type = typename applyOperator< - typename Lhs::vector_type, - typename Rhs::vector_type, - mpl::max - >::type; -}; - - -/** get element with maximum 
value - * - * @tparam T_Vec input vector - * @return ::type maximum value in elements of T_Vec - */ -template -struct max< - T_Vec, - void -> -{ - using type = typename mpl::accumulate< - typename T_Vec::mplVector, - typename T_Vec::x, - mpl::max< - mpl::_1, - mpl::_2 - > - >::type; -}; - -//________________________M I N____________________________ - - -/** minimum value - * - * @tparam Lhs input vector - * @tparam Rhs input vector - * @return ::type if Rhs is not given - minimum value in elements of Lhs else - * vector with point-wise minimum value per component - */ -template -struct min -{ - using type = typename applyOperator< - typename Lhs::vector_type, - typename Rhs::vector_type, - mpl::min - >::type; -}; - -/** get element with minimum value - * - * @tparam T_Vec input vector - * @return ::type minimum value in elements of T_Vec - */ -template -struct min< - T_Vec, - void -> -{ - using type = typename mpl::accumulate< - typename T_Vec::mplVector, - typename T_Vec::x, - mpl::min< - mpl::_1, - mpl::_2 - > - >::type; -}; - -//________________________D O T____________________________ - -template -struct dot -{ - using MulResult = typename mul::type; - using type = typename mpl::accumulate< - typename MulResult::mplVector, - mpl::int_<0>, - mpl::plus - >::type; -}; - -//________________________V O L U M E____________________________ - -template -struct volume -{ - using type = typename mpl::accumulate< - typename T_Vec::mplVector, - mpl::int_<1>, - mpl::times - >::type; -}; - -//________________________S H R I N K T O________________________ - -/** shrink CT vector to given component count (dimension) - * - * This operation is designed to handle vectors with up to 3 components - * - * @tparam T_Vec vector to shrink - * @tparam T_dim target component count - * @treturn ::type new shrinked vector - */ -template -struct shrinkTo; - -template -struct shrinkTo -{ - using Vec = T_Vec; - using type = CT::Vector; -}; - -template -struct shrinkTo -{ - using Vec = T_Vec; 
- using type = CT::Vector; -}; - -template -struct shrinkTo -{ - using Vec = T_Vec; - using type = CT::Vector; -}; - -//________________________A S S I G N________________________ - -/** Assign a type to a given component in the CT::Vector - * - * defines a public type as result - * - * @tparam T_Vec math::CT::Vector which should be changed - * @tparam T_ComponentPos number of component to changed (type must be bmpl::integral_c) - * @tparam T_Value new value - */ -template -struct Assign; - -template -struct Assign, bmpl::integral_c , T_Value> -{ - using type = pmacc::math::CT::Vector; -}; - -template -struct Assign, bmpl::integral_c, T_Value> -{ - using type = pmacc::math::CT::Vector; -}; - -template -struct Assign, bmpl::integral_c, T_Value> -{ - using type = pmacc::math::CT::Vector; -}; - -/** Assign a type to a given component in the CT::Vector if position is not out of range - * - * if T_ComponentPos < T_Vec::dim ? T_Value is assigned to component T_ComponentPos - * else nothing is done. - * defines a public type as result - * - * @tparam T_Vec math::CT::Vector which should be changed - * @tparam T_ComponentPos number of component to changed (type must be bmpl::integral_c) - * @tparam T_Value new value - */ -template -struct AssignIfInRange -{ - using VectorDim = bmpl::integral_c; - using type = typename bmpl::if_< - bmpl::less, - typename pmacc::math::CT::Assign::type, - T_Vec - >::type; -}; - -//________________________At_c____________________________ - -/** get element from a CT::Vector - * - * defines a public type as result - * - * @tparam T_Vec input CT::Vector - * @tparam T_idx integral index of the component - */ -template -struct At_c -{ - using type = typename mpl::at_c::type; -}; - -//________________________At____________________________ - -/** get element from a CT::Vector - * - * defines a public type as result - * - * @tparam T_Vec input CT::Vector - * @tparam T_Idx integral type index of the component (e.g. 
boost::mpl::int_<2>) - */ -template -struct At -{ - using type = typename mpl::at::type; -}; - -//________________________make_Vector___________________ - -/** create CT::Vector with equal elements - * - * defines a public type as result - * - * @tparam T_dim count of components - * @tparam T_Type type which is assigned to all components - */ -template -struct make_Vector; - -template -struct make_Vector<1, T_Type> -{ - using type = pmacc::math::CT::Vector; -}; - -template -struct make_Vector<2, T_Type> -{ - using type = pmacc::math::CT::Vector; -}; - -template -struct make_Vector<3, T_Type> -{ - using type = pmacc::math::CT::Vector; -}; - -} // CT -} // math -} // pmacc + namespace CT + { + namespace mpl = boost::mpl; + + namespace detail + { + template + struct VectorFromCT; + + template<> + struct VectorFromCT<1> + { + template + HDINLINE void operator()(Vec& vec, CTVec) const + { + BOOST_STATIC_ASSERT(Vec::dim == 1); + BOOST_STATIC_ASSERT(CTVec::dim == 1); + vec[0] = (typename Vec::type) CTVec::x::value; + } + }; + + template<> + struct VectorFromCT<2> + { + template + HDINLINE void operator()(Vec& vec, CTVec) const + { + BOOST_STATIC_ASSERT(Vec::dim == 2); + BOOST_STATIC_ASSERT(CTVec::dim == 2); + vec[0] = (typename Vec::type) CTVec::x::value; + vec[1] = (typename Vec::type) CTVec::y::value; + } + }; + + template<> + struct VectorFromCT<3> + { + template + HDINLINE void operator()(Vec& vec, CTVec) const + { + BOOST_STATIC_ASSERT(Vec::dim == 3); + BOOST_STATIC_ASSERT(CTVec::dim == 3); + vec[0] = (typename Vec::type) CTVec::x::value; + vec[1] = (typename Vec::type) CTVec::y::value; + vec[2] = (typename Vec::type) CTVec::z::value; + } + }; + + template + struct TypeSelector + { + using type = Arg0; + }; + + /** get integral type*/ + template + struct TypeSelector> + { + using type = T; + }; + + template<> + struct TypeSelector + { + using type = mpl::int_<0>; + }; + + } // namespace detail + + namespace mpl = boost::mpl; + + template + struct Vector + { + using x 
= Arg0; + using y = Arg1; + using z = Arg2; + + using mplVector = mpl::vector; + + template + struct at + { + using type = typename mpl::at_c::type; + }; + + static constexpr int dim = mpl::size::type::value; + + using type = typename detail::TypeSelector::type; + using This = Vector; + using RT_type = math::Vector; + using vector_type = This; + + template + HDINLINE operator math::Vector() const + { + math::Vector result; + math::CT::detail::VectorFromCT()(result, *this); + return result; + } + + /** Create a runtime Vector + * + * Creates the corresponding runtime vector object. + * + * \return RT_type runtime vector with same value type + */ + static HDINLINE RT_type toRT() + { + math::Vector result; + math::CT::detail::VectorFromCT()(result, This()); + return result; + } + }; + + //********************************************************* + + //________________________OperatorBase____________________________ + + template + struct applyOperator + { + using type = + typename applyOperator:: + type; + }; + + template + struct applyOperator, CT::Vector, T_BinaryOperator> + { + using OpResult = typename mpl::apply::type; + using type = CT::Vector; + }; + + template< + typename T_TypeA0, + typename T_TypeA1, + typename T_TypeB0, + typename T_TypeB1, + typename T_BinaryOperator> + struct applyOperator, CT::Vector, T_BinaryOperator> + { + using OpResult0 = typename mpl::apply::type; + using OpResult1 = typename mpl::apply::type; + using type = CT::Vector; + }; + + template< + typename T_TypeA0, + typename T_TypeA1, + typename T_TypeA2, + typename T_TypeB0, + typename T_TypeB1, + typename T_TypeB2, + typename T_BinaryOperator> + struct applyOperator< + CT::Vector, + CT::Vector, + T_BinaryOperator> + { + using OpResult0 = typename mpl::apply::type; + using OpResult1 = typename mpl::apply::type; + using OpResult2 = typename mpl::apply::type; + using type = CT::Vector; + }; + + //________________________A D D____________________________ + + template + struct add + { + 
using type = typename applyOperator< + typename Lhs::vector_type, + typename Rhs::vector_type, + mpl::plus>::type; + }; + + //________________________M U L____________________________ + + template + struct mul + { + using type = typename applyOperator< + typename Lhs::vector_type, + typename Rhs::vector_type, + mpl::times>::type; + }; + + //________________________M A X____________________________ + + /** maximum value + * + * @tparam Lhs input vector + * @tparam Rhs input vector + * @return ::type if Rhs is not given - maximum value in elements of Lhs else + * vector with point-wise maximum value per component + */ + template + struct max + { + using type = typename applyOperator< + typename Lhs::vector_type, + typename Rhs::vector_type, + mpl::max>::type; + }; + + + /** get element with maximum value + * + * @tparam T_Vec input vector + * @return ::type maximum value in elements of T_Vec + */ + template + struct max + { + using type = typename mpl:: + accumulate>::type; + }; + + //________________________M I N____________________________ + + + /** minimum value + * + * @tparam Lhs input vector + * @tparam Rhs input vector + * @return ::type if Rhs is not given - minimum value in elements of Lhs else + * vector with point-wise minimum value per component + */ + template + struct min + { + using type = typename applyOperator< + typename Lhs::vector_type, + typename Rhs::vector_type, + mpl::min>::type; + }; + + /** get element with minimum value + * + * @tparam T_Vec input vector + * @return ::type minimum value in elements of T_Vec + */ + template + struct min + { + using type = typename mpl:: + accumulate>::type; + }; + + //________________________D O T____________________________ + + template + struct dot + { + using MulResult = typename mul::type; + using type = typename mpl:: + accumulate, mpl::plus>::type; + }; + + //________________________V O L U M E____________________________ + + template + struct volume + { + using type = typename mpl:: + accumulate, 
mpl::times>::type; + }; + + //________________________S H R I N K T O________________________ + + /** shrink CT vector to given component count (dimension) + * + * This operation is designed to handle vectors with up to 3 components + * + * @tparam T_Vec vector to shrink + * @tparam T_dim target component count + * @treturn ::type new shrinked vector + */ + template + struct shrinkTo; + + template + struct shrinkTo + { + using Vec = T_Vec; + using type = CT::Vector; + }; + + template + struct shrinkTo + { + using Vec = T_Vec; + using type = CT::Vector; + }; + + template + struct shrinkTo + { + using Vec = T_Vec; + using type = CT::Vector; + }; + + //________________________A S S I G N________________________ + + /** Assign a type to a given component in the CT::Vector + * + * defines a public type as result + * + * @tparam T_Vec math::CT::Vector which should be changed + * @tparam T_ComponentPos number of component to changed (type must be bmpl::integral_c) + * @tparam T_Value new value + */ + template + struct Assign; + + template + struct Assign, bmpl::integral_c, T_Value> + { + using type = pmacc::math::CT::Vector; + }; + + template + struct Assign, bmpl::integral_c, T_Value> + { + using type = pmacc::math::CT::Vector; + }; + + template + struct Assign, bmpl::integral_c, T_Value> + { + using type = pmacc::math::CT::Vector; + }; + + /** Assign a type to a given component in the CT::Vector if position is not out of range + * + * if T_ComponentPos < T_Vec::dim ? T_Value is assigned to component T_ComponentPos + * else nothing is done. 
+ * defines a public type as result + * + * @tparam T_Vec math::CT::Vector which should be changed + * @tparam T_ComponentPos number of component to changed (type must be bmpl::integral_c) + * @tparam T_Value new value + */ + template + struct AssignIfInRange + { + using VectorDim = bmpl::integral_c; + using type = typename bmpl::if_< + bmpl::less, + typename pmacc::math::CT::Assign::type, + T_Vec>::type; + }; + + //________________________At_c____________________________ + + /** get element from a CT::Vector + * + * defines a public type as result + * + * @tparam T_Vec input CT::Vector + * @tparam T_idx integral index of the component + */ + template + struct At_c + { + using type = typename mpl::at_c::type; + }; + + //________________________At____________________________ + + /** get element from a CT::Vector + * + * defines a public type as result + * + * @tparam T_Vec input CT::Vector + * @tparam T_Idx integral type index of the component (e.g. boost::mpl::int_<2>) + */ + template + struct At + { + using type = typename mpl::at::type; + }; + + //________________________make_Vector___________________ + + /** create CT::Vector with equal elements + * + * defines a public type as result + * + * @tparam T_dim count of components + * @tparam T_Type type which is assigned to all components + */ + template + struct make_Vector; + + template + struct make_Vector<1, T_Type> + { + using type = pmacc::math::CT::Vector; + }; + + template + struct make_Vector<2, T_Type> + { + using type = pmacc::math::CT::Vector; + }; + + template + struct make_Vector<3, T_Type> + { + using type = pmacc::math::CT::Vector; + }; + + //________________________make_BasisVector___________________ + + /** Create CT::Vector that is the unit basis vector along the given direction + * + * Defines a public type as result. + * In case 0 <= T_direction < T_dim, return the basis vector type with value + * 1 in component T_direction and 0 in other components, otherwise return the + * zero vector type. 
+ * + * @tparam T_dim count of components + * @tparam T_direction index of the basis vector direction + * @tparam T_ValueType value type of the vector + */ + template + struct make_BasisVector + { + using Zeroes = typename make_Vector>::type; + using type = typename AssignIfInRange< + Zeroes, + bmpl::integral_c, + bmpl::integral_c>::type; + }; + + } // namespace CT + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/math_functor/abs.hpp b/include/pmacc/math/vector/math_functor/abs.hpp deleted file mode 100644 index 2c656d0bf8..0000000000 --- a/include/pmacc/math/vector/math_functor/abs.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/math/defines/abs.hpp" - -namespace pmacc -{ -namespace math -{ -namespace math_functor -{ - -struct Abs -{ - template - HDINLINE - Type operator()(const Type& x) const - { - return algorithms::math::abs(x); - } -}; - -} // math_vector -} // math - -namespace result_of -{ - -template -struct Functor -{ - using type = Type; -}; - -} // result_of -} // pmacc diff --git a/include/pmacc/math/vector/math_functor/cosf.hpp b/include/pmacc/math/vector/math_functor/cosf.hpp deleted file mode 100644 index 87a75f231f..0000000000 --- a/include/pmacc/math/vector/math_functor/cosf.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/math/defines/trigo.hpp" - -namespace pmacc -{ -namespace math -{ -namespace math_functor -{ - -struct Cosf -{ - using result_type = float; - - DINLINE result_type operator()(const result_type& value) const - { - return algorithms::math::cos(value); - } -}; - -} // math_functor -} // math -} // pmacc - diff --git a/include/pmacc/math/vector/math_functor/max.hpp b/include/pmacc/math/vector/math_functor/max.hpp deleted file mode 100644 index db58d67813..0000000000 --- a/include/pmacc/math/vector/math_functor/max.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/math/defines/comparison.hpp" - -namespace pmacc -{ -namespace math -{ -namespace math_functor -{ - -struct Max -{ - template - HDINLINE - Type operator()(const Type& a, const Type& b) const - { - return algorithms::math::max(a,b); - } -}; - -} // math_vector -} // math - -namespace result_of -{ - -template -struct Functor -{ - using type = Type; -}; - -} // result_of -} // pmacc diff --git a/include/pmacc/math/vector/math_functor/min.hpp b/include/pmacc/math/vector/math_functor/min.hpp deleted file mode 100644 index da593802d1..0000000000 --- a/include/pmacc/math/vector/math_functor/min.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/math/defines/comparison.hpp" - -namespace pmacc -{ -namespace math -{ -namespace math_functor -{ - -struct Min -{ - template - HDINLINE - Type operator()(const Type& a, const Type& b) const - { - return algorithms::math::min(a,b); - } -}; - -} // math_functor -} // math - -namespace result_of -{ - -template -struct Functor -{ - using type = Type; -}; - -} - -} // pmacc - diff --git a/include/pmacc/math/vector/math_functor/sin.hpp b/include/pmacc/math/vector/math_functor/sin.hpp deleted file mode 100644 index 1e34b499d7..0000000000 --- a/include/pmacc/math/vector/math_functor/sin.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/math/defines/trigo.hpp" - -namespace pmacc -{ -namespace math -{ -namespace math_functor -{ - -template -struct Sin -{ - using result_type = T_Type; - - DINLINE result_type operator()(const result_type& value) const - { - return algorithms::math::sin(value); - } -}; - -} // math_functor -} // math -} // pmacc - diff --git a/include/pmacc/math/vector/math_functor/sqrtf.hpp b/include/pmacc/math/vector/math_functor/sqrtf.hpp deleted file mode 100644 index 8810c40d3c..0000000000 --- a/include/pmacc/math/vector/math_functor/sqrtf.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - -#pragma once - -#include "pmacc/types.hpp" -#include "pmacc/algorithms/math/defines/sqrt.hpp" - -namespace pmacc -{ -namespace math -{ -namespace math_functor -{ - -struct Sqrtf -{ - using result_type = float; - - HDINLINE result_type operator()(const result_type& value) const - { - return algorithms::math::sqrt(value); - } -}; - -} // math_functor -} // math -} // PMacc - diff --git a/include/pmacc/math/vector/navigator/PermutedNavigator.hpp b/include/pmacc/math/vector/navigator/PermutedNavigator.hpp index e9547ff521..b993e1b565 100644 --- a/include/pmacc/math/vector/navigator/PermutedNavigator.hpp +++ b/include/pmacc/math/vector/navigator/PermutedNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -25,17 +25,16 @@ namespace pmacc { -namespace math -{ - -template -struct PermutedNavigator -{ - HDINLINE int operator()(int component) const + namespace math { - return Permutation().toRT()[component]; - } -}; + template + struct PermutedNavigator + { + HDINLINE int operator()(int component) const + { + return Permutation().toRT()[component]; + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/navigator/StackedNavigator.hpp b/include/pmacc/math/vector/navigator/StackedNavigator.hpp index 03e2dfc64b..fa99da708b 100644 --- a/include/pmacc/math/vector/navigator/StackedNavigator.hpp +++ b/include/pmacc/math/vector/navigator/StackedNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2014-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -25,22 +25,21 @@ namespace pmacc { -namespace math -{ - -/* Sticks two navigators together resulting in a new navigator. 
- * - * \tparam NaviA first navigator to be called - * \tparam NaviB second navigator to be called - */ -template -struct StackedNavigator -{ - HDINLINE int operator()(int component) const + namespace math { - return NaviB()(NaviA()(component)); - } -}; + /* Sticks two navigators together resulting in a new navigator. + * + * \tparam NaviA first navigator to be called + * \tparam NaviB second navigator to be called + */ + template + struct StackedNavigator + { + HDINLINE int operator()(int component) const + { + return NaviB()(NaviA()(component)); + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/math/vector/navigator/StandardNavigator.hpp b/include/pmacc/math/vector/navigator/StandardNavigator.hpp index 9650ff6d9b..f8ce1e337d 100644 --- a/include/pmacc/math/vector/navigator/StandardNavigator.hpp +++ b/include/pmacc/math/vector/navigator/StandardNavigator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -25,17 +25,16 @@ namespace pmacc { -namespace math -{ - -/** \todo rename this class to NavigatorIdentity*/ -struct StandardNavigator -{ - HDINLINE int operator()(int component) const + namespace math { - return component; - } -}; + /** \todo rename this class to NavigatorIdentity*/ + struct StandardNavigator + { + HDINLINE int operator()(int component) const + { + return component; + } + }; -} // math -} // PMacc + } // namespace math +} // namespace pmacc diff --git a/include/pmacc/memory/Align.hpp b/include/pmacc/memory/Align.hpp index c12089f026..43949229d7 100644 --- a/include/pmacc/memory/Align.hpp +++ b/include/pmacc/memory/Align.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -26,17 +26,15 @@ #include "pmacc/ppFunctions.hpp" /** calculate and set the optimal alignment for data - * - * you must align all arrays and structs that are used on the device - * @param byte size of data in bytes - */ -#define __optimal_align__(byte) \ - alignas( \ - /** \bug avoid bug if alignment is >16 byte \ - * https://github.com/ComputationalRadiationPhysics/picongpu/issues/1563 \ - */ \ - PMACC_MIN(PMACC_ROUND_UP_NEXT_POW2(byte),16) \ - ) + * + * you must align all arrays and structs that are used on the device + * @param byte size of data in bytes + */ +#define __optimal_align__(byte) \ + alignas(/** \bug avoid bug if alignment is >16 byte \ + * https://github.com/ComputationalRadiationPhysics/picongpu/issues/1563 \ + */ \ + PMACC_MIN(PMACC_ROUND_UP_NEXT_POW2(byte), 16)) -#define PMACC_ALIGN( var, ... ) __optimal_align__( sizeof( __VA_ARGS__ ) ) __VA_ARGS__ var -#define PMACC_ALIGN8( var, ... ) alignas( 8 ) __VA_ARGS__ var +#define PMACC_ALIGN(var, ...) __optimal_align__(sizeof(__VA_ARGS__)) __VA_ARGS__ var +#define PMACC_ALIGN8(var, ...) 
alignas(8) __VA_ARGS__ var diff --git a/include/pmacc/memory/Array.hpp b/include/pmacc/memory/Array.hpp index a7df616ffc..55d3a67818 100644 --- a/include/pmacc/memory/Array.hpp +++ b/include/pmacc/memory/Array.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. * @@ -26,108 +26,101 @@ namespace pmacc { -namespace memory -{ - /** static sized array - * - * mimic the most parts of the `std::array` - */ - template< - typename T_Type, - size_t T_size - > - struct Array + namespace memory { - using value_type = T_Type; - using size_type = size_t; - using reference = value_type &; - using const_reference = value_type const &; - using pointer = value_type *; - using const_pointer = value_type const *; - - /** get number of elements */ - HDINLINE - constexpr size_type size( ) const - { - return T_size; - } - - /** get maximum number of elements */ - HDINLINE - constexpr size_type max_size( ) const - { - return T_size; - } - - /** get the direct access to the internal data + /** static sized array * - * @{ + * mimic the most parts of the `std::array` */ - HDINLINE - pointer data( ) + template + struct Array { - return reinterpret_cast< pointer >( m_data ); - } + using value_type = T_Type; + using size_type = size_t; + using reference = value_type&; + using const_reference = value_type const&; + using pointer = value_type*; + using const_pointer = value_type const*; - HDINLINE - const_pointer data( ) const - { - return reinterpret_cast< const_pointer >( m_data ); - } - /** @} */ + /** get number of elements */ + HDINLINE + constexpr size_type size() const + { + return T_size; + } - /** default constructor - * - * all members are uninitialized - */ - Array() = default; + /** get maximum number of elements */ + HDINLINE + constexpr size_type max_size() const + { + return T_size; + } - /** constructor - * - * initialize each member with the given value - * - * @param value element assigned to each member - 
*/ - HDINLINE Array( T_Type const & value ) - { - for( size_type i = 0; i < size(); ++i ) - reinterpret_cast< T_Type* >( m_data )[ i ] = value; - } + /** get the direct access to the internal data + * + * @{ + */ + HDINLINE + pointer data() + { + return reinterpret_cast(m_data); + } - /** get N-th value - * - * @tparam T_Idx any type which can be implicit casted to an integral type - * @param idx index within the array - * - * @{ - */ - template< typename T_Idx > - HDINLINE - const_reference - operator[]( T_Idx const idx ) const - { - return reinterpret_cast< T_Type const * >( m_data )[ idx ]; - } + HDINLINE + const_pointer data() const + { + return reinterpret_cast(m_data); + } + /** @} */ - template< typename T_Idx > - HDINLINE - reference - operator[]( T_Idx const idx ) - { - return reinterpret_cast< T_Type* >( m_data )[ idx ]; - } - /** @} */ + /** default constructor + * + * all members are uninitialized + */ + Array() = default; - private: - /** data storage - * - * std::array is a so-called "aggregate" which does not default-initialize - * its members. In order to allow arbitrary types to skip implementing - * a default constructur, this member is not stored as - * `value_type m_data[ T_size ]` but as type-size aligned Byte type. 
- */ - uint8_t m_data alignas( alignof( T_Type ) ) [ T_size * sizeof( T_Type ) ]; - }; + /** constructor + * + * initialize each member with the given value + * + * @param value element assigned to each member + */ + HDINLINE Array(T_Type const& value) + { + for(size_type i = 0; i < size(); ++i) + reinterpret_cast(m_data)[i] = value; + } + + /** get N-th value + * + * @tparam T_Idx any type which can be implicit casted to an integral type + * @param idx index within the array + * + * @{ + */ + template + HDINLINE const_reference operator[](T_Idx const idx) const + { + return reinterpret_cast(m_data)[idx]; + } + + template + HDINLINE reference operator[](T_Idx const idx) + { + return reinterpret_cast(m_data)[idx]; + } + /** @} */ + + private: + /** data storage + * + * std::array is a so-called "aggregate" which does not default-initialize + * its members. In order to allow arbitrary types to skip implementing + * a default constructur, this member is not stored as + * `value_type m_data[ T_size ]` but as type-size aligned Byte type. + */ + uint8_t m_data alignas(alignof(T_Type))[T_size * sizeof(T_Type)]; + }; -} // namespace memory + } // namespace memory } // namespace pmacc diff --git a/include/pmacc/memory/CtxArray.hpp b/include/pmacc/memory/CtxArray.hpp index bcffda24a3..4733891359 100644 --- a/include/pmacc/memory/CtxArray.hpp +++ b/include/pmacc/memory/CtxArray.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -31,87 +31,70 @@ namespace pmacc { -namespace memory -{ - /** Static sized array for a local variable - * - * The array is designed to hold context variables in lock step - * programming. A context variable is just a local variable of a virtual - * worker. Allocating and using a context array allows to propagate - * virtual worker states over subsequent lock steps. A context array - * for a set of virtual workers is owned by their (physical) worker. 
- * - * The number of elements depends on the index domain size and the number - * of workers to process the indices. - */ - template< - typename T_Type, - typename T_IdxConfig - > - struct CtxArray : - public Array< - T_Type, - T_IdxConfig::numCollIter * T_IdxConfig::simdSize - >, - T_IdxConfig + namespace memory { - - using T_IdxConfig::domainSize; - using T_IdxConfig::workerSize; - using T_IdxConfig::simdSize; - using T_IdxConfig::numCollIter; - - using BaseArray = Array< - T_Type, - T_IdxConfig::numCollIter * T_IdxConfig::simdSize - >; - - /** default constructor - * - * data member are uninitialized - */ - CtxArray() = default; - - /** constructor + /** Static sized array for a local variable * - * initialize each member with the given value + * The array is designed to hold context variables in lock step + * programming. A context variable is just a local variable of a virtual + * worker. Allocating and using a context array allows to propagate + * virtual worker states over subsequent lock steps. A context array + * for a set of virtual workers is owned by their (physical) worker. * - * @param value element assigned to each member + * The number of elements depends on the index domain size and the number + * of workers to process the indices. 
*/ - HDINLINE explicit CtxArray( T_Type const & value ) : BaseArray( value ) + template + struct CtxArray + : public Array + , T_IdxConfig { - } + using T_IdxConfig::domainSize; + using T_IdxConfig::numCollIter; + using T_IdxConfig::simdSize; + using T_IdxConfig::workerSize; - /** disable copy constructor - */ - HDINLINE CtxArray( CtxArray const & ) = delete; + using BaseArray = Array; - /** constructor - * - * initialize each member with the result of the given functor - * - * @tparam T_Functor type of the user functor - * @tparam T_Args type of user parameters - * @param workerIdx number of worker range: [0;workerSize) - * @param functor functor to initialize the member ( need to implement `::operator(size_type idx)`) - * @param args user defined arguments those should forwarded to the functor - */ - template< - typename T_Functor, - typename ... T_Args - > - HDINLINE explicit CtxArray( uint32_t const workerIdx, T_Functor const & functor, T_Args const && ... args ) - { - mappings::threads::ForEachIdx< T_IdxConfig > - { workerIdx }( - [&,this]( uint32_t const linearIdx, uint32_t const idx ) - { - (*this)[idx] = functor( linearIdx, idx, std::forward< T_Args >( args ) ... 
); - } - ); - } + /** default constructor + * + * data member are uninitialized + */ + CtxArray() = default; + + /** constructor + * + * initialize each member with the given value + * + * @param value element assigned to each member + */ + HDINLINE explicit CtxArray(T_Type const& value) : BaseArray(value) + { + } + + /** disable copy constructor + */ + HDINLINE CtxArray(CtxArray const&) = delete; - }; + /** constructor + * + * initialize each member with the result of the given functor + * + * @tparam T_Functor type of the user functor + * @tparam T_Args type of user parameters + * @param workerIdx number of worker range: [0;workerSize) + * @param functor functor to initialize the member ( need to implement `::operator(size_type idx)`) + * @param args user defined arguments those should forwarded to the functor + */ + template + HDINLINE explicit CtxArray(uint32_t const workerIdx, T_Functor const& functor, T_Args const&&... args) + { + mappings::threads::ForEachIdx{workerIdx}( + [&, this](uint32_t const linearIdx, uint32_t const idx) { + (*this)[idx] = functor(linearIdx, idx, std::forward(args)...); + }); + } + }; -} // namespace memory + } // namespace memory } // namespace pmacc diff --git a/include/pmacc/memory/Delete.hpp b/include/pmacc/memory/Delete.hpp index 894490404a..45480c5614 100644 --- a/include/pmacc/memory/Delete.hpp +++ b/include/pmacc/memory/Delete.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -24,5 +24,15 @@ #pragma once -#define __delete( var ) if( ( var ) ) { delete( var ); ( var ) = nullptr; } -#define __deleteArray( var ) if( ( var ) ) { delete[ ]( var ); ( var ) = nullptr; } +#define __delete(var) \ + if((var)) \ + { \ + delete(var); \ + (var) = nullptr; \ + } +#define __deleteArray(var) \ + if((var)) \ + { \ + delete[](var); \ + (var) = nullptr; \ + } diff --git 
a/include/pmacc/memory/IndexPool.hpp b/include/pmacc/memory/IndexPool.hpp index fbaf2b4a99..bf49606a06 100644 --- a/include/pmacc/memory/IndexPool.hpp +++ b/include/pmacc/memory/IndexPool.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Heiko Burau +/* Copyright 2017-2021 Heiko Burau * * This file is part of PMacc. * @@ -29,159 +29,142 @@ namespace pmacc { -namespace memory -{ - - /** A memory pool of dynamic size containing indices. - * - * At initial state the pool consists of consecutive indices according to - * the `size` parameter. A new index is created by calling `get()`. - * If the user releases an index, by calling - * `release()`, it will be recycled at the next `get()` call. - * Therefore the initial ordering is not preserved. - * This pool provides `begin()` and `end()` methods. The iteration is done - * reversely, allowing for additions and removal of the current element while - * iterating. - * - * Scalings: - * `` ~ O(N) - * `get()` ~ O(1) - * `release()` ~ O(N) - * `` ~ O(N) ~ std::array - * - * @warning: This class is not thread-safe! - * - * @tparam T_Index type of index - * @tparam T_maxSize maximum number of indices - */ - template< - typename T_Index, - size_t T_maxSize - > - struct IndexPool + namespace memory { - private: - - /** Reverse-iterator of the memory pool. The pool is iterated reversely - * to ensure removal of the current element while iterating. + /** A memory pool of dynamic size containing indices. + * + * At initial state the pool consists of consecutive indices according to + * the `size` parameter. A new index is created by calling `get()`. + * If the user releases an index, by calling + * `release()`, it will be recycled at the next `get()` call. + * Therefore the initial ordering is not preserved. + * This pool provides `begin()` and `end()` methods. The iteration is done + * reversely, allowing for additions and removal of the current element while + * iterating. 
+ * + * Scalings: + * `` ~ O(N) + * `get()` ~ O(1) + * `release()` ~ O(N) + * `` ~ O(N) ~ std::array + * + * @warning: This class is not thread-safe! + * + * @tparam T_Index type of index + * @tparam T_maxSize maximum number of indices */ - struct ReverseIterator + template + struct IndexPool { - T_Index* pointer; + private: + /** Reverse-iterator of the memory pool. The pool is iterated reversely + * to ensure removal of the current element while iterating. + */ + struct ReverseIterator + { + T_Index* pointer; - HDINLINE - ReverseIterator( T_Index* const pointer ) : pointer( pointer ) - {} + HDINLINE + ReverseIterator(T_Index* const pointer) : pointer(pointer) + { + } - HDINLINE - void operator++() - { - this->pointer--; - } + HDINLINE + void operator++() + { + this->pointer--; + } + HDINLINE + T_Index& operator*() + { + return *(this->pointer); + } + + HDINLINE + bool operator!=(ReverseIterator const& other) const + { + return this->pointer != other.pointer; + } + }; + + size_t m_size; + Array listIds; + + public: + using Index = T_Index; + + PMACC_STATIC_ASSERT_MSG(std::numeric_limits::is_integer, _Index_type_must_be_an_integer_type); + PMACC_STATIC_ASSERT_MSG(std::numeric_limits::is_signed, _Index_type_must_be_a_signed_type); + PMACC_STATIC_ASSERT_MSG(T_maxSize > 0u, _maxSize_has_to_be_greater_than_zero); + + /** init pool with consecutive indices + * + * @param size initial number of indices + */ HDINLINE - T_Index& operator*() + IndexPool(const Index size = 0) : m_size(size) { - return *(this->pointer); + /* TODO: parallelize */ + for(size_t i = 0; i < T_maxSize; i++) + this->listIds[i] = static_cast(i); } + /** get a new index */ HDINLINE - bool operator!=( ReverseIterator const & other ) const + Index get() { - return this->pointer != other.pointer; - } - }; + if(this->m_size == T_maxSize - 1u) + return Index(-1); - size_t m_size; - Array< - T_Index, - T_maxSize - > listIds; - - public: - - using Index = T_Index; - - PMACC_STATIC_ASSERT_MSG( - 
std::numeric_limits< Index >::is_integer, - _Index_type_must_be_an_integer_type - ); - PMACC_STATIC_ASSERT_MSG( - std::numeric_limits< Index >::is_signed, - _Index_type_must_be_a_signed_type - ); - PMACC_STATIC_ASSERT_MSG( - T_maxSize > 0u, - _maxSize_has_to_be_greater_than_zero - ); - - /** init pool with consecutive indices - * - * @param size initial number of indices - */ - HDINLINE - IndexPool( const Index size = 0 ) : m_size( size ) - { - /* TODO: parallelize */ - for( size_t i = 0; i < T_maxSize; i++ ) - this->listIds[i] = static_cast< Index >( i ); - } - - /** get a new index */ - HDINLINE - Index get() - { - if( this->m_size == T_maxSize - 1u ) - return Index(-1); - - return this->listIds[this->m_size++]; - } + return this->listIds[this->m_size++]; + } - /** release an index */ - HDINLINE - void release( const Index idx ) - { - /* find position of `idx` */ - size_t pos; - for( size_t i = 0; i < this->m_size; i++ ) + /** release an index */ + HDINLINE + void release(const Index idx) { - if( this->listIds[i] == idx ) + /* find position of `idx` */ + size_t pos; + for(size_t i = 0; i < this->m_size; i++) { - pos = i; - break; + if(this->listIds[i] == idx) + { + pos = i; + break; + } } - } - this->listIds[pos] = this->listIds[--this->m_size]; - this->listIds[this->m_size] = idx; - } + this->listIds[pos] = this->listIds[--this->m_size]; + this->listIds[this->m_size] = idx; + } - /** get number of indices within pool */ - HDINLINE - size_t size( ) const - { - return this->m_size; - } + /** get number of indices within pool */ + HDINLINE + size_t size() const + { + return this->m_size; + } - /** get maximum number of indices within pool */ - HDINLINE - constexpr size_t max_size( ) const - { - return T_maxSize; - } + /** get maximum number of indices within pool */ + HDINLINE + constexpr size_t max_size() const + { + return T_maxSize; + } - HDINLINE - ReverseIterator begin() - { - return ReverseIterator( this->listIds.data() + this->m_size - 1u ); - } + HDINLINE + 
ReverseIterator begin() + { + return ReverseIterator(this->listIds.data() + this->m_size - 1u); + } - HDINLINE - ReverseIterator end() - { - return ReverseIterator( this->listIds.data() - 1u ); - } - }; + HDINLINE + ReverseIterator end() + { + return ReverseIterator(this->listIds.data() - 1u); + } + }; -} // namespace memory + } // namespace memory } // namespace pmacc diff --git a/include/pmacc/memory/MakeUnique.hpp b/include/pmacc/memory/MakeUnique.hpp deleted file mode 100644 index 38d92ad6bf..0000000000 --- a/include/pmacc/memory/MakeUnique.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* Copyright 2019-2020 Sergei Bastrakov - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#pragma once - -#include -#include - - -namespace pmacc -{ -namespace memory -{ - - /* - * Analogue of std::make_unique for C++11, except not disabled for arrays. - * Implementation is taken from - * https://en.cppreference.com/w/cpp/memory/unique_ptr/make_unique - */ - template< - typename T, - typename ... T_Args - > - inline std::unique_ptr< T > makeUnique( T_Args && ... args ) - { - return std::unique_ptr< T >( new T( std::forward< T_Args >( args ) ... 
) ); - } - -} // namespace memory -} // namespace pmacc diff --git a/include/pmacc/memory/boxes/CachedBox.hpp b/include/pmacc/memory/boxes/CachedBox.hpp index fc9833e4e2..ec164a902b 100644 --- a/include/pmacc/memory/boxes/CachedBox.hpp +++ b/include/pmacc/memory/boxes/CachedBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -31,49 +31,49 @@ namespace pmacc { namespace intern { - - template< typename T_ValueType, class T_BlockDescription, uint32_t T_Id> + template class CachedBox { public: typedef T_BlockDescription BlockDescription; typedef T_ValueType ValueType; + private: typedef typename BlockDescription::SuperCellSize SuperCellSize; typedef typename BlockDescription::FullSuperCellSize FullSuperCellSize; typedef typename BlockDescription::OffsetOrigin OffsetOrigin; public: - typedef DataBox > Type; + typedef DataBox> Type; - template< typename T_Acc > - HDINLINE static Type create( T_Acc const & acc ) + template + HDINLINE static Type create(T_Acc const& acc) { DataSpace offset(OffsetOrigin::toRT()); - Type c_box(Type::init( acc )); + Type c_box(Type::init(acc)); return c_box.shift(offset); } - }; - } + } // namespace intern struct CachedBox { - - template - DINLINE static typename intern::CachedBox::Type - create( T_Acc const & acc, const ValueType_& value, const BlockDescription_ block ) + template + DINLINE static typename intern::CachedBox::Type create( + T_Acc const& acc, + const ValueType_& value, + const BlockDescription_ block) { - return intern::CachedBox::create( acc ); + return intern::CachedBox::create(acc); } - template< uint32_t Id_, typename ValueType_, class BlockDescription_, typename T_Acc > - DINLINE static typename intern::CachedBox::Type - create( T_Acc const & acc, const BlockDescription_ block ) + template + DINLINE static typename intern::CachedBox::Type create( + T_Acc const& acc, + const BlockDescription_ block) { - return 
intern::CachedBox::create( acc ); + return intern::CachedBox::create(acc); } - }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/boxes/DataBox.hpp b/include/pmacc/memory/boxes/DataBox.hpp index bd8fe3a20e..042773e5e0 100644 --- a/include/pmacc/memory/boxes/DataBox.hpp +++ b/include/pmacc/memory/boxes/DataBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz * * This file is part of PMacc. @@ -33,10 +33,9 @@ namespace pmacc class Box; template - class Box< DIM1, Base> : public Base + class Box : public Base { public: - enum { Dim = DIM1 @@ -44,12 +43,12 @@ namespace pmacc typedef typename Base::ValueType ValueType; typedef typename Base::RefValueType RefValueType; - HDINLINE RefValueType operator()(const DataSpace &idx = DataSpace()) const + HDINLINE RefValueType operator()(const DataSpace& idx = DataSpace()) const { return Base::operator[](idx.x()); } - HDINLINE RefValueType operator()(const DataSpace &idx = DataSpace()) + HDINLINE RefValueType operator()(const DataSpace& idx = DataSpace()) { return Base::operator[](idx.x()); } @@ -63,24 +62,23 @@ namespace pmacc } }; - template< class Base> - class Box< DIM2, Base> : public Base + template + class Box : public Base { public: - enum { Dim = DIM2 }; typedef typename Base::ValueType ValueType; - typedef typename Base::RefValueType RefValueType; + typedef typename Base::RefValueType RefValueType; - HDINLINE RefValueType operator()(const DataSpace &idx = DataSpace()) const + HDINLINE RefValueType operator()(const DataSpace& idx = DataSpace()) const { return (Base::operator[](idx.y()))[idx.x()]; } - HDINLINE RefValueType operator()(const DataSpace &idx = DataSpace()) + HDINLINE RefValueType operator()(const DataSpace& idx = DataSpace()) { return (Base::operator[](idx.y()))[idx.x()]; } @@ -92,14 +90,12 @@ namespace pmacc HDINLINE Box() : Base() { } - }; template class Box : 
public Base { public: - enum { Dim = DIM3 @@ -107,12 +103,12 @@ namespace pmacc typedef typename Base::ValueType ValueType; typedef typename Base::RefValueType RefValueType; - HDINLINE RefValueType operator()(const DataSpace &idx = DataSpace()) const + HDINLINE RefValueType operator()(const DataSpace& idx = DataSpace()) const { return (Base::operator[](idx.z()))[idx.y()][idx.x()]; } - HDINLINE RefValueType operator()(const DataSpace &idx = DataSpace()) + HDINLINE RefValueType operator()(const DataSpace& idx = DataSpace()) { return (Base::operator[](idx.z()))[idx.y()][idx.x()]; } @@ -124,18 +120,15 @@ namespace pmacc HDINLINE Box() : Base() { } - }; - - } + } // namespace private_Box template class DataBox : public private_Box::Box { public: - typedef typename Base::ValueType ValueType; typedef DataBox Type; typedef typename Base::RefValueType RefValueType; @@ -157,8 +150,8 @@ namespace pmacc HDINLINE DataBox reduceZ(const int zOffset) const { - return DataBox (Base::reduceZ(zOffset)); + return DataBox(Base::reduceZ(zOffset)); } }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/boxes/DataBoxDim1Access.hpp b/include/pmacc/memory/boxes/DataBoxDim1Access.hpp index 5d3ac59cb2..cbe28612c4 100644 --- a/include/pmacc/memory/boxes/DataBoxDim1Access.hpp +++ b/include/pmacc/memory/boxes/DataBoxDim1Access.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,54 +28,54 @@ namespace pmacc { - -template -class DataBoxDim1Access : protected T_Base -{ -public: - - typedef T_Base Base; - static constexpr uint32_t Dim = Base::Dim; - - - typedef typename Base::ValueType ValueType; - typedef typename Base::RefValueType RefValueType; - - - HDINLINE RefValueType operator()(const pmacc::DataSpace &idx = pmacc::DataSpace()) const + template + class DataBoxDim1Access : protected T_Base { - const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx.x())); - return Base::operator()(real_idx); - } - - HDINLINE RefValueType operator()(const pmacc::DataSpace &idx = pmacc::DataSpace()) - { - const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx.x())); - return Base::operator()(real_idx); - } - - HDINLINE RefValueType operator[](const int idx) const - { - const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx)); - return Base::operator()(real_idx); - } - - HDINLINE RefValueType operator[](const int idx) - { - const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx)); - return Base::operator()(real_idx); - } - - HDINLINE DataBoxDim1Access(const Base base, const pmacc::DataSpace originalSize) : Base(base), originalSize(originalSize) - { - } - - HDINLINE DataBoxDim1Access(const pmacc::DataSpace originalSize) : Base(), originalSize(originalSize) - { - } -private: - PMACC_ALIGN(originalSize, const pmacc::DataSpace); - -}; - -} //namespace + public: + typedef T_Base Base; + static constexpr uint32_t Dim = Base::Dim; + + + typedef typename Base::ValueType ValueType; + typedef typename Base::RefValueType RefValueType; + + + HDINLINE RefValueType operator()(const pmacc::DataSpace& idx = pmacc::DataSpace()) const + { + const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx.x())); + return Base::operator()(real_idx); + } + + HDINLINE RefValueType operator()(const pmacc::DataSpace& idx = pmacc::DataSpace()) + { + const pmacc::DataSpace 
real_idx(DataSpaceOperations::map(originalSize, idx.x())); + return Base::operator()(real_idx); + } + + HDINLINE RefValueType operator[](const int idx) const + { + const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx)); + return Base::operator()(real_idx); + } + + HDINLINE RefValueType operator[](const int idx) + { + const pmacc::DataSpace real_idx(DataSpaceOperations::map(originalSize, idx)); + return Base::operator()(real_idx); + } + + HDINLINE DataBoxDim1Access(const Base base, const pmacc::DataSpace originalSize) + : Base(base) + , originalSize(originalSize) + { + } + + HDINLINE DataBoxDim1Access(const pmacc::DataSpace originalSize) : Base(), originalSize(originalSize) + { + } + + private: + PMACC_ALIGN(originalSize, const pmacc::DataSpace); + }; + +} // namespace pmacc diff --git a/include/pmacc/memory/boxes/DataBoxUnaryTransform.hpp b/include/pmacc/memory/boxes/DataBoxUnaryTransform.hpp index b0d6394e30..1a14a20e16 100644 --- a/include/pmacc/memory/boxes/DataBoxUnaryTransform.hpp +++ b/include/pmacc/memory/boxes/DataBoxUnaryTransform.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. 
* @@ -27,60 +27,57 @@ namespace pmacc { - -/** DataBox which apply a unary functor on every operator () and [] access - * - * @tparam T_Base base class to inherit from - * @tparam T_UnaryFunctor unary functor which is applied on every access - * - template parameter of functor is the input type for the functor - * - functor must have defined the result type as ::result - */ -template class T_UnaryFunctor> -class DataBoxUnaryTransform : public T_Base -{ -public: - - typedef T_Base Base; - typedef typename Base::ValueType BaseValueType; - - typedef T_UnaryFunctor UnaryFunctor; - - typedef typename UnaryFunctor::result ValueType; - typedef ValueType RefValueType; - static constexpr uint32_t Dim = Base::Dim; - - HDINLINE DataBoxUnaryTransform(const Base& base) : Base(base) - { - } - - HDINLINE DataBoxUnaryTransform() : Base() + /** DataBox which apply a unary functor on every operator () and [] access + * + * @tparam T_Base base class to inherit from + * @tparam T_UnaryFunctor unary functor which is applied on every access + * - template parameter of functor is the input type for the functor + * - functor must have defined the result type as ::result + */ + template class T_UnaryFunctor> + class DataBoxUnaryTransform : public T_Base { - } - - template - HDINLINE ValueType operator()(const T_Index &idx) const - { - return UnaryFunctor()(Base::operator()(idx)); - } - - template - HDINLINE ValueType operator()(const T_Index &idx) - { - return UnaryFunctor()(Base::operator()(idx)); - } - - template - HDINLINE ValueType operator[](const T_Index idx) - { - return UnaryFunctor()(Base::operator[](idx)); - } - - template - HDINLINE ValueType operator[](const T_Index idx) const - { - return UnaryFunctor()(Base::operator[](idx)); - } - -}; - -} //namespace pmacc + public: + typedef T_Base Base; + typedef typename Base::ValueType BaseValueType; + + typedef T_UnaryFunctor UnaryFunctor; + + typedef typename UnaryFunctor::result ValueType; + typedef ValueType RefValueType; + static 
constexpr uint32_t Dim = Base::Dim; + + HDINLINE DataBoxUnaryTransform(const Base& base) : Base(base) + { + } + + HDINLINE DataBoxUnaryTransform() : Base() + { + } + + template + HDINLINE ValueType operator()(const T_Index& idx) const + { + return UnaryFunctor()(Base::operator()(idx)); + } + + template + HDINLINE ValueType operator()(const T_Index& idx) + { + return UnaryFunctor()(Base::operator()(idx)); + } + + template + HDINLINE ValueType operator[](const T_Index idx) + { + return UnaryFunctor()(Base::operator[](idx)); + } + + template + HDINLINE ValueType operator[](const T_Index idx) const + { + return UnaryFunctor()(Base::operator[](idx)); + } + }; + +} // namespace pmacc diff --git a/include/pmacc/memory/boxes/MultiBox.hpp b/include/pmacc/memory/boxes/MultiBox.hpp index 10444dbef7..7b40e945b1 100644 --- a/include/pmacc/memory/boxes/MultiBox.hpp +++ b/include/pmacc/memory/boxes/MultiBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -29,258 +29,265 @@ namespace pmacc { - -namespace mutiBoxAccess -{ - -template -class MutiBoxAccess -{ -public: - typedef Type ValueType; - typedef ValueType& RefValueType; - - HDINLINE MutiBoxAccess(ValueType* ptr, const size_t offset) : - offset(offset), ptr((char*) ptr) - { - } - - HDINLINE RefValueType operator[](const uint32_t idx) - { - return *((ValueType*) (ptr + (idx * offset))); - } - - HDINLINE RefValueType operator[](const uint32_t idx) const - { - return *((ValueType*) (ptr + (idx * offset))); - } - -private: - PMACC_ALIGN(offset, const size_t); - PMACC_ALIGN(ptr, const char*); -}; - -}//namespace MutiBoxAccass - -template -class MultiBox; - -template -class MultiBox -{ -private: - typedef DataBox > DataBoxType; -public: - - enum - { - Dim = DIM1 + namespace mutiBoxAccess + { + template + class MutiBoxAccess + { + public: + typedef Type ValueType; + typedef ValueType& RefValueType; + + HDINLINE MutiBoxAccess(ValueType* ptr, const size_t offset) : offset(offset), ptr((char*) ptr) + { + } + + HDINLINE RefValueType operator[](const uint32_t idx) + { + return *((ValueType*) (ptr + (idx * offset))); + } + + HDINLINE RefValueType operator[](const uint32_t idx) const + { + return *((ValueType*) (ptr + (idx * offset))); + } + + private: + PMACC_ALIGN(offset, const size_t); + PMACC_ALIGN(ptr, const char*); + }; + + } // namespace mutiBoxAccess + + template + class MultiBox; + + template + class MultiBox + { + private: + typedef DataBox> DataBoxType; + + public: + enum + { + Dim = DIM1 + }; + typedef mutiBoxAccess::MutiBoxAccess ValueType; + typedef mutiBoxAccess::MutiBoxAccess RefValueType; + typedef MultiBox ReducedType; + + HDINLINE DataBoxType getDataBox(uint32_t nameId) + { + return DataBoxType(PitchedBox((Type*) ((char*) fixedPointer + attributePitch * nameId))); + } + + HDINLINE RefValueType operator[](const int idx) + { + return RefValueType(fixedPointer + idx, attributePitch); + } + + HDINLINE RefValueType operator[](const int idx) const + { + 
return RefValueType(fixedPointer + idx, attributePitch); + } + + HDINLINE MultiBox(Type* pointer, const DataSpace& offset, const DataSpace&, const size_t pitch) + : attributePitch(pitch) + , fixedPointer(pointer + offset[0]) + { + } + + HDINLINE MultiBox(Type* pointer, const size_t attributePitch) + : attributePitch(attributePitch) + , fixedPointer(pointer) + { + } + + /*Object must init by copy a valid instance*/ + HDINLINE MultiBox() + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return RefValueType(fixedPointer, attributePitch); + } + + HDINLINE Type const* getPointer() const + { + return fixedPointer; + } + HDINLINE Type* getPointer() + { + return fixedPointer; + } + + + protected: + PMACC_ALIGN(attributePitch, size_t); + PMACC_ALIGN(fixedPointer, Type*); }; - typedef mutiBoxAccess::MutiBoxAccess ValueType; - typedef mutiBoxAccess::MutiBoxAccess RefValueType; - typedef MultiBox ReducedType; - - HDINLINE DataBoxType getDataBox(uint32_t nameId) - { - return DataBoxType(PitchedBox ((Type*) ((char*) fixedPointer + attributePitch * nameId))); - } - - HDINLINE RefValueType operator[](const int idx) - { - return RefValueType(fixedPointer + idx, attributePitch); - } - HDINLINE RefValueType operator[](const int idx) const - { - return RefValueType(fixedPointer + idx, attributePitch); - } - - HDINLINE MultiBox(Type* pointer, const DataSpace &offset, const DataSpace&, const size_t pitch) : - attributePitch(pitch), fixedPointer(pointer + offset[0]) - { - } - - HDINLINE MultiBox(Type* pointer, const size_t attributePitch) : - attributePitch(attributePitch), fixedPointer(pointer) - { - } - - /*Object must init by copy a valid instance*/ - HDINLINE MultiBox() - { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return RefValueType(fixedPointer, attributePitch); - } - - HDINLINE Type const * getPointer() const - { - return 
fixedPointer; - } - HDINLINE Type* getPointer() - { - return fixedPointer; - } - - -protected: - - PMACC_ALIGN(attributePitch, size_t); - PMACC_ALIGN(fixedPointer, Type*); -}; - -template -class MultiBox -{ -private: - typedef DataBox > DataBoxType; -public: - - enum - { - Dim = DIM2 + template + class MultiBox + { + private: + typedef DataBox> DataBoxType; + + public: + enum + { + Dim = DIM2 + }; + typedef mutiBoxAccess::MutiBoxAccess ValueType; + typedef mutiBoxAccess::MutiBoxAccess RefValueType; + typedef MultiBox ReducedType; + + HDINLINE DataBoxType getDataBox(uint32_t nameId) + { + return DataBoxType( + PitchedBox((Type*) ((char*) fixedPointer + attributePitch * nameId), pitch)); + } + + HDINLINE MultiBox( + Type* pointer, + const DataSpace& offset, + const DataSpace& memSize, + const size_t pitch) + : pitch(pitch) + , attributePitch(pitch * memSize.y()) + , fixedPointer((Type*) ((char*) pointer + offset[1] * pitch) + offset[0]) + { + } + + /*Object must init by copy a valid instance*/ + HDINLINE MultiBox() + { + } + + HDINLINE ReducedType operator[](const int idx) + { + return ReducedType((Type*) ((char*) this->fixedPointer + idx * pitch), attributePitch); + } + + HDINLINE ReducedType operator[](const int idx) const + { + return ReducedType((Type*) ((char*) this->fixedPointer + idx * pitch), attributePitch); + } + + HDINLINE MultiBox(Type* pointer, size_t pitch, size_t attributePitch) + : pitch(pitch) + , attributePitch(attributePitch) + , fixedPointer(pointer) + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return RefValueType(fixedPointer, attributePitch); + } + + HDINLINE Type const* getPointer() const + { + return fixedPointer; + } + HDINLINE Type* getPointer() + { + return fixedPointer; + } + + protected: + PMACC_ALIGN(pitch, size_t); + PMACC_ALIGN(attributePitch, size_t); + PMACC_ALIGN(fixedPointer, Type*); }; - typedef mutiBoxAccess::MutiBoxAccess ValueType; - typedef 
mutiBoxAccess::MutiBoxAccess RefValueType; - typedef MultiBox ReducedType; - HDINLINE DataBoxType getDataBox(uint32_t nameId) - { - return DataBoxType(PitchedBox ((Type*) ((char*) fixedPointer + attributePitch * nameId), pitch)); - } - - HDINLINE MultiBox(Type* pointer, const DataSpace &offset, const DataSpace &memSize, const size_t pitch) : - pitch(pitch), - attributePitch(pitch*memSize.y()), - fixedPointer((Type*) ((char*) pointer + offset[1] * pitch) + offset[0]) - { - } - - /*Object must init by copy a valid instance*/ - HDINLINE MultiBox() - { - } - - HDINLINE ReducedType operator[](const int idx) - { - return ReducedType((Type*) ((char*) this->fixedPointer + idx * pitch), attributePitch); - } - - HDINLINE ReducedType operator[](const int idx) const - { - return ReducedType((Type*) ((char*) this->fixedPointer + idx * pitch), attributePitch); - } - - HDINLINE MultiBox(Type* pointer, size_t pitch, size_t attributePitch) : - pitch(pitch), - attributePitch(attributePitch), - fixedPointer(pointer) - { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return RefValueType(fixedPointer, attributePitch); - } - - HDINLINE Type const * getPointer() const - { - return fixedPointer; - } - HDINLINE Type* getPointer() - { - return fixedPointer; - } - -protected: - - PMACC_ALIGN(pitch, size_t); - PMACC_ALIGN(attributePitch, size_t); - PMACC_ALIGN(fixedPointer, Type*); -}; - -template -class MultiBox -{ -private: - typedef DataBox > DataBoxType; -public: - - enum - { - Dim = DIM3 + template + class MultiBox + { + private: + typedef DataBox> DataBoxType; + + public: + enum + { + Dim = DIM3 + }; + typedef mutiBoxAccess::MutiBoxAccess ValueType; + typedef mutiBoxAccess::MutiBoxAccess RefValueType; + typedef MultiBox ReducedType; + + HDINLINE DataBoxType getDataBox(uint32_t nameId) + { + return DataBoxType( + PitchedBox((Type*) ((char*) fixedPointer + attributePitch * nameId), pitch, pitch2D)); + } + + 
HDINLINE ReducedType operator[](const int idx) + { + return ReducedType((Type*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch, attributePitch); + } + + HDINLINE ReducedType operator[](const int idx) const + { + return ReducedType((Type*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch, attributePitch); + } + + /** constructor + * + * @param pointer pointer to the origin of the physical memory + * @param offset offset (in elements) + * @param memSize size of the physical memory (in elements) + * @param pitch number of bytes in one line (first dimension) + */ + HDINLINE MultiBox( + Type* pointer, + const DataSpace& offset, + const DataSpace& memSize, + const size_t pitch) + : pitch(pitch) + , pitch2D(memSize.y() * pitch) + , attributePitch((memSize.y() * pitch) * size.z()) + , fixedPointer( + (Type*) ((char*) pointer + offset[2] * (memSize.y() * pitch) + offset[1] * pitch) + offset[0]) + { + } + + /*Object must init by copy a valid instance*/ + HDINLINE MultiBox() + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return RefValueType(fixedPointer, attributePitch); + } + + HDINLINE Type const* getPointer() const + { + return fixedPointer; + } + HDINLINE Type* getPointer() + { + return fixedPointer; + } + + + PMACC_ALIGN(pitch, size_t); + PMACC_ALIGN(pitch2D, size_t); + PMACC_ALIGN(attributePitch, size_t); + PMACC_ALIGN(fixedPointer, Type*); }; - typedef mutiBoxAccess::MutiBoxAccess ValueType; - typedef mutiBoxAccess::MutiBoxAccess RefValueType; - typedef MultiBox ReducedType; - - HDINLINE DataBoxType getDataBox(uint32_t nameId) - { - return DataBoxType(PitchedBox ((Type*) ((char*) fixedPointer + attributePitch * nameId), pitch, pitch2D)); - } - - HDINLINE ReducedType operator[](const int idx) - { - return ReducedType((Type*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch, attributePitch); - } - - HDINLINE ReducedType operator[](const int idx) const - { - return 
ReducedType((Type*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch, attributePitch); - } - - /** constructor - * - * @param pointer pointer to the origin of the physical memory - * @param offset offset (in elements) - * @param memSize size of the physical memory (in elements) - * @param pitch number of bytes in one line (first dimension) - */ - HDINLINE MultiBox(Type* pointer, const DataSpace &offset, const DataSpace &memSize, const size_t pitch) : - pitch(pitch), pitch2D(memSize.y() * pitch), attributePitch((memSize.y() * pitch) * size.z()), - fixedPointer((Type*) ((char*) pointer + offset[2] * (memSize.y() * pitch) + offset[1] * pitch) + offset[0]) - { - } - - /*Object must init by copy a valid instance*/ - HDINLINE MultiBox() - { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return RefValueType(fixedPointer, attributePitch); - } - - HDINLINE Type const * getPointer() const - { - return fixedPointer; - } - HDINLINE Type* getPointer() - { - return fixedPointer; - } - - - PMACC_ALIGN(pitch, size_t); - PMACC_ALIGN(pitch2D, size_t); - PMACC_ALIGN(attributePitch, size_t); - PMACC_ALIGN(fixedPointer, Type*); - -}; - - -} +} // namespace pmacc diff --git a/include/pmacc/memory/boxes/PitchedBox.hpp b/include/pmacc/memory/boxes/PitchedBox.hpp index 44e2dc0976..b61b915647 100644 --- a/include/pmacc/memory/boxes/PitchedBox.hpp +++ b/include/pmacc/memory/boxes/PitchedBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -28,235 +28,224 @@ namespace pmacc { - -template -class PitchedBox; - -template -class PitchedBox -{ -public: - - enum - { - Dim = DIM1 + template + class PitchedBox; + + template + class PitchedBox + { + public: + enum + { + Dim = DIM1 + }; + typedef TYPE ValueType; + typedef ValueType& RefValueType; + typedef PitchedBox ReducedType; + + HDINLINE RefValueType operator[](const int idx) + { + return fixedPointer[idx]; + } + + HDINLINE RefValueType operator[](const int idx) const + { + return fixedPointer[idx]; + } + + HDINLINE PitchedBox(TYPE* pointer, const DataSpace& offset, const DataSpace&, const size_t) + : fixedPointer(pointer + offset[0]) + { + } + + HDINLINE PitchedBox(TYPE* pointer, const DataSpace& offset) : fixedPointer(pointer + offset[0]) + { + } + + HDINLINE PitchedBox(TYPE* pointer) : fixedPointer(pointer) + { + } + + /*Object must init by copy a valid instance*/ + HDINLINE PitchedBox() + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return *(fixedPointer); + } + + HDINLINE TYPE const* getPointer() const + { + return fixedPointer; + } + HDINLINE TYPE* getPointer() + { + return fixedPointer; + } + + + protected: + PMACC_ALIGN(fixedPointer, TYPE*); }; - typedef TYPE ValueType; - typedef ValueType& RefValueType; - typedef PitchedBox ReducedType; - - HDINLINE RefValueType operator[](const int idx) - { - return fixedPointer[idx]; - } - - HDINLINE RefValueType operator[](const int idx) const - { - return fixedPointer[idx]; - } - - HDINLINE PitchedBox(TYPE* pointer, const DataSpace &offset, const DataSpace&, const size_t) : - fixedPointer(pointer + offset[0]) - { - } - - HDINLINE PitchedBox(TYPE* pointer, const DataSpace &offset) : - fixedPointer(pointer + offset[0]) - { - } - - HDINLINE PitchedBox(TYPE* pointer) : - fixedPointer(pointer) - { - } - - /*Object must init by copy a valid instance*/ - HDINLINE PitchedBox() - { - } - - /*!return the first value in the box (list) - 
* @return first value - */ - HDINLINE RefValueType operator*() - { - return *(fixedPointer); - } - HDINLINE TYPE const * getPointer() const - { - return fixedPointer; - } - HDINLINE TYPE* getPointer() - { - return fixedPointer; - } - - -protected: - - PMACC_ALIGN(fixedPointer, TYPE*); -}; - -template -class PitchedBox -{ -public: - - enum - { - Dim = DIM2 + template + class PitchedBox + { + public: + enum + { + Dim = DIM2 + }; + typedef TYPE ValueType; + typedef ValueType& RefValueType; + typedef PitchedBox ReducedType; + + HDINLINE PitchedBox(TYPE* pointer, const DataSpace& offset, const DataSpace&, const size_t pitch) + : pitch(pitch) + , fixedPointer((TYPE*) ((char*) pointer + offset[1] * pitch) + offset[0]) + { + } + + HDINLINE PitchedBox(TYPE* pointer, size_t pitch) : pitch(pitch), fixedPointer(pointer) + { + } + + /*Object must init by copy a valid instance*/ + HDINLINE PitchedBox() + { + } + + HDINLINE ReducedType operator[](const int idx) + { + return ReducedType((TYPE*) ((char*) this->fixedPointer + idx * pitch)); + } + + HDINLINE ReducedType operator[](const int idx) const + { + return ReducedType((TYPE*) ((char*) this->fixedPointer + idx * pitch)); + } + + HDINLINE PitchedBox(TYPE* pointer, const DataSpace& offset, size_t pitch) + : pitch(pitch) + , fixedPointer((TYPE*) ((char*) pointer + offset[1] * pitch) + offset[0]) + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return *((TYPE*) fixedPointer); + } + + HDINLINE TYPE const* getPointer() const + { + return fixedPointer; + } + HDINLINE TYPE* getPointer() + { + return fixedPointer; + } + + protected: + PMACC_ALIGN(pitch, size_t); + PMACC_ALIGN(fixedPointer, TYPE*); }; - typedef TYPE ValueType; - typedef ValueType& RefValueType; - typedef PitchedBox ReducedType; - - HDINLINE PitchedBox(TYPE* pointer, const DataSpace &offset, const DataSpace&, const size_t pitch) : - pitch(pitch), - fixedPointer((TYPE*) ((char*) pointer + 
offset[1] * pitch) + offset[0]) - { - } - - HDINLINE PitchedBox(TYPE* pointer, size_t pitch) : - pitch(pitch), - fixedPointer(pointer) - { - } - - /*Object must init by copy a valid instance*/ - HDINLINE PitchedBox() - { - } - - HDINLINE ReducedType operator[](const int idx) - { - return ReducedType((TYPE*) ((char*) this->fixedPointer + idx * pitch)); - } - - HDINLINE ReducedType operator[](const int idx) const - { - return ReducedType((TYPE*) ((char*) this->fixedPointer + idx * pitch)); - } - HDINLINE PitchedBox(TYPE* pointer, const DataSpace& offset, size_t pitch) : - pitch(pitch), - fixedPointer((TYPE*) ((char*) pointer + offset[1] * pitch) + offset[0]) - { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return *((TYPE*) fixedPointer); - } - - HDINLINE TYPE const * getPointer() const - { - return fixedPointer; - } - HDINLINE TYPE* getPointer() - { - return fixedPointer; - } - -protected: - - PMACC_ALIGN(pitch, size_t); - PMACC_ALIGN(fixedPointer, TYPE*); - -}; - -template -class PitchedBox -{ -public: - - enum - { - Dim = DIM3 + template + class PitchedBox + { + public: + enum + { + Dim = DIM3 + }; + typedef TYPE ValueType; + typedef ValueType& RefValueType; + typedef PitchedBox ReducedType; + + HDINLINE ReducedType operator[](const int idx) + { + return ReducedType((TYPE*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch); + } + + HDINLINE ReducedType operator[](const int idx) const + { + return ReducedType((TYPE*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch); + } + + /** constructor + * + * @param pointer pointer to the origin of the physical memory + * @param offset offset (in elements) + * @param memSize size of the physical memory (in elements) + * @param pitch number of bytes in one line (first dimension) + */ + HDINLINE PitchedBox( + TYPE* pointer, + const DataSpace& offset, + const DataSpace& memSize, + const size_t pitch) + : pitch(pitch) + , pitch2D(memSize[1] * 
pitch) + , fixedPointer( + (TYPE*) ((char*) pointer + offset[2] * (memSize[1] * pitch) + offset[1] * pitch) + offset[0]) + { + } + + HDINLINE PitchedBox(TYPE* pointer, const size_t pitch, const size_t pitch2D) + : pitch(pitch) + , pitch2D(pitch2D) + , fixedPointer(pointer) + { + } + + /*Object must init by copy a valid instance*/ + HDINLINE PitchedBox() + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return *(fixedPointer); + } + + HDINLINE TYPE const* getPointer() const + { + return fixedPointer; + } + HDINLINE TYPE* getPointer() + { + return fixedPointer; + } + + HDINLINE pmacc::cursor::BufferCursor toCursor() const + { + return pmacc::cursor::BufferCursor( + (TYPE*) fixedPointer, + ::pmacc::math::Size_t<2>(pitch, pitch2D)); + } + + protected: + HDINLINE PitchedBox reduceZ(const int zOffset) const + { + return PitchedBox((TYPE*) ((char*) (this->fixedPointer) + pitch2D * zOffset), pitch); + } + + + PMACC_ALIGN(pitch, size_t); + PMACC_ALIGN(pitch2D, size_t); + PMACC_ALIGN(fixedPointer, TYPE*); }; - typedef TYPE ValueType; - typedef ValueType& RefValueType; - typedef PitchedBox ReducedType; - - HDINLINE ReducedType operator[](const int idx) - { - return ReducedType((TYPE*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch); - } - - HDINLINE ReducedType operator[](const int idx) const - { - return ReducedType((TYPE*) ((char*) (this->fixedPointer) + idx * pitch2D), pitch); - } - - /** constructor - * - * @param pointer pointer to the origin of the physical memory - * @param offset offset (in elements) - * @param memSize size of the physical memory (in elements) - * @param pitch number of bytes in one line (first dimension) - */ - HDINLINE PitchedBox(TYPE* pointer, const DataSpace &offset, const DataSpace &memSize, const size_t pitch) : - pitch(pitch), pitch2D(memSize[1] * pitch), - fixedPointer((TYPE*) ((char*) pointer + offset[2] * (memSize[1] * pitch) + offset[1] * pitch) + offset[0]) 
- { - } - - HDINLINE PitchedBox(TYPE* pointer, const size_t pitch, const size_t pitch2D) : - pitch(pitch), pitch2D(pitch2D), - fixedPointer(pointer) - { - } - - /*Object must init by copy a valid instance*/ - HDINLINE PitchedBox() - { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return *(fixedPointer); - } - - HDINLINE TYPE const * getPointer() const - { - return fixedPointer; - } - HDINLINE TYPE* getPointer() - { - return fixedPointer; - } - - HDINLINE pmacc::cursor::BufferCursor - toCursor() const - { - return pmacc::cursor::BufferCursor - ((TYPE*)fixedPointer, ::pmacc::math::Size_t<2>(pitch, pitch2D)); - } - -protected: - - HDINLINE PitchedBox reduceZ(const int zOffset) const - { - return PitchedBox ( - (TYPE*) ((char*) (this->fixedPointer) + pitch2D * zOffset), - pitch - ); - } - - - PMACC_ALIGN(pitch, size_t); - PMACC_ALIGN(pitch2D, size_t); - PMACC_ALIGN(fixedPointer, TYPE*); - -}; - - -} +} // namespace pmacc diff --git a/include/pmacc/memory/boxes/SharedBox.hpp b/include/pmacc/memory/boxes/SharedBox.hpp index db4660701b..0cddec71a8 100644 --- a/include/pmacc/memory/boxes/SharedBox.hpp +++ b/include/pmacc/memory/boxes/SharedBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -32,253 +32,230 @@ namespace pmacc { - -/** create shared memory on gpu - * - * @tparam T_TYPE type of memory objects - * @tparam T_Vector CT::Vector with size description (per dimension) - * @tparam T_id unique id for this object - * (is needed if more than one instance of shared memory in one kernel is used) - * @tparam T_dim dimension of the memory (supports DIM1,DIM2 and DIM3) - */ -template -class SharedBox; - -template -class SharedBox -{ -public: - - enum - { - Dim = DIM1 - }; - typedef T_TYPE ValueType; - typedef ValueType& RefValueType; - typedef T_Vector Size; - typedef SharedBox, T_id> ReducedType; - typedef SharedBox This; - - HDINLINE RefValueType operator[](const int idx) - { - return fixedPointer[idx]; - } - - HDINLINE RefValueType operator[](const int idx) const - { - return fixedPointer[idx]; - } - - HDINLINE SharedBox(ValueType* pointer) : - fixedPointer(pointer) - { - } - - DINLINE SharedBox() : - fixedPointer(nullptr) - { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return *(fixedPointer); - } - - HDINLINE ValueType const * getPointer() const - { - return fixedPointer; - } - HDINLINE ValueType* getPointer() - { - return fixedPointer; - } - - /** create a shared memory box + /** create shared memory on gpu * - * This call synchronizes a block and must be called from all threads and - * not inside a if clauses + * @tparam T_TYPE type of memory objects + * @tparam T_Vector CT::Vector with size description (per dimension) + * @tparam T_id unique id for this object + * (is needed if more than one instance of shared memory in one kernel is used) + * @tparam T_dim dimension of the memory (supports DIM1,DIM2 and DIM3) */ - template< typename T_Acc > - static DINLINE SharedBox - init( T_Acc const & acc ) - { - auto& mem_sh = pmacc::memory::shared::allocate< - T_id, - memory::Array< - ValueType, - math::CT::volume< Size >::type::value - > - >( acc ); - return SharedBox( 
mem_sh.data() ); - } - -protected: - - PMACC_ALIGN(fixedPointer, ValueType*); -}; + template + class SharedBox; -template -class SharedBox -{ -public: - - enum + template + class SharedBox { - Dim = DIM2 + public: + enum + { + Dim = DIM1 + }; + typedef T_TYPE ValueType; + typedef ValueType& RefValueType; + typedef T_Vector Size; + typedef SharedBox, T_id> ReducedType; + typedef SharedBox This; + + HDINLINE RefValueType operator[](const int idx) + { + return fixedPointer[idx]; + } + + HDINLINE RefValueType operator[](const int idx) const + { + return fixedPointer[idx]; + } + + HDINLINE SharedBox(ValueType* pointer) : fixedPointer(pointer) + { + } + + DINLINE SharedBox() : fixedPointer(nullptr) + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return *(fixedPointer); + } + + HDINLINE ValueType const* getPointer() const + { + return fixedPointer; + } + HDINLINE ValueType* getPointer() + { + return fixedPointer; + } + + /** create a shared memory box + * + * This call synchronizes a block and must be called from all threads and + * not inside a if clauses + */ + template + static DINLINE SharedBox init(T_Acc const& acc) + { + auto& mem_sh + = pmacc::memory::shared::allocate::type::value>>( + acc); + return SharedBox(mem_sh.data()); + } + + protected: + PMACC_ALIGN(fixedPointer, ValueType*); }; - typedef T_TYPE ValueType; - typedef ValueType& RefValueType; - typedef T_Vector Size; - typedef SharedBox, T_id > ReducedType; - typedef SharedBox This; - - HDINLINE SharedBox(ValueType* pointer = nullptr) : - fixedPointer(pointer) - { - } - - HDINLINE ReducedType operator[](const int idx) - { - return ReducedType(this->fixedPointer + idx * Size::x::value); - } - HDINLINE ReducedType operator[](const int idx) const + template + class SharedBox { - return ReducedType(this->fixedPointer + idx * Size::x::value); - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE 
RefValueType operator*() - { - return *((ValueType*) fixedPointer); - } - - HDINLINE ValueType const * getPointer() const - { - return fixedPointer; - } - HDINLINE ValueType* getPointer() - { - return fixedPointer; - } - - /** create a shared memory box - * - * This call synchronizes a block and must be called from all threads and - * not inside a if clauses - */ - template< typename T_Acc > - static DINLINE SharedBox - init( T_Acc const & acc ) - { - auto& mem_sh = pmacc::memory::shared::allocate< - T_id, - memory::Array< - ValueType, - math::CT::volume< Size >::type::value - > - >( acc ); - return SharedBox( mem_sh.data() ); - } - - HDINLINE pmacc::cursor::CT::BufferCursor > - toCursor() const - { - return pmacc::cursor::CT::BufferCursor > - ((ValueType*) fixedPointer); - } - -protected: - - PMACC_ALIGN(fixedPointer, ValueType*); -}; - -template -class SharedBox -{ -public: - - enum - { - Dim = DIM3 + public: + enum + { + Dim = DIM2 + }; + typedef T_TYPE ValueType; + typedef ValueType& RefValueType; + typedef T_Vector Size; + typedef SharedBox, T_id> ReducedType; + typedef SharedBox This; + + HDINLINE SharedBox(ValueType* pointer = nullptr) : fixedPointer(pointer) + { + } + + HDINLINE ReducedType operator[](const int idx) + { + return ReducedType(this->fixedPointer + idx * Size::x::value); + } + + HDINLINE ReducedType operator[](const int idx) const + { + return ReducedType(this->fixedPointer + idx * Size::x::value); + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return *((ValueType*) fixedPointer); + } + + HDINLINE ValueType const* getPointer() const + { + return fixedPointer; + } + HDINLINE ValueType* getPointer() + { + return fixedPointer; + } + + /** create a shared memory box + * + * This call synchronizes a block and must be called from all threads and + * not inside a if clauses + */ + template + static DINLINE SharedBox init(T_Acc const& acc) + { + auto& mem_sh + = 
pmacc::memory::shared::allocate::type::value>>( + acc); + return SharedBox(mem_sh.data()); + } + + HDINLINE pmacc::cursor::CT::BufferCursor> + toCursor() const + { + return pmacc::cursor::CT:: + BufferCursor>( + (ValueType*) fixedPointer); + } + + protected: + PMACC_ALIGN(fixedPointer, ValueType*); }; - typedef T_TYPE ValueType; - typedef ValueType& RefValueType; - typedef T_Vector Size; - typedef SharedBox, T_id > ReducedType; - typedef SharedBox This; - - HDINLINE ReducedType operator[](const int idx) - { - return ReducedType(this->fixedPointer + idx * (Size::x::value * Size::y::value)); - } - - HDINLINE ReducedType operator[](const int idx) const - { - return ReducedType(this->fixedPointer + idx * (Size::x::value *Size::y::value)); - } - HDINLINE SharedBox(ValueType* pointer = nullptr) : - fixedPointer(pointer) + template + class SharedBox { - } - - /*!return the first value in the box (list) - * @return first value - */ - HDINLINE RefValueType operator*() - { - return *(fixedPointer); - } - - HDINLINE ValueType const * getPointer() const - { - return fixedPointer; - } - HDINLINE ValueType* getPointer() - { - return fixedPointer; - } - - HDINLINE pmacc::cursor::CT::BufferCursor > - toCursor() const - { - return pmacc::cursor::CT::BufferCursor > - ((ValueType*)fixedPointer); - } - - /** create a shared memory box - * - * This call synchronizes a block and must be called from all threads and - * not inside a if clauses - */ - template< typename T_Acc > - static DINLINE SharedBox - init( T_Acc const & acc ) - { - auto& mem_sh = pmacc::memory::shared::allocate< - T_id, - memory::Array< + public: + enum + { + Dim = DIM3 + }; + typedef T_TYPE ValueType; + typedef ValueType& RefValueType; + typedef T_Vector Size; + typedef SharedBox, T_id> ReducedType; + typedef SharedBox This; + + HDINLINE ReducedType operator[](const int idx) + { + return ReducedType(this->fixedPointer + idx * (Size::x::value * Size::y::value)); + } + + HDINLINE ReducedType operator[](const int idx) 
const + { + return ReducedType(this->fixedPointer + idx * (Size::x::value * Size::y::value)); + } + + HDINLINE SharedBox(ValueType* pointer = nullptr) : fixedPointer(pointer) + { + } + + /*!return the first value in the box (list) + * @return first value + */ + HDINLINE RefValueType operator*() + { + return *(fixedPointer); + } + + HDINLINE ValueType const* getPointer() const + { + return fixedPointer; + } + HDINLINE ValueType* getPointer() + { + return fixedPointer; + } + + HDINLINE pmacc::cursor::CT::BufferCursor< + ValueType, + ::pmacc::math::CT:: + Int> + toCursor() const + { + return pmacc::cursor::CT::BufferCursor< ValueType, - math::CT::volume< Size >::type::value - > - >( acc ); - return SharedBox( mem_sh.data() ); - } - -protected: - - PMACC_ALIGN(fixedPointer, ValueType*); - -}; - + ::pmacc::math::CT:: + Int>( + (ValueType*) fixedPointer); + } + + /** create a shared memory box + * + * This call synchronizes a block and must be called from all threads and + * not inside a if clauses + */ + template + static DINLINE SharedBox init(T_Acc const& acc) + { + auto& mem_sh + = pmacc::memory::shared::allocate::type::value>>( + acc); + return SharedBox(mem_sh.data()); + } + + protected: + PMACC_ALIGN(fixedPointer, ValueType*); + }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/Buffer.hpp b/include/pmacc/memory/buffers/Buffer.hpp index 26fae41b6a..7916336a7b 100644 --- a/include/pmacc/memory/buffers/Buffer.hpp +++ b/include/pmacc/memory/buffers/Buffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -32,19 +32,17 @@ namespace pmacc { - /** * Minimal function description of a buffer, * * @tparam TYPE data type stored in the buffer * @tparam DIM dimension of the buffer (1-3) */ - template + template class Buffer { public: - - typedef DataBox > DataBoxType; + typedef DataBox> DataBoxType; /** constructor * @@ -53,10 +51,13 @@ namespace pmacc * can be less than `physicalMemorySize` * @param physicalMemorySize size of the physical memory (in elements) */ - Buffer(DataSpace size, DataSpace physicalMemorySize) : - data_space(size), data1D(true), current_size(nullptr), m_physicalMemorySize(physicalMemorySize) + Buffer(DataSpace size, DataSpace physicalMemorySize) + : data_space(size) + , data1D(true) + , current_size(nullptr) + , m_physicalMemorySize(physicalMemorySize) { - CUDA_CHECK(cudaMallocHost((void**)¤t_size, sizeof (size_t))); + CUDA_CHECK(cuplaMallocHost((void**) ¤t_size, sizeof(size_t))); *current_size = size.productOfComponents(); } @@ -65,7 +66,7 @@ namespace pmacc */ virtual ~Buffer() { - CUDA_CHECK_NO_EXCEPT(cudaFreeHost(current_size)); + CUDA_CHECK_NO_EXCEPT(cuplaFreeHost(current_size)); } /*! 
Get base pointer to memory @@ -110,41 +111,44 @@ namespace pmacc int64_t current_size = static_cast(currentSize); //!\todo: current size can be changed if it is a DeviceBuffer and current size is on device - //call first get current size (but const not allow this) + // call first get current size (but const not allow this) - if (DIM == DIM1) + if(DIM == DIM1) { tmp[0] = current_size; } - if (DIM == DIM2) + if(DIM == DIM2) { - if (current_size <= data_space[0]) + if(current_size <= data_space[0]) { tmp[0] = current_size; tmp[1] = 1; - } else + } + else { tmp[0] = data_space[0]; - tmp[1] = (current_size+data_space[0]-1) / data_space[0]; + tmp[1] = (current_size + data_space[0] - 1) / data_space[0]; } } - if (DIM == DIM3) + if(DIM == DIM3) { - if (current_size <= data_space[0]) + if(current_size <= data_space[0]) { tmp[0] = current_size; tmp[1] = 1; tmp[2] = 1; - } else if (current_size <= (data_space[0] * data_space[1])) + } + else if(current_size <= (data_space[0] * data_space[1])) { tmp[0] = data_space[0]; - tmp[1] = (current_size+data_space[0]-1) / data_space[0]; + tmp[1] = (current_size + data_space[0] - 1) / data_space[0]; tmp[2] = 1; - } else + } + else { tmp[0] = data_space[0]; tmp[1] = data_space[1]; - tmp[2] = (current_size+(data_space[0] * data_space[1])-1) / (data_space[0] * data_space[1]); + tmp[2] = (current_size + (data_space[0] * data_space[1]) - 1) / (data_space[0] * data_space[1]); } } @@ -170,11 +174,11 @@ namespace pmacc *current_size = newsize; } - virtual void reset(bool preserveData = false)=0; + virtual void reset(bool preserveData = false) = 0; - virtual void setValue(const TYPE& value)=0; + virtual void setValue(const TYPE& value) = 0; - virtual DataBox > getDataBox()=0; + virtual DataBox> getDataBox() = 0; inline bool is1D() { @@ -182,7 +186,6 @@ namespace pmacc } protected: - /*! Check if my DataSpace is greater than other. 
* @param other other DataSpace * @return true if my DataSpace (one dimension) is greater than other, false otherwise @@ -195,10 +198,9 @@ namespace pmacc DataSpace data_space; DataSpace m_physicalMemorySize; - size_t *current_size; + size_t* current_size; bool data1D; - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/DeviceBuffer.hpp b/include/pmacc/memory/buffers/DeviceBuffer.hpp index 5ad1c73eb9..1a2f344070 100644 --- a/include/pmacc/memory/buffers/DeviceBuffer.hpp +++ b/include/pmacc/memory/buffers/DeviceBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * Alexander Grund * * This file is part of PMacc. @@ -31,18 +31,16 @@ #include "pmacc/types.hpp" - - #include namespace pmacc { class EventTask; - template + template class HostBuffer; - template + template class Buffer; /** @@ -51,11 +49,10 @@ namespace pmacc * @tparam TYPE datatype of the buffer * @tparam DIM dimension of the buffer */ - template + template class DeviceBuffer : public Buffer { protected: - /** constructor * * @param size extent for each dimension (in elements) @@ -63,36 +60,35 @@ namespace pmacc * can be less than `physicalMemorySize` * @param physicalMemorySize size of the physical memory (in elements) */ - DeviceBuffer(DataSpace size, DataSpace physicalMemorySize) : - Buffer(size, physicalMemorySize) + DeviceBuffer(DataSpace size, DataSpace physicalMemorySize) + : Buffer(size, physicalMemorySize) { - } public: - using Buffer::setCurrentSize; //!\todo :this function was hidden, I don't know why. /** * Destructor. 
*/ - virtual ~DeviceBuffer() - { - }; + virtual ~DeviceBuffer(){}; HINLINE - container::CartBuffer, - copier::D2DCopier, - assigner::DeviceMemAssigner<> > + container::CartBuffer< + TYPE, + DIM, + allocator::DeviceMemAllocator, + copier::D2DCopier, + assigner::DeviceMemAssigner<>> cartBuffer() const { - cudaPitchedPtr cudaData = this->getCudaPitched(); + cuplaPitchedPtr cuplaData = this->getCudaPitched(); math::Size_t pitch; if(DIM >= 2) - pitch[0] = cudaData.pitch; + pitch[0] = cuplaData.pitch; if(DIM == 3) pitch[1] = pitch[0] * this->getPhysicalMemorySize()[1]; - container::DeviceBuffer result((TYPE*)cudaData.ptr, this->getDataSpace(), false, pitch); + container::DeviceBuffer result((TYPE*) cuplaData.ptr, this->getDataSpace(), false, pitch); return result; } @@ -121,7 +117,7 @@ namespace pmacc * * @return pointer to stored value on host side */ - virtual size_t* getCurrentSizeHostSidePointer()=0; + virtual size_t* getCurrentSizeHostSidePointer() = 0; /** * Sets current size of any dimension. @@ -135,11 +131,11 @@ namespace pmacc virtual void setCurrentSize(const size_t size) = 0; /** - * Returns the internal pitched cuda pointer. + * Returns the internal pitched cupla pointer. 
* - * @return internal pitched cuda pointer + * @return internal pitched cupla pointer */ - virtual const cudaPitchedPtr getCudaPitched() const = 0; + virtual const cuplaPitchedPtr getCudaPitched() const = 0; /** get line pitch of memory in byte * @@ -160,7 +156,6 @@ namespace pmacc * @param other the DeviceBuffer to copy from */ virtual void copyFrom(DeviceBuffer& other) = 0; - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/DeviceBufferIntern.hpp b/include/pmacc/memory/buffers/DeviceBufferIntern.hpp index f056b3ddac..b5618b41bd 100644 --- a/include/pmacc/memory/buffers/DeviceBufferIntern.hpp +++ b/include/pmacc/memory/buffers/DeviceBufferIntern.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -26,293 +26,289 @@ #include "pmacc/eventSystem/tasks/Factory.hpp" #include "pmacc/memory/buffers/DeviceBuffer.hpp" #include "pmacc/memory/boxes/DataBox.hpp" +#include "pmacc/memory/Array.hpp" #include "pmacc/assert.hpp" namespace pmacc { - -/** - * Internal device buffer implementation. - */ -template -class DeviceBufferIntern : public DeviceBuffer -{ -public: - - typedef typename DeviceBuffer::DataBoxType DataBoxType; - - /*! create device buffer - * @param size extent for each dimension (in elements) - * @param sizeOnDevice memory with the current size of the grid is stored on device - * @param useVectorAsBase use a vector as base of the array (is not lined pitched) - * if true size on device is atomaticly set to false + /** + * Internal device buffer implementation. 
*/ - DeviceBufferIntern(DataSpace size, bool sizeOnDevice = false, bool useVectorAsBase = false) : - DeviceBuffer(size, size), - sizeOnDevice(sizeOnDevice), - useOtherMemory(false), - offset(DataSpace()) + template + class DeviceBufferIntern : public DeviceBuffer { - //create size on device before any use of setCurrentSize - if (useVectorAsBase) + public: + typedef typename DeviceBuffer::DataBoxType DataBoxType; + + /*! create device buffer + * @param size extent for each dimension (in elements) + * @param sizeOnDevice memory with the current size of the grid is stored on device + * @param useVectorAsBase use a vector as base of the array (is not lined pitched) + * if true size on device is atomaticly set to false + */ + DeviceBufferIntern(DataSpace size, bool sizeOnDevice = false, bool useVectorAsBase = false) + : DeviceBuffer(size, size) + , sizeOnDevice(sizeOnDevice) + , useOtherMemory(false) + , offset(DataSpace()) { - this->sizeOnDevice = false; - createSizeOnDevice(this->sizeOnDevice); - createFakeData(); - this->data1D = true; + // create size on device before any use of setCurrentSize + if(useVectorAsBase) + { + this->sizeOnDevice = false; + createSizeOnDevice(this->sizeOnDevice); + createFakeData(); + this->data1D = true; + } + else + { + createSizeOnDevice(this->sizeOnDevice); + createData(); + this->data1D = false; + } } - else + + DeviceBufferIntern( + DeviceBuffer& source, + DataSpace size, + DataSpace offset, + bool sizeOnDevice = false) + : DeviceBuffer(size, source.getPhysicalMemorySize()) + , sizeOnDevice(sizeOnDevice) + , offset(offset + source.getOffset()) + , data(source.getCudaPitched()) + , useOtherMemory(true) { - createSizeOnDevice(this->sizeOnDevice); - createData(); + createSizeOnDevice(sizeOnDevice); this->data1D = false; } - } - - DeviceBufferIntern(DeviceBuffer& source, DataSpace size, DataSpace offset, bool sizeOnDevice = false) : - DeviceBuffer(size, source.getPhysicalMemorySize()), - sizeOnDevice(sizeOnDevice), - offset(offset + 
source.getOffset()), - data(source.getCudaPitched()), - useOtherMemory(true) - { - createSizeOnDevice(sizeOnDevice); - this->data1D = false; - } - - virtual ~DeviceBufferIntern() - { - __startOperation(ITask::TASK_CUDA); - - if (sizeOnDevice) - { - CUDA_CHECK_NO_EXCEPT(cudaFree(sizeOnDevicePtr)); - } - if (!useOtherMemory) + virtual ~DeviceBufferIntern() { - CUDA_CHECK_NO_EXCEPT(cudaFree(data.ptr)); + __startOperation(ITask::TASK_DEVICE); + if(sizeOnDevice) + { + CUDA_CHECK_NO_EXCEPT(cuplaFree(sizeOnDevicePtr)); + } + if(!useOtherMemory) + { + CUDA_CHECK_NO_EXCEPT(cuplaFree(data.ptr)); + } } - } - - void reset(bool preserveData = true) - { - this->setCurrentSize(Buffer::getDataSpace().productOfComponents()); - __startOperation(ITask::TASK_CUDA); - if (!preserveData) + void reset(bool preserveData = true) { - TYPE value; - /* using `uint8_t` for byte-wise looping through tmp var value of `TYPE` */ - uint8_t* valuePtr = reinterpret_cast(&value); - for( size_t b = 0; b < sizeof(TYPE); ++b) + this->setCurrentSize(Buffer::getDataSpace().productOfComponents()); + + __startOperation(ITask::TASK_DEVICE); + if(!preserveData) { - valuePtr[b] = static_cast(0); + // Using Array is a workaround for types without default constructor + memory::Array tmp; + memset(reinterpret_cast(tmp.data()), 0, sizeof(tmp)); + // use first element to avoid issue because Array is aligned (sizeof can be larger than component type) + setValue(tmp[0]); } - /* set value with zero-ed `TYPE` */ - setValue(value); } - } - - DataBoxType getDataBox() - { - __startOperation(ITask::TASK_CUDA); - return DataBoxType(PitchedBox ((TYPE*) data.ptr, offset, - this->getPhysicalMemorySize(), data.pitch)); - } - TYPE* getPointer() - { - __startOperation(ITask::TASK_CUDA); - - if (DIM == DIM1) + DataBoxType getDataBox() { - return (TYPE*) (data.ptr) + this->offset[0]; + __startOperation(ITask::TASK_DEVICE); + return DataBoxType( + PitchedBox((TYPE*) data.ptr, offset, this->getPhysicalMemorySize(), data.pitch)); } - 
else if (DIM == DIM2) + + TYPE* getPointer() { - return (TYPE*) ((char*) data.ptr + this->offset[1] * this->data.pitch) + this->offset[0]; + __startOperation(ITask::TASK_DEVICE); + + if(DIM == DIM1) + { + return (TYPE*) (data.ptr) + this->offset[0]; + } + else if(DIM == DIM2) + { + return (TYPE*) ((char*) data.ptr + this->offset[1] * this->data.pitch) + this->offset[0]; + } + else + { + const size_t offsetY = this->offset[1] * this->data.pitch; + const size_t sizePlaneXY = this->getPhysicalMemorySize()[1] * this->data.pitch; + return (TYPE*) ((char*) data.ptr + this->offset[2] * sizePlaneXY + offsetY) + this->offset[0]; + } } - else + + DataSpace getOffset() const { - const size_t offsetY = this->offset[1] * this->data.pitch; - const size_t sizePlaneXY = this->getPhysicalMemorySize()[1] * this->data.pitch; - return (TYPE*) ((char*) data.ptr + this->offset[2] * sizePlaneXY + offsetY) + this->offset[0]; + return offset; } - } - - DataSpace getOffset() const - { - return offset; - } - bool hasCurrentSizeOnDevice() const - { - return sizeOnDevice; - } - - size_t* getCurrentSizeOnDevicePointer() - { - __startOperation(ITask::TASK_CUDA); - if (!sizeOnDevice) + bool hasCurrentSizeOnDevice() const { - throw std::runtime_error("Buffer has no size on device!, currentSize is only stored on host side."); + return sizeOnDevice; } - return sizeOnDevicePtr; - } - - size_t* getCurrentSizeHostSidePointer() - { - __startOperation(ITask::TASK_HOST); - return this->current_size; - } - - TYPE* getBasePointer() - { - __startOperation(ITask::TASK_CUDA); - return (TYPE*) data.ptr; - } - /*! 
Get current size of any dimension - * @return count of current elements per dimension - */ - virtual size_t getCurrentSize() - { - if (sizeOnDevice) + size_t* getCurrentSizeOnDevicePointer() { - __startTransaction(__getTransactionEvent()); - Environment<>::get().Factory().createTaskGetCurrentSizeFromDevice(*this); - __endTransaction().waitForFinished(); + __startOperation(ITask::TASK_DEVICE); + if(!sizeOnDevice) + { + throw std::runtime_error("Buffer has no size on device!, currentSize is only stored on host side."); + } + return sizeOnDevicePtr; } - return DeviceBuffer::getCurrentSize(); - } - - virtual void setCurrentSize(const size_t size) - { - Buffer::setCurrentSize(size); - - if (sizeOnDevice) + size_t* getCurrentSizeHostSidePointer() { - Environment<>::get().Factory().createTaskSetCurrentSizeOnDevice( - *this, size); + __startOperation(ITask::TASK_HOST); + return this->current_size; } - } - - void copyFrom(HostBuffer& other) - { - - PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); - Environment<>::get().Factory().createTaskCopyHostToDevice(other, *this); - - } - - void copyFrom(DeviceBuffer& other) - { - PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); - Environment<>::get().Factory().createTaskCopyDeviceToDevice(other, *this); - - } - - const cudaPitchedPtr getCudaPitched() const - { - __startOperation(ITask::TASK_CUDA); - return data; - } + TYPE* getBasePointer() + { + __startOperation(ITask::TASK_DEVICE); + return (TYPE*) data.ptr; + } - size_t getPitch() const - { - return data.pitch; - } + /*! 
Get current size of any dimension + * @return count of current elements per dimension + */ + virtual size_t getCurrentSize() + { + if(sizeOnDevice) + { + __startTransaction(__getTransactionEvent()); + Environment<>::get().Factory().createTaskGetCurrentSizeFromDevice(*this); + __endTransaction().waitForFinished(); + } - virtual void setValue(const TYPE& value) - { - Environment<>::get().Factory().createTaskSetValue(*this, value); - }; + return DeviceBuffer::getCurrentSize(); + } -private: + virtual void setCurrentSize(const size_t size) + { + Buffer::setCurrentSize(size); - /*! create native array with pitched lines - */ - void createData() - { - __startOperation(ITask::TASK_CUDA); - data.ptr = nullptr; - data.pitch = 1; - data.xsize = this->getDataSpace()[0] * sizeof (TYPE); - data.ysize = 1; + if(sizeOnDevice) + { + Environment<>::get().Factory().createTaskSetCurrentSizeOnDevice(*this, size); + } + } - if (DIM == DIM1) + void copyFrom(HostBuffer& other) { - log("Create device 1D data: %1% MiB") % (data.xsize / 1024 / 1024); - CUDA_CHECK(cudaMallocPitch(&data.ptr, &data.pitch, data.xsize, 1)); + PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); + Environment<>::get().Factory().createTaskCopyHostToDevice(other, *this); } - if (DIM == DIM2) - { - data.ysize = this->getDataSpace()[1]; - log("Create device 2D data: %1% MiB") % (data.xsize * data.ysize / 1024 / 1024); - CUDA_CHECK(cudaMallocPitch(&data.ptr, &data.pitch, data.xsize, data.ysize)); - } - if (DIM == DIM3) + void copyFrom(DeviceBuffer& other) { - cudaExtent extent; - extent.width = this->getDataSpace()[0] * sizeof (TYPE); - extent.height = this->getDataSpace()[1]; - extent.depth = this->getDataSpace()[2]; + PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); + Environment<>::get().Factory().createTaskCopyDeviceToDevice(other, *this); + } - log("Create device 3D data: %1% MiB") % (this->getDataSpace().productOfComponents() * sizeof (TYPE) / 1024 / 1024); - 
CUDA_CHECK(cudaMalloc3D(&data, extent)); + const cuplaPitchedPtr getCudaPitched() const + { + __startOperation(ITask::TASK_DEVICE); + return data; } - reset(false); - } + size_t getPitch() const + { + return data.pitch; + } - /*!create 1D, 2D, 3D Array which use only a vector as base - */ - void createFakeData() - { - __startOperation(ITask::TASK_CUDA); - data.ptr = nullptr; - data.pitch = 1; - data.xsize = this->getDataSpace()[0] * sizeof (TYPE); - data.ysize = 1; + virtual void setValue(const TYPE& value) + { + Environment<>::get().Factory().createTaskSetValue(*this, value); + }; - log("Create device fake data: %1% MiB") % (this->getDataSpace().productOfComponents() * sizeof (TYPE) / 1024 / 1024); - CUDA_CHECK(cudaMallocPitch(&data.ptr, &data.pitch, this->getDataSpace().productOfComponents() * sizeof (TYPE), 1)); + private: + /*! create native array with pitched lines + */ + void createData() + { + __startOperation(ITask::TASK_DEVICE); + data.ptr = nullptr; + data.pitch = 1; + data.xsize = this->getDataSpace()[0] * sizeof(TYPE); + data.ysize = 1; - //fake the pitch, thus we can use this 1D Buffer as 2D or 3D - data.pitch = this->getDataSpace()[0] * sizeof (TYPE); + if(DIM == DIM1) + { + log("Create device 1D data: %1% MiB") % (data.xsize / 1024 / 1024); + CUDA_CHECK(cuplaMallocPitch(&data.ptr, &data.pitch, data.xsize, 1)); + } + if(DIM == DIM2) + { + data.ysize = this->getDataSpace()[1]; + log("Create device 2D data: %1% MiB") % (data.xsize * data.ysize / 1024 / 1024); + CUDA_CHECK(cuplaMallocPitch(&data.ptr, &data.pitch, data.xsize, data.ysize)); + } + if(DIM == DIM3) + { + cuplaExtent extent; + extent.width = this->getDataSpace()[0] * sizeof(TYPE); + extent.height = this->getDataSpace()[1]; + extent.depth = this->getDataSpace()[2]; + + log("Create device 3D data: %1% MiB") + % (this->getDataSpace().productOfComponents() * sizeof(TYPE) / 1024 / 1024); + CUDA_CHECK(cuplaMalloc3D(&data, extent)); + } - if (DIM > DIM1) - { - data.ysize = this->getDataSpace()[1]; + 
reset(false); } - reset(false); - } + /*!create 1D, 2D, 3D Array which use only a vector as base + */ + void createFakeData() + { + __startOperation(ITask::TASK_DEVICE); + data.ptr = nullptr; + data.pitch = 1; + data.xsize = this->getDataSpace()[0] * sizeof(TYPE); + data.ysize = 1; + + log("Create device fake data: %1% MiB") + % (this->getDataSpace().productOfComponents() * sizeof(TYPE) / 1024 / 1024); + CUDA_CHECK(cuplaMallocPitch( + &data.ptr, + &data.pitch, + this->getDataSpace().productOfComponents() * sizeof(TYPE), + 1)); + + // fake the pitch, thus we can use this 1D Buffer as 2D or 3D + data.pitch = this->getDataSpace()[0] * sizeof(TYPE); + + if(DIM > DIM1) + { + data.ysize = this->getDataSpace()[1]; + } - void createSizeOnDevice(bool sizeOnDevice) - { - __startOperation(ITask::TASK_HOST); - sizeOnDevicePtr = nullptr; + reset(false); + } - if (sizeOnDevice) + void createSizeOnDevice(bool sizeOnDevice) { - CUDA_CHECK(cudaMalloc((void**)&sizeOnDevicePtr, sizeof (size_t))); + __startOperation(ITask::TASK_HOST); + sizeOnDevicePtr = nullptr; + + if(sizeOnDevice) + { + CUDA_CHECK(cuplaMalloc((void**) &sizeOnDevicePtr, sizeof(size_t))); + } + setCurrentSize(this->getDataSpace().productOfComponents()); } - setCurrentSize(this->getDataSpace().productOfComponents()); - } -private: - DataSpace offset; + private: + DataSpace offset; - bool sizeOnDevice; - size_t* sizeOnDevicePtr; - cudaPitchedPtr data; - bool useOtherMemory; -}; + bool sizeOnDevice; + size_t* sizeOnDevicePtr; + cuplaPitchedPtr data; + bool useOtherMemory; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/Exchange.hpp b/include/pmacc/memory/buffers/Exchange.hpp index 0ee14142e1..e7243b1841 100644 --- a/include/pmacc/memory/buffers/Exchange.hpp +++ b/include/pmacc/memory/buffers/Exchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -26,7 +26,6 @@ namespace pmacc { - /** * Interface for a DIM-dimensional buffer used for data exchange. * @@ -38,24 +37,23 @@ namespace pmacc * @tparam TYPE the datatype for internal buffers * @tparam DIM the dimension of the internal buffers */ - template + template class Exchange { public: - /** * Returns the exchange buffer on the device. * * @return Exchange buffer on device */ - virtual DeviceBuffer &getDeviceBuffer() = 0; + virtual DeviceBuffer& getDeviceBuffer() = 0; /** * Returns the exchange buffer on the host. * * @return Exchange buffer on host */ - virtual HostBuffer &getHostBuffer() = 0; + virtual HostBuffer& getHostBuffer() = 0; /** * Returns the type describing exchange directions @@ -77,21 +75,24 @@ namespace pmacc return communicationTag; } - virtual bool hasDeviceDoubleBuffer()=0; + /** + * Return the buffer which can be used for data exchange with MPI + * + * The buffer can point to device or host memory. + */ + virtual Buffer* getCommunicationBuffer() = 0; + + virtual bool hasDeviceDoubleBuffer() = 0; - virtual DeviceBuffer& getDeviceDoubleBuffer()=0; + virtual DeviceBuffer& getDeviceDoubleBuffer() = 0; protected: - - Exchange(uint32_t extype, uint32_t tag) : - exchange(extype), - communicationTag(tag) + Exchange(uint32_t extype, uint32_t tag) : exchange(extype), communicationTag(tag) { - } uint32_t exchange; uint32_t communicationTag; }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/ExchangeIntern.hpp b/include/pmacc/memory/buffers/ExchangeIntern.hpp index abf0360c99..ac561a3656 100644 --- a/include/pmacc/memory/buffers/ExchangeIntern.hpp +++ b/include/pmacc/memory/buffers/ExchangeIntern.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -27,7 +27,6 @@ #include "pmacc/memory/dataTypes/Mask.hpp" #include "pmacc/memory/buffers/DeviceBufferIntern.hpp" #include "pmacc/memory/buffers/HostBufferIntern.hpp" -#include "pmacc/memory/MakeUnique.hpp" #include "pmacc/eventSystem/tasks/Factory.hpp" #include "pmacc/eventSystem/tasks/TaskReceive.hpp" @@ -39,20 +38,26 @@ namespace pmacc { - - /** - * Internal Exchange implementation. + /** Internal Exchange implementation. + * + * There will be no host double buffer available if MPI direct for PMacc is enabled. */ - template + template class ExchangeIntern : public Exchange { public: - - ExchangeIntern(DeviceBuffer& source, GridLayout memoryLayout, DataSpace guardingCells, uint32_t exchange, - uint32_t communicationTag, uint32_t area = BORDER, bool sizeOnDevice = false) : - Exchange(exchange, communicationTag) + ExchangeIntern( + DeviceBuffer& source, + GridLayout memoryLayout, + DataSpace guardingCells, + uint32_t exchange, + uint32_t communicationTag, + uint32_t area = BORDER, + bool sizeOnDevice = false) + : Exchange(exchange, communicationTag) + , deviceDoubleBuffer(nullptr) + , hostBuffer(nullptr) { - PMACC_ASSERT(!guardingCells.isOneDimensionGreaterThan(memoryLayout.getGuard())); DataSpace tmp_size = memoryLayout.getDataSpaceWithoutGuarding(); @@ -62,62 +67,56 @@ namespace pmacc DataSpace exchangeDimensions = exchangeTypeToDim(exchange); - for (uint32_t dim = 0; dim < DIM; dim++) + for(uint32_t dim = 0; dim < DIM; dim++) { - if (DIM > dim && exchangeDimensions[dim] == 1) + if(DIM > dim && exchangeDimensions[dim] == 1) tmp_size[dim] = guardingCells[dim]; } /*This is only a pointer to other device data */ using DeviceBuffer = DeviceBufferIntern; - deviceBuffer = memory::makeUnique( + deviceBuffer = std::make_unique( source, tmp_size, - exchangeTypeToOffset( - exchange, - memoryLayout, - guardingCells, - area - ), - sizeOnDevice - ); - if (DIM > DIM1) + exchangeTypeToOffset(exchange, memoryLayout, guardingCells, area), + sizeOnDevice); + if(DIM > DIM1) { 
/*create double buffer on gpu for faster memory transfers*/ - deviceDoubleBuffer = memory::makeUnique( - tmp_size, - false, - true - ); + deviceDoubleBuffer = std::make_unique(tmp_size, false, true); } - using HostBuffer = HostBufferIntern; - hostBuffer = memory::makeUnique(tmp_size); + if(!Environment<>::get().isMpiDirectEnabled()) + { + using HostBuffer = HostBufferIntern; + hostBuffer = std::make_unique(tmp_size); + } } - ExchangeIntern(DataSpace exchangeDataSpace, uint32_t exchange, - uint32_t communicationTag, bool sizeOnDevice = false) : - Exchange(exchange, communicationTag) + ExchangeIntern( + DataSpace exchangeDataSpace, + uint32_t exchange, + uint32_t communicationTag, + bool sizeOnDevice = false) + : Exchange(exchange, communicationTag) + , deviceDoubleBuffer(nullptr) + , hostBuffer(nullptr) { - using DeviceBuffer = DeviceBufferIntern; - deviceBuffer = memory::makeUnique( - exchangeDataSpace, - sizeOnDevice - ); + using DeviceBuffer = DeviceBufferIntern; + deviceBuffer = std::make_unique(exchangeDataSpace, sizeOnDevice); // this->deviceBuffer = new DeviceBufferIntern (exchangeDataSpace, sizeOnDevice,true); - if (DIM > DIM1) + if(DIM > DIM1) { /*create double buffer on gpu for faster memory transfers*/ - deviceDoubleBuffer = memory::makeUnique( - exchangeDataSpace, - false, - true - ); + deviceDoubleBuffer = std::make_unique(exchangeDataSpace, false, true); } - using HostBuffer = HostBufferIntern; - hostBuffer = memory::makeUnique(exchangeDataSpace); + if(!Environment<>::get().isMpiDirectEnabled()) + { + using HostBuffer = HostBufferIntern; + hostBuffer = std::make_unique(exchangeDataSpace); + } } /** @@ -131,13 +130,13 @@ namespace pmacc Mask exchangeMask(exchange); - if (exchangeMask.containsExchangeType(LEFT) || exchangeMask.containsExchangeType(RIGHT)) + if(exchangeMask.containsExchangeType(LEFT) || exchangeMask.containsExchangeType(RIGHT)) result[0] = 1; - if (DIM > DIM1 && (exchangeMask.containsExchangeType(TOP) || 
exchangeMask.containsExchangeType(BOTTOM))) + if(DIM > DIM1 && (exchangeMask.containsExchangeType(TOP) || exchangeMask.containsExchangeType(BOTTOM))) result[1] = 1; - if (DIM > DIM2 && (exchangeMask.containsExchangeType(FRONT) || exchangeMask.containsExchangeType(BACK))) + if(DIM > DIM2 && (exchangeMask.containsExchangeType(FRONT) || exchangeMask.containsExchangeType(BACK))) result[2] = 1; return result; @@ -145,19 +144,22 @@ namespace pmacc virtual ~ExchangeIntern() = default; - DataSpace exchangeTypeToOffset(uint32_t exchange, GridLayout &memoryLayout, - DataSpace guardingCells, uint32_t area) const + DataSpace exchangeTypeToOffset( + uint32_t exchange, + GridLayout& memoryLayout, + DataSpace guardingCells, + uint32_t area) const { DataSpace size = memoryLayout.getDataSpace(); DataSpace border = memoryLayout.getGuard(); Mask mask(exchange); DataSpace tmp_offset; - if (DIM >= DIM1) + if(DIM >= DIM1) { - if (mask.containsExchangeType(RIGHT)) + if(mask.containsExchangeType(RIGHT)) { tmp_offset[0] = size[0] - border[0] - guardingCells[0]; - if (area == GUARD) + if(area == GUARD) { tmp_offset[0] += guardingCells[0]; } @@ -166,18 +168,18 @@ namespace pmacc else { tmp_offset[0] = border[0]; - if (area == GUARD && mask.containsExchangeType(LEFT)) + if(area == GUARD && mask.containsExchangeType(LEFT)) { tmp_offset[0] -= guardingCells[0]; } } } - if (DIM >= DIM2) + if(DIM >= DIM2) { - if (mask.containsExchangeType(BOTTOM)) + if(mask.containsExchangeType(BOTTOM)) { tmp_offset[1] = size[1] - border[1] - guardingCells[1]; - if (area == GUARD) + if(area == GUARD) { tmp_offset[1] += guardingCells[1]; } @@ -185,18 +187,18 @@ namespace pmacc else { tmp_offset[1] = border[1]; - if (area == GUARD && mask.containsExchangeType(TOP)) + if(area == GUARD && mask.containsExchangeType(TOP)) { tmp_offset[1] -= guardingCells[1]; } } } - if (DIM == DIM3) + if(DIM == DIM3) { - if (mask.containsExchangeType(BACK)) + if(mask.containsExchangeType(BACK)) { tmp_offset[2] = size[2] - border[2] - 
guardingCells[2]; - if (area == GUARD) + if(area == GUARD) { tmp_offset[2] += guardingCells[2]; } @@ -204,7 +206,7 @@ namespace pmacc else /*all other begin from front*/ { tmp_offset[2] = border[2]; - if (area == GUARD && mask.containsExchangeType(FRONT)) + if(area == GUARD && mask.containsExchangeType(FRONT)) { tmp_offset[2] -= guardingCells[2]; } @@ -212,26 +214,28 @@ namespace pmacc } return tmp_offset; - } - virtual HostBuffer& getHostBuffer() + HostBuffer& getHostBuffer() override { + PMACC_ASSERT(hostBuffer != nullptr); return *hostBuffer; } - virtual DeviceBuffer& getDeviceBuffer() + DeviceBuffer& getDeviceBuffer() override { + PMACC_ASSERT(deviceBuffer != nullptr); return *deviceBuffer; } - virtual bool hasDeviceDoubleBuffer() + bool hasDeviceDoubleBuffer() override { return deviceDoubleBuffer != nullptr; } - virtual DeviceBuffer& getDeviceDoubleBuffer() + DeviceBuffer& getDeviceDoubleBuffer() override { + PMACC_ASSERT(deviceDoubleBuffer != nullptr); return *deviceDoubleBuffer; } @@ -245,13 +249,29 @@ namespace pmacc return Environment<>::get().Factory().createTaskReceive(*this); } + Buffer* getCommunicationBuffer() override + { + if(Environment<>::get().isMpiDirectEnabled()) + { + if(hasDeviceDoubleBuffer()) + return &(getDeviceDoubleBuffer()); + else + return &(getDeviceBuffer()); + } + + return &(getHostBuffer()); + } + protected: - std::unique_ptr< HostBufferIntern > hostBuffer; + /** host double buffer of the exchange data + * + * Is always a nullptr if MPI direct is used + */ + std::unique_ptr> hostBuffer; //! 
This buffer is a vector which is used as message buffer for faster memcopy - std::unique_ptr< DeviceBufferIntern > deviceDoubleBuffer; - std::unique_ptr< DeviceBufferIntern > deviceBuffer; - + std::unique_ptr> deviceDoubleBuffer; + std::unique_ptr> deviceBuffer; }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/GridBuffer.hpp b/include/pmacc/memory/buffers/GridBuffer.hpp index 8e86644d4b..0e737d505b 100644 --- a/include/pmacc/memory/buffers/GridBuffer.hpp +++ b/include/pmacc/memory/buffers/GridBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. * @@ -35,502 +35,533 @@ namespace pmacc { -namespace privateGridBuffer -{ - -class UniquTag -{ -public: - - static UniquTag& getInstance() - { - static UniquTag instance; - return instance; - } - - bool isTagUniqu(uint32_t tag) - { - bool isUniqu = tags.find(tag) == tags.end(); - if (isUniqu) - tags.insert(tag); - return isUniqu; - } -private: - - UniquTag() - { - } - - /** - * Constructor - */ - UniquTag(const UniquTag&) + namespace privateGridBuffer { + class UniquTag + { + public: + static UniquTag& getInstance() + { + static UniquTag instance; + return instance; + } - } - - std::set tags; -}; + bool isTagUniqu(uint32_t tag) + { + bool isUniqu = tags.find(tag) == tags.end(); + if(isUniqu) + tags.insert(tag); + return isUniqu; + } -}//end namespace privateGridBuffer + private: + UniquTag() + { + } -/** - * GridBuffer represents a DIM-dimensional buffer which exists on the host as well as on the device. - * - * GridBuffer combines a HostBuffer and a DeviceBuffer with equal sizes. - * Additionally, it allows sending data from and receiving data to these buffers. - * Buffers consist of core data which may be surrounded by border data. 
- * - * @tparam TYPE datatype for internal Host- and DeviceBuffer - * @tparam DIM dimension of the buffers - * @tparam BORDERTYPE optional type for border data in the buffers. TYPE is used by default. - */ -template -class GridBuffer: public HostDeviceBuffer -{ - typedef HostDeviceBuffer Parent; -public: + /** + * Constructor + */ + UniquTag(const UniquTag&) + { + } - typedef typename Parent::DataBoxType DataBoxType; + std::set tags; + }; - /** - * Constructor. - * - * @param gridLayout layout of the buffers, including border-cells - * @param sizeOnDevice if true, size information exists on device, too. - */ - GridBuffer(const GridLayout& gridLayout, bool sizeOnDevice = false) : - Parent(gridLayout.getDataSpace(), sizeOnDevice), - gridLayout(gridLayout), - hasOneExchange(false), - maxExchange(0) - { - init(); - } + } // end namespace privateGridBuffer /** - * Constructor. + * GridBuffer represents a DIM-dimensional buffer which exists on the host as well as on the device. * - * @param dataSpace DataSpace representing buffer size without border-cells - * @param sizeOnDevice if true, internal buffers must store their - * size additionally on the device - * (as we keep this information coherent with the host, it influences - * performance on host-device copies, but some algorithms on the device - * might need to know the size of the buffer) - */ - GridBuffer(const DataSpace& dataSpace, bool sizeOnDevice = false) : - Parent(dataSpace, sizeOnDevice), - gridLayout(dataSpace), - hasOneExchange(false), - maxExchange(0) - { - init(); - } - - /** - * Constructor. + * GridBuffer combines a HostBuffer and a DeviceBuffer with equal sizes. + * Additionally, it allows sending data from and receiving data to these buffers. + * Buffers consist of core data which may be surrounded by border data. 
* - * @param otherDeviceBuffer DeviceBuffer which should be used instead of creating own DeviceBuffer - * @param gridLayout layout of the buffers, including border-cells - * @param sizeOnDevice if true, internal buffers must store their - * size additionally on the device - * (as we keep this information coherent with the host, it influences - * performance on host-device copies, but some algorithms on the device - * might need to know the size of the buffer) - */ - GridBuffer(DeviceBuffer& otherDeviceBuffer, const GridLayout& gridLayout, bool sizeOnDevice = false) : - Parent(otherDeviceBuffer, gridLayout.getDataSpace(), sizeOnDevice), - gridLayout(gridLayout), - hasOneExchange(false), - maxExchange(0) - { - init(); - } - - GridBuffer( - HostBuffer& otherHostBuffer, - const DataSpace& offsetHost, - DeviceBuffer& otherDeviceBuffer, - const DataSpace& offsetDevice, - const GridLayout& gridLayout, - bool sizeOnDevice = false) : - Parent(otherHostBuffer, offsetHost, otherDeviceBuffer, offsetDevice, gridLayout.getDataSpace(), sizeOnDevice), - gridLayout(gridLayout), - hasOneExchange(false), - maxExchange(0) - { - init(); - } - - /** - * Destructor. + * @tparam TYPE datatype for internal Host- and DeviceBuffer + * @tparam DIM dimension of the buffers + * @tparam BORDERTYPE optional type for border data in the buffers. TYPE is used by default. */ - virtual ~GridBuffer() + template + class GridBuffer : public HostDeviceBuffer { - for (uint32_t i = 0; i < 27; ++i) + typedef HostDeviceBuffer Parent; + + public: + typedef typename Parent::DataBoxType DataBoxType; + + /** + * Constructor. + * + * @param gridLayout layout of the buffers, including border-cells + * @param sizeOnDevice if true, size information exists on device, too. 
+ */ + GridBuffer(const GridLayout& gridLayout, bool sizeOnDevice = false) + : Parent(gridLayout.getDataSpace(), sizeOnDevice) + , gridLayout(gridLayout) + , hasOneExchange(false) + , maxExchange(0) { - __delete(sendExchanges[i]); - __delete(receiveExchanges[i]); - }; - } - - /** - * Add Exchange in GridBuffer memory space. - * - * An Exchange is added to this GridBuffer. The exchange buffers use - * the same memory as this GridBuffer. - * - * @param dataPlace place where received data is stored [GUARD | BORDER] - * if dataPlace=GUARD than copy other BORDER to my GUARD - * if dataPlace=BORDER than copy other GUARD to my BORDER - * @param receive a Mask which describes the directions for the exchange - * @param guardingCells number of guarding cells in each dimension - * @param communicationTag unique tag/id for communication - * @param sizeOnDeviceSend if true, internal send buffers must store their - * size additionally on the device - * (as we keep this information coherent with the host, it influences - * performance on host-device copies, but some algorithms on the device - * might need to know the size of the buffer) - * @param sizeOnDeviceReceive if true, internal receive buffers must store their - * size additionally on the device - */ - void addExchange(uint32_t dataPlace, const Mask &receive, DataSpace guardingCells, uint32_t communicationTag, bool sizeOnDeviceSend, bool sizeOnDeviceReceive ) - { - - if (hasOneExchange && (communicationTag != lastUsedCommunicationTag)) - throw std::runtime_error("It is not allowed to give the same GridBuffer different communicationTags"); - - lastUsedCommunicationTag = communicationTag; + init(); + } - receiveMask = receiveMask + receive; - sendMask = this->receiveMask.getMirroredMask(); - Mask send = receive.getMirroredMask(); + /** + * Constructor. 
+ * + * @param dataSpace DataSpace representing buffer size without border-cells + * @param sizeOnDevice if true, internal buffers must store their + * size additionally on the device + * (as we keep this information coherent with the host, it influences + * performance on host-device copies, but some algorithms on the device + * might need to know the size of the buffer) + */ + GridBuffer(const DataSpace& dataSpace, bool sizeOnDevice = false) + : Parent(dataSpace, sizeOnDevice) + , gridLayout(dataSpace) + , hasOneExchange(false) + , maxExchange(0) + { + init(); + } + /** + * Constructor. + * + * @param otherDeviceBuffer DeviceBuffer which should be used instead of creating own DeviceBuffer + * @param gridLayout layout of the buffers, including border-cells + * @param sizeOnDevice if true, internal buffers must store their + * size additionally on the device + * (as we keep this information coherent with the host, it influences + * performance on host-device copies, but some algorithms on the device + * might need to know the size of the buffer) + */ + GridBuffer( + DeviceBuffer& otherDeviceBuffer, + const GridLayout& gridLayout, + bool sizeOnDevice = false) + : Parent(otherDeviceBuffer, gridLayout.getDataSpace(), sizeOnDevice) + , gridLayout(gridLayout) + , hasOneExchange(false) + , maxExchange(0) + { + init(); + } + GridBuffer( + HostBuffer& otherHostBuffer, + const DataSpace& offsetHost, + DeviceBuffer& otherDeviceBuffer, + const DataSpace& offsetDevice, + const GridLayout& gridLayout, + bool sizeOnDevice = false) + : Parent( + otherHostBuffer, + offsetHost, + otherDeviceBuffer, + offsetDevice, + gridLayout.getDataSpace(), + sizeOnDevice) + , gridLayout(gridLayout) + , hasOneExchange(false) + , maxExchange(0) + { + init(); + } - for (uint32_t ex = 1; ex< -12 * (int) DIM + 6 * (int) DIM * (int) DIM + 9; ++ex) + /** + * Destructor. 
+ */ + virtual ~GridBuffer() { - if (send.isSet(ex)) + for(uint32_t i = 0; i < 27; ++i) { - uint32_t uniqCommunicationTag = (communicationTag << 5) | ex; - - if (!hasOneExchange && !privateGridBuffer::UniquTag::getInstance().isTagUniqu(uniqCommunicationTag)) - { - std::stringstream message; - message << "unique exchange communication tag (" - << uniqCommunicationTag << ") which is created from communicationTag (" - << communicationTag << ") already used for other GridBuffer exchange"; - throw std::runtime_error(message.str()); - } - hasOneExchange = true; - - if (sendExchanges[ex] != nullptr) - { - throw std::runtime_error("Exchange already added!"); - } - - maxExchange = std::max(maxExchange, ex + 1u); - sendExchanges[ex] = new ExchangeIntern (this->getDeviceBuffer(), gridLayout, guardingCells, - (ExchangeType) ex, uniqCommunicationTag, - dataPlace == GUARD ? BORDER : GUARD, sizeOnDeviceSend); - ExchangeType recvex = Mask::getMirroredExchangeType(ex); - maxExchange = std::max(maxExchange, recvex + 1u); - receiveExchanges[recvex] = - new ExchangeIntern ( - this->getDeviceBuffer(), - gridLayout, - guardingCells, - recvex, - uniqCommunicationTag, - dataPlace == GUARD ? GUARD : BORDER, - sizeOnDeviceReceive); - } + __delete(sendExchanges[i]); + __delete(receiveExchanges[i]); + }; } - } - /** - * Add Exchange in GridBuffer memory space. - * - * An Exchange is added to this GridBuffer. The exchange buffers use - * the same memory as this GridBuffer. 
- * - * @param dataPlace place where received data is stored [GUARD | BORDER] - * if dataPlace=GUARD than copy other BORDER to my GUARD - * if dataPlace=BORDER than copy other GUARD to my BORDER - * @param receive a Mask which describes the directions for the exchange - * @param guardingCells number of guarding cells in each dimension - * @param communicationTag unique tag/id for communication - * @param sizeOnDevice if true, internal buffers must store their - * size additionally on the device - * (as we keep this information coherent with the host, it influences - * performance on host-device copies, but some algorithms on the device - * might need to know the size of the buffer) - */ - void addExchange(uint32_t dataPlace, const Mask &receive, DataSpace guardingCells, uint32_t communicationTag, bool sizeOnDevice = false) - { - addExchange( dataPlace, receive, guardingCells, communicationTag, sizeOnDevice, sizeOnDevice ); - } - - /** - * Add Exchange in dedicated memory space. - * - * An Exchange is added to this GridBuffer. The exchange buffers use - * the their own memory instead of using the GridBuffer's memory space. 
- * - * @param receive a Mask which describes the directions for the exchange - * @param dataSpace size of the newly created exchange buffer in each dimension - * @param communicationTag unique tag/id for communication - * @param sizeOnDeviceSend if true, internal send buffers must store their - * size additionally on the device - * (as we keep this information coherent with the host, it influences - * performance on host-device copies, but some algorithms on the device - * might need to know the size of the buffer) - * @param sizeOnDeviceReceive if true, internal receive buffers must store their - * size additionally on the device - */ - void addExchangeBuffer(const Mask &receive, const DataSpace &dataSpace, uint32_t communicationTag, bool sizeOnDeviceSend, bool sizeOnDeviceReceive ) - { - - if (hasOneExchange && (communicationTag != lastUsedCommunicationTag)) - throw std::runtime_error("It is not allowed to give the same GridBuffer different communicationTags"); - lastUsedCommunicationTag = communicationTag; + /** + * Add Exchange in GridBuffer memory space. + * + * An Exchange is added to this GridBuffer. The exchange buffers use + * the same memory as this GridBuffer. 
+ * + * @param dataPlace place where received data is stored [GUARD | BORDER] + * if dataPlace=GUARD than copy other BORDER to my GUARD + * if dataPlace=BORDER than copy other GUARD to my BORDER + * @param receive a Mask which describes the directions for the exchange + * @param guardingCells number of guarding cells in each dimension + * @param communicationTag unique tag/id for communication + * @param sizeOnDeviceSend if true, internal send buffers must store their + * size additionally on the device + * (as we keep this information coherent with the host, it influences + * performance on host-device copies, but some algorithms on the device + * might need to know the size of the buffer) + * @param sizeOnDeviceReceive if true, internal receive buffers must store their + * size additionally on the device + */ + void addExchange( + uint32_t dataPlace, + const Mask& receive, + DataSpace guardingCells, + uint32_t communicationTag, + bool sizeOnDeviceSend, + bool sizeOnDeviceReceive) + { + if(hasOneExchange && (communicationTag != lastUsedCommunicationTag)) + throw std::runtime_error("It is not allowed to give the same GridBuffer different communicationTags"); + lastUsedCommunicationTag = communicationTag; - /*don't create buffer with 0 (zero) elements*/ - if (dataSpace.productOfComponents() != 0) - { receiveMask = receiveMask + receive; sendMask = this->receiveMask.getMirroredMask(); Mask send = receive.getMirroredMask(); - for (uint32_t ex = 1; ex < 27; ++ex) + + + for(uint32_t ex = 1; ex < -12 * (int) DIM + 6 * (int) DIM * (int) DIM + 9; ++ex) { - if (send.isSet(ex)) + if(send.isSet(ex)) { uint32_t uniqCommunicationTag = (communicationTag << 5) | ex; - if (!hasOneExchange && !privateGridBuffer::UniquTag::getInstance().isTagUniqu(uniqCommunicationTag)) + + if(!hasOneExchange && !privateGridBuffer::UniquTag::getInstance().isTagUniqu(uniqCommunicationTag)) { std::stringstream message; - message << "unique exchange communication tag (" - << uniqCommunicationTag << ") 
which is created from communicationTag (" - << communicationTag << ") already used for other GridBuffer exchange"; + message << "unique exchange communication tag (" << uniqCommunicationTag + << ") which is created from communicationTag (" << communicationTag + << ") already used for other GridBuffer exchange"; throw std::runtime_error(message.str()); } hasOneExchange = true; - if (sendExchanges[ex] != nullptr) + if(sendExchanges[ex] != nullptr) { throw std::runtime_error("Exchange already added!"); } - //GridLayout memoryLayout(size); maxExchange = std::max(maxExchange, ex + 1u); - sendExchanges[ex] = new ExchangeIntern (/*memoryLayout*/ dataSpace, - ex, uniqCommunicationTag, sizeOnDeviceSend); - + sendExchanges[ex] = new ExchangeIntern( + this->getDeviceBuffer(), + gridLayout, + guardingCells, + (ExchangeType) ex, + uniqCommunicationTag, + dataPlace == GUARD ? BORDER : GUARD, + sizeOnDeviceSend); ExchangeType recvex = Mask::getMirroredExchangeType(ex); maxExchange = std::max(maxExchange, recvex + 1u); - receiveExchanges[recvex] = new ExchangeIntern (/*memoryLayout*/ dataSpace, - recvex, uniqCommunicationTag, sizeOnDeviceReceive); + receiveExchanges[recvex] = new ExchangeIntern( + this->getDeviceBuffer(), + gridLayout, + guardingCells, + recvex, + uniqCommunicationTag, + dataPlace == GUARD ? GUARD : BORDER, + sizeOnDeviceReceive); } } } - } - /** - * Add Exchange in dedicated memory space. - * - * An Exchange is added to this GridBuffer. The exchange buffers use - * the their own memory instead of using the GridBuffer's memory space. 
- * - * @param receive a Mask which describes the directions for the exchange - * @param dataSpace size of the newly created exchange buffer in each dimension - * @param communicationTag unique tag/id for communication - * @param sizeOnDevice if true, internal buffers must store their - * size additionally on the device - * (as we keep this information coherent with the host, it influences - * performance on host-device copies, but some algorithms on the device - * might need to know the size of the buffer) - */ - void addExchangeBuffer(const Mask &receive, const DataSpace &dataSpace, uint32_t communicationTag, bool sizeOnDevice = false ) - { - addExchangeBuffer( receive, dataSpace, communicationTag, sizeOnDevice, sizeOnDevice ); - } + /** + * Add Exchange in GridBuffer memory space. + * + * An Exchange is added to this GridBuffer. The exchange buffers use + * the same memory as this GridBuffer. + * + * @param dataPlace place where received data is stored [GUARD | BORDER] + * if dataPlace=GUARD than copy other BORDER to my GUARD + * if dataPlace=BORDER than copy other GUARD to my BORDER + * @param receive a Mask which describes the directions for the exchange + * @param guardingCells number of guarding cells in each dimension + * @param communicationTag unique tag/id for communication + * @param sizeOnDevice if true, internal buffers must store their + * size additionally on the device + * (as we keep this information coherent with the host, it influences + * performance on host-device copies, but some algorithms on the device + * might need to know the size of the buffer) + */ + void addExchange( + uint32_t dataPlace, + const Mask& receive, + DataSpace guardingCells, + uint32_t communicationTag, + bool sizeOnDevice = false) + { + addExchange(dataPlace, receive, guardingCells, communicationTag, sizeOnDevice, sizeOnDevice); + } - /** - * Returns whether this GridBuffer has an Exchange for sending in ex direction. 
- * - * @param ex exchange direction to query - * @return true if send exchanges with ex direction exist, otherwise false - */ - bool hasSendExchange(uint32_t ex) const - { - return ( (sendExchanges[ex] != nullptr) && (getSendMask().isSet(ex))); - } + /** + * Add Exchange in dedicated memory space. + * + * An Exchange is added to this GridBuffer. The exchange buffers use + * the their own memory instead of using the GridBuffer's memory space. + * + * @param receive a Mask which describes the directions for the exchange + * @param dataSpace size of the newly created exchange buffer in each dimension + * @param communicationTag unique tag/id for communication + * @param sizeOnDeviceSend if true, internal send buffers must store their + * size additionally on the device + * (as we keep this information coherent with the host, it influences + * performance on host-device copies, but some algorithms on the device + * might need to know the size of the buffer) + * @param sizeOnDeviceReceive if true, internal receive buffers must store their + * size additionally on the device + */ + void addExchangeBuffer( + const Mask& receive, + const DataSpace& dataSpace, + uint32_t communicationTag, + bool sizeOnDeviceSend, + bool sizeOnDeviceReceive) + { + if(hasOneExchange && (communicationTag != lastUsedCommunicationTag)) + throw std::runtime_error("It is not allowed to give the same GridBuffer different communicationTags"); + lastUsedCommunicationTag = communicationTag; - /** - * Returns whether this GridBuffer has an Exchange for receiving from ex direction. - * - * @param ex exchange direction to query - * @return true if receive exchanges with ex direction exist, otherwise false - */ - bool hasReceiveExchange(uint32_t ex) const - { - return ( (receiveExchanges[ex] != nullptr) && (getReceiveMask().isSet(ex))); - } - /** - * Returns the Exchange for sending data in ex direction. 
- * - * Returns an Exchange which for sending data from - * this GridBuffer in the direction described by ex. - * - * @param ex the direction to query - * @return the Exchange for sending data - */ - Exchange& getSendExchange(uint32_t ex) const - { - return *sendExchanges[ex]; - } + /*don't create buffer with 0 (zero) elements*/ + if(dataSpace.productOfComponents() != 0) + { + receiveMask = receiveMask + receive; + sendMask = this->receiveMask.getMirroredMask(); + Mask send = receive.getMirroredMask(); + for(uint32_t ex = 1; ex < 27; ++ex) + { + if(send.isSet(ex)) + { + uint32_t uniqCommunicationTag = (communicationTag << 5) | ex; + if(!hasOneExchange + && !privateGridBuffer::UniquTag::getInstance().isTagUniqu(uniqCommunicationTag)) + { + std::stringstream message; + message << "unique exchange communication tag (" << uniqCommunicationTag + << ") which is created from communicationTag (" << communicationTag + << ") already used for other GridBuffer exchange"; + throw std::runtime_error(message.str()); + } + hasOneExchange = true; + + if(sendExchanges[ex] != nullptr) + { + throw std::runtime_error("Exchange already added!"); + } + + // GridLayout memoryLayout(size); + maxExchange = std::max(maxExchange, ex + 1u); + sendExchanges[ex] = new ExchangeIntern( + /*memoryLayout*/ dataSpace, + ex, + uniqCommunicationTag, + sizeOnDeviceSend); + + ExchangeType recvex = Mask::getMirroredExchangeType(ex); + maxExchange = std::max(maxExchange, recvex + 1u); + receiveExchanges[recvex] = new ExchangeIntern( + /*memoryLayout*/ dataSpace, + recvex, + uniqCommunicationTag, + sizeOnDeviceReceive); + } + } + } + } - /** - * Returns the Exchange for receiving data from ex direction. - * - * Returns an Exchange which for receiving data to - * this GridBuffer from the direction described by ex. 
- * - * @param ex the direction to query - * @return the Exchange for receiving data - */ - Exchange& getReceiveExchange(uint32_t ex) const - { - return *receiveExchanges[ex]; - } + /** + * Add Exchange in dedicated memory space. + * + * An Exchange is added to this GridBuffer. The exchange buffers use + * the their own memory instead of using the GridBuffer's memory space. + * + * @param receive a Mask which describes the directions for the exchange + * @param dataSpace size of the newly created exchange buffer in each dimension + * @param communicationTag unique tag/id for communication + * @param sizeOnDevice if true, internal buffers must store their + * size additionally on the device + * (as we keep this information coherent with the host, it influences + * performance on host-device copies, but some algorithms on the device + * might need to know the size of the buffer) + */ + void addExchangeBuffer( + const Mask& receive, + const DataSpace& dataSpace, + uint32_t communicationTag, + bool sizeOnDevice = false) + { + addExchangeBuffer(receive, dataSpace, communicationTag, sizeOnDevice, sizeOnDevice); + } - /** - * Returns the Mask describing send exchanges - * - * @return Mask for send exchanges - */ - Mask getSendMask() const - { - return (Environment::get().EnvironmentController().getCommunicationMask() & sendMask); - } + /** + * Returns whether this GridBuffer has an Exchange for sending in ex direction. 
+ * + * @param ex exchange direction to query + * @return true if send exchanges with ex direction exist, otherwise false + */ + bool hasSendExchange(uint32_t ex) const + { + return ((sendExchanges[ex] != nullptr) && (getSendMask().isSet(ex))); + } - /** - * Returns the Mask describing receive exchanges - * - * @return Mask for receive exchanges - */ - Mask getReceiveMask() const - { - return (Environment::get().EnvironmentController().getCommunicationMask() & receiveMask); - } + /** + * Returns whether this GridBuffer has an Exchange for receiving from ex direction. + * + * @param ex exchange direction to query + * @return true if receive exchanges with ex direction exist, otherwise false + */ + bool hasReceiveExchange(uint32_t ex) const + { + return ((receiveExchanges[ex] != nullptr) && (getReceiveMask().isSet(ex))); + } - /** - * Starts sync data from own device buffer to neighbor device buffer. - * - * Asynchronously starts synchronization data from internal DeviceBuffer using added - * Exchange buffers. - * This operation runs sequential to other code but intern asynchronous - * - */ - EventTask communication() - { - EventTask ev = this->asyncCommunication(__getTransactionEvent()); - __setTransactionEvent(ev); - return ev; - } + /** + * Returns the Exchange for sending data in ex direction. + * + * Returns an Exchange which for sending data from + * this GridBuffer in the direction described by ex. + * + * @param ex the direction to query + * @return the Exchange for sending data + */ + Exchange& getSendExchange(uint32_t ex) const + { + return *sendExchanges[ex]; + } - /** - * Starts sync data from own device buffer to neighbor device buffer. - * - * Asynchronously starts synchronization data from internal DeviceBuffer using added - * Exchange buffers. - * - */ - EventTask asyncCommunication(EventTask serialEvent) - { - EventTask evR; - for (uint32_t i = 0; i < maxExchange; ++i) + /** + * Returns the Exchange for receiving data from ex direction. 
+ * + * Returns an Exchange which for receiving data to + * this GridBuffer from the direction described by ex. + * + * @param ex the direction to query + * @return the Exchange for receiving data + */ + Exchange& getReceiveExchange(uint32_t ex) const + { + return *receiveExchanges[ex]; + } + + /** + * Returns the Mask describing send exchanges + * + * @return Mask for send exchanges + */ + Mask getSendMask() const { + return (Environment::get().EnvironmentController().getCommunicationMask() & sendMask); + } + + /** + * Returns the Mask describing receive exchanges + * + * @return Mask for receive exchanges + */ + Mask getReceiveMask() const + { + return (Environment::get().EnvironmentController().getCommunicationMask() & receiveMask); + } - evR += asyncReceive(serialEvent, i); + /** + * Starts sync data from own device buffer to neighbor device buffer. + * + * Asynchronously starts synchronization data from internal DeviceBuffer using added + * Exchange buffers. + * This operation runs sequential to other code but intern asynchronous + * + */ + EventTask communication() + { + EventTask ev = this->asyncCommunication(__getTransactionEvent()); + __setTransactionEvent(ev); + return ev; + } - ExchangeType sendEx = Mask::getMirroredExchangeType(i); + /** + * Starts sync data from own device buffer to neighbor device buffer. + * + * Asynchronously starts synchronization data from internal DeviceBuffer using added + * Exchange buffers. 
+ * + */ + EventTask asyncCommunication(EventTask serialEvent) + { + EventTask evR; + for(uint32_t i = 0; i < maxExchange; ++i) + { + evR += asyncReceive(serialEvent, i); - evR += asyncSend(serialEvent, sendEx); + ExchangeType sendEx = Mask::getMirroredExchangeType(i); + evR += asyncSend(serialEvent, sendEx); + } + return evR; } - return evR; - } - EventTask asyncSend(EventTask serialEvent, uint32_t sendEx) - { - if (hasSendExchange(sendEx)) + EventTask asyncSend(EventTask serialEvent, uint32_t sendEx) { - __startTransaction(serialEvent + sendEvents[sendEx]); - sendEvents[sendEx] = sendExchanges[sendEx]->startSend(); - __endTransaction(); - return sendEvents[sendEx]; + if(hasSendExchange(sendEx)) + { + __startTransaction(serialEvent + sendEvents[sendEx]); + sendEvents[sendEx] = sendExchanges[sendEx]->startSend(); + __endTransaction(); + return sendEvents[sendEx]; + } + return EventTask(); } - return EventTask(); - } - EventTask asyncReceive(EventTask serialEvent, uint32_t recvEx) - { - if (hasReceiveExchange(recvEx)) + EventTask asyncReceive(EventTask serialEvent, uint32_t recvEx) { - __startTransaction(serialEvent + receiveEvents[recvEx]); - receiveEvents[recvEx] = receiveExchanges[recvEx]->startReceive(); + if(hasReceiveExchange(recvEx)) + { + __startTransaction(serialEvent + receiveEvents[recvEx]); + receiveEvents[recvEx] = receiveExchanges[recvEx]->startReceive(); - __endTransaction(); - return receiveEvents[recvEx]; + __endTransaction(); + return receiveEvents[recvEx]; + } + return EventTask(); } - return EventTask(); - } - /** - * Returns the GridLayout describing this GridBuffer. - * - * @return the layout of this buffer - */ - GridLayout getGridLayout() - { - return gridLayout; - } - -private: + /** + * Returns the GridLayout describing this GridBuffer. 
+ * + * @return the layout of this buffer + */ + GridLayout getGridLayout() + { + return gridLayout; + } - friend class Environment; + private: + friend class Environment; - void init() - { - for (uint32_t i = 0; i < 27; ++i) + void init() { - sendExchanges[i] = nullptr; - receiveExchanges[i] = nullptr; - /* fill array with valid empty events to avoid side effects if - * array is accessed without calling hasExchange() before usage */ - receiveEvents[i] = EventTask(); - sendEvents[i] = EventTask(); + for(uint32_t i = 0; i < 27; ++i) + { + sendExchanges[i] = nullptr; + receiveExchanges[i] = nullptr; + /* fill array with valid empty events to avoid side effects if + * array is accessed without calling hasExchange() before usage */ + receiveEvents[i] = EventTask(); + sendEvents[i] = EventTask(); + } } - } -protected: - /*if we have one exchange we don't check if communicationTag has been used before*/ - bool hasOneExchange; - uint32_t lastUsedCommunicationTag; - GridLayout gridLayout; + protected: + /*if we have one exchange we don't check if communicationTag has been used before*/ + bool hasOneExchange; + uint32_t lastUsedCommunicationTag; + GridLayout gridLayout; - Mask sendMask; - Mask receiveMask; + Mask sendMask; + Mask receiveMask; - ExchangeIntern* sendExchanges[27]; - ExchangeIntern* receiveExchanges[27]; - EventTask receiveEvents[27]; - EventTask sendEvents[27]; + ExchangeIntern* sendExchanges[27]; + ExchangeIntern* receiveExchanges[27]; + EventTask receiveEvents[27]; + EventTask sendEvents[27]; - uint32_t maxExchange; //use max exchanges and run over the array is faster as use set from stl -}; + uint32_t maxExchange; // use max exchanges and run over the array is faster as use set from stl + }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/HostBuffer.hpp b/include/pmacc/memory/buffers/HostBuffer.hpp index 903dea0845..853c6b170a 100644 --- a/include/pmacc/memory/buffers/HostBuffer.hpp +++ b/include/pmacc/memory/buffers/HostBuffer.hpp @@ 
-1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. * @@ -28,43 +28,40 @@ namespace pmacc { - template + template class HostBuffer; -namespace detail -{ - template< class TYPE > - container::HostBuffer< TYPE, 1u > - make_CartBuffer( HostBuffer & hb ) + namespace detail { - return container::HostBuffer(hb.getBasePointer(), hb.getDataSpace(), false); - } + template + container::HostBuffer make_CartBuffer(HostBuffer& hb) + { + return container::HostBuffer(hb.getBasePointer(), hb.getDataSpace(), false); + } - template< class TYPE > - container::HostBuffer< TYPE, 2u > - make_CartBuffer( HostBuffer & hb ) - { - math::Size_t<2u - 1u> pitch; - pitch[0] = hb.getPhysicalMemorySize()[0] * sizeof(TYPE); - return container::HostBuffer(hb.getBasePointer(), hb.getDataSpace(), false, pitch); - } + template + container::HostBuffer make_CartBuffer(HostBuffer& hb) + { + math::Size_t<2u - 1u> pitch; + pitch[0] = hb.getPhysicalMemorySize()[0] * sizeof(TYPE); + return container::HostBuffer(hb.getBasePointer(), hb.getDataSpace(), false, pitch); + } - template< class TYPE > - container::HostBuffer< TYPE, 3u > - make_CartBuffer( HostBuffer & hb ) - { - math::Size_t<3u - 1u> pitch; - pitch[0] = hb.getPhysicalMemorySize()[0] * sizeof(TYPE); - pitch[1] = pitch[0] * hb.getPhysicalMemorySize()[1]; - return container::HostBuffer(hb.getBasePointer(), hb.getDataSpace(), false, pitch); - } -} + template + container::HostBuffer make_CartBuffer(HostBuffer& hb) + { + math::Size_t<3u - 1u> pitch; + pitch[0] = hb.getPhysicalMemorySize()[0] * sizeof(TYPE); + pitch[1] = pitch[0] * hb.getPhysicalMemorySize()[1]; + return container::HostBuffer(hb.getBasePointer(), hb.getDataSpace(), false, pitch); + } + } // namespace detail class EventTask; - template + template class DeviceBuffer; - template + template class Buffer; /** @@ -73,7 +70,7 @@ namespace detail * @tparam TYPE 
datatype for buffer data * @tparam DIM dimension of the buffer */ - template + template class HostBuffer : public Buffer { public: @@ -98,9 +95,7 @@ namespace detail /** * Destructor. */ - virtual ~HostBuffer() - { - }; + virtual ~HostBuffer(){}; /** * Conversion to cuSTL HostBuffer. @@ -108,14 +103,12 @@ namespace detail * Returns a cuSTL HostBuffer with reference to the same data. */ HINLINE - container::HostBuffer - cartBuffer() + container::HostBuffer cartBuffer() { - return detail::make_CartBuffer( *this ); + return detail::make_CartBuffer(*this); } protected: - /** Constructor. * * @param size extent for each dimension (in elements) @@ -123,11 +116,10 @@ namespace detail * can be less than `physicalMemorySize` * @param physicalMemorySize size of the physical memory (in elements) */ - HostBuffer(DataSpace size, DataSpace physicalMemorySize) : - Buffer(size, physicalMemorySize) + HostBuffer(DataSpace size, DataSpace physicalMemorySize) + : Buffer(size, physicalMemorySize) { - } }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/HostBufferIntern.hpp b/include/pmacc/memory/buffers/HostBufferIntern.hpp index ff867610b7..d89706600e 100644 --- a/include/pmacc/memory/buffers/HostBufferIntern.hpp +++ b/include/pmacc/memory/buffers/HostBufferIntern.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -26,125 +26,126 @@ #include "pmacc/eventSystem/tasks/Factory.hpp" #include "pmacc/eventSystem/EventSystem.hpp" #include "pmacc/memory/boxes/DataBoxDim1Access.hpp" +#include "pmacc/memory/Array.hpp" #include "pmacc/assert.hpp" namespace pmacc { - -/** - * Internal implementation of the HostBuffer interface. 
- */ -template -class HostBufferIntern : public HostBuffer -{ -public: - - typedef typename HostBuffer::DataBoxType DataBoxType; - - /** constructor - * - * @param size extent for each dimension (in elements) - */ - HostBufferIntern(DataSpace size) : - HostBuffer(size, size), - pointer(nullptr),ownPointer(true) - { - CUDA_CHECK(cudaMallocHost((void**)&pointer, size.productOfComponents() * sizeof (TYPE))); - reset(false); - } - - HostBufferIntern(HostBufferIntern& source, DataSpace size, DataSpace offset=DataSpace()) : - HostBuffer(size, source.getPhysicalMemorySize()), - pointer(nullptr),ownPointer(false) - { - pointer=&(source.getDataBox()(offset));/*fix me, this is a bad way*/ - reset(true); - } - /** - * destructor + * Internal implementation of the HostBuffer interface. */ - virtual ~HostBufferIntern() + template + class HostBufferIntern : public HostBuffer { - __startOperation(ITask::TASK_HOST); + public: + typedef typename HostBuffer::DataBoxType DataBoxType; + + /** constructor + * + * @param size extent for each dimension (in elements) + */ + HostBufferIntern(DataSpace size) : HostBuffer(size, size), pointer(nullptr), ownPointer(true) + { + CUDA_CHECK(cuplaMallocHost((void**) &pointer, size.productOfComponents() * sizeof(TYPE))); + reset(false); + } - if (pointer && ownPointer) + HostBufferIntern(HostBufferIntern& source, DataSpace size, DataSpace offset = DataSpace()) + : HostBuffer(size, source.getPhysicalMemorySize()) + , pointer(nullptr) + , ownPointer(false) { - CUDA_CHECK_NO_EXCEPT(cudaFreeHost(pointer)); + pointer = &(source.getDataBox()(offset)); /*fix me, this is a bad way*/ + reset(true); } - } - /*! 
Get pointer of memory - * @return pointer to memory - */ - TYPE* getBasePointer() - { - __startOperation(ITask::TASK_HOST); - return pointer; - } + /** + * destructor + */ + virtual ~HostBufferIntern() + { + __startOperation(ITask::TASK_HOST); - TYPE* getPointer() - { - __startOperation(ITask::TASK_HOST); - return pointer; - } + if(pointer && ownPointer) + { + CUDA_CHECK_NO_EXCEPT(cuplaFreeHost(pointer)); + } + } - void copyFrom(DeviceBuffer& other) - { - PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); - Environment<>::get().Factory().createTaskCopyDeviceToHost(other, *this); - } + /*! Get pointer of memory + * @return pointer to memory + */ + TYPE* getBasePointer() + { + __startOperation(ITask::TASK_HOST); + return pointer; + } - void reset(bool preserveData = true) - { - __startOperation(ITask::TASK_HOST); - this->setCurrentSize(this->getDataSpace().productOfComponents()); - if (!preserveData) + TYPE* getPointer() { - /* if it is a pointer out of other memory we can not assume that - * that the physical memory is contiguous - */ - if(ownPointer) - memset(pointer, 0, this->getDataSpace().productOfComponents() * sizeof (TYPE)); - else + __startOperation(ITask::TASK_HOST); + return pointer; + } + + void copyFrom(DeviceBuffer& other) + { + PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); + Environment<>::get().Factory().createTaskCopyDeviceToHost(other, *this); + } + + void reset(bool preserveData = true) + { + __startOperation(ITask::TASK_HOST); + this->setCurrentSize(this->getDataSpace().productOfComponents()); + if(!preserveData) { - TYPE value; - /* using `uint8_t` for byte-wise looping through tmp var value of `TYPE` */ - uint8_t* valuePtr = (uint8_t*)&value; - for( size_t b = 0; b < sizeof(TYPE); ++b) + /* if it is a pointer out of other memory we can not assume that + * that the physical memory is contiguous + */ + if(ownPointer) + memset( + reinterpret_cast(pointer), + 0, + 
this->getDataSpace().productOfComponents() * sizeof(TYPE)); + else { - valuePtr[b] = static_cast(0); + // Using Array is a workaround for types without default constructor + memory::Array tmp; + memset(reinterpret_cast(tmp.data()), 0, sizeof(tmp)); + // use first element to avoid issue because Array is aligned (sizeof can be larger than component + // type) + setValue(tmp[0]); } - /* set value with zero-ed `TYPE` */ - setValue(value); } } - } - void setValue(const TYPE& value) - { - __startOperation(ITask::TASK_HOST); - int64_t current_size = static_cast< int64_t >(this->getCurrentSize()); - auto memBox = getDataBox(); - typedef DataBoxDim1Access D1Box; - D1Box d1Box(memBox, this->getDataSpace()); - #pragma omp parallel for - for (int64_t i = 0; i < current_size; i++) + void setValue(const TYPE& value) { - d1Box[i] = value; + __startOperation(ITask::TASK_HOST); + int64_t current_size = static_cast(this->getCurrentSize()); + auto memBox = getDataBox(); + typedef DataBoxDim1Access D1Box; + D1Box d1Box(memBox, this->getDataSpace()); +#pragma omp parallel for + for(int64_t i = 0; i < current_size; i++) + { + d1Box[i] = value; + } } - } - DataBoxType getDataBox() - { - __startOperation(ITask::TASK_HOST); - return DataBoxType(PitchedBox (pointer, DataSpace (), - this->getPhysicalMemorySize(), this->getPhysicalMemorySize()[0] * sizeof (TYPE))); - } + DataBoxType getDataBox() + { + __startOperation(ITask::TASK_HOST); + return DataBoxType(PitchedBox( + pointer, + DataSpace(), + this->getPhysicalMemorySize(), + this->getPhysicalMemorySize()[0] * sizeof(TYPE))); + } -private: - TYPE* pointer; - bool ownPointer; -}; + private: + TYPE* pointer; + bool ownPointer; + }; -} +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/HostDeviceBuffer.hpp b/include/pmacc/memory/buffers/HostDeviceBuffer.hpp index 852c92bee1..21ef71c0fd 100644 --- a/include/pmacc/memory/buffers/HostDeviceBuffer.hpp +++ b/include/pmacc/memory/buffers/HostDeviceBuffer.hpp @@ -1,4 +1,4 @@ -/* 
Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. * @@ -29,20 +29,23 @@ #include -namespace pmacc{ - +namespace pmacc +{ /** Buffer that contains a host and device buffer and allows synchronizing those 2 */ template class HostDeviceBuffer { typedef HostBufferIntern HostBufferType; typedef DeviceBufferIntern DeviceBufferType; + public: using ValueType = T_Type; typedef HostBuffer HBuffer; typedef DeviceBuffer DBuffer; typedef typename HostBufferType::DataBoxType DataBoxType; - PMACC_CASSERT_MSG(DataBoxTypes_must_match, boost::is_same::value); + PMACC_CASSERT_MSG( + DataBoxTypes_must_match, + boost::is_same::value); /** * Constructor that creates the buffers with the given size @@ -69,12 +72,12 @@ namespace pmacc{ * Passing a size bigger than the buffer (minus the offset) is undefined. */ HostDeviceBuffer( - HBuffer& otherHostBuffer, - const DataSpace& offsetHost, - DBuffer& otherDeviceBuffer, - const DataSpace& offsetDevice, - const GridLayout size, - bool sizeOnDevice = false); + HBuffer& otherHostBuffer, + const DataSpace& offsetHost, + DBuffer& otherDeviceBuffer, + const DataSpace& offsetDevice, + const GridLayout size, + bool sizeOnDevice = false); HINLINE virtual ~HostDeviceBuffer(); @@ -111,12 +114,12 @@ namespace pmacc{ * Asynchronously copies data from internal device to internal host buffer. */ HINLINE void deviceToHost(); + private: HBuffer* hostBuffer; DBuffer* deviceBuffer; - }; -} // namespace pmacc +} // namespace pmacc #include "pmacc/memory/buffers/HostDeviceBuffer.tpp" diff --git a/include/pmacc/memory/buffers/HostDeviceBuffer.tpp b/include/pmacc/memory/buffers/HostDeviceBuffer.tpp index 8d677c2538..a4c43c850b 100644 --- a/include/pmacc/memory/buffers/HostDeviceBuffer.tpp +++ b/include/pmacc/memory/buffers/HostDeviceBuffer.tpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -24,37 +24,37 @@ #include "HostDeviceBuffer.hpp" -namespace pmacc{ - +namespace pmacc +{ template HostDeviceBuffer::HostDeviceBuffer(const DataSpace& size, bool sizeOnDevice) { - hostBuffer = new HostBufferIntern(size); + hostBuffer = new HostBufferIntern(size); deviceBuffer = new DeviceBufferIntern(size, sizeOnDevice); } template HostDeviceBuffer::HostDeviceBuffer( - DBuffer& otherDeviceBuffer, - const DataSpace& size, - bool sizeOnDevice) + DBuffer& otherDeviceBuffer, + const DataSpace& size, + bool sizeOnDevice) { - hostBuffer = new HostBufferIntern(size); + hostBuffer = new HostBufferIntern(size); deviceBuffer = new DeviceBufferType(otherDeviceBuffer, size, DataSpace(), sizeOnDevice); } template HostDeviceBuffer::HostDeviceBuffer( - HBuffer& otherHostBuffer, - const DataSpace& offsetHost, - DBuffer& otherDeviceBuffer, - const DataSpace& offsetDevice, - const GridLayout size, - bool sizeOnDevice) - { - hostBuffer = new HostBufferType(otherHostBuffer, size, offsetHost); + HBuffer& otherHostBuffer, + const DataSpace& offsetHost, + DBuffer& otherDeviceBuffer, + const DataSpace& offsetDevice, + const GridLayout size, + bool sizeOnDevice) + { + hostBuffer = new HostBufferType(otherHostBuffer, size, offsetHost); deviceBuffer = new DeviceBufferType(otherDeviceBuffer, size, offsetDevice, sizeOnDevice); - } + } template HostDeviceBuffer::~HostDeviceBuffer() @@ -94,4 +94,4 @@ namespace pmacc{ hostBuffer->copyFrom(*deviceBuffer); } -} // namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/MappedBufferIntern.hpp b/include/pmacc/memory/buffers/MappedBufferIntern.hpp index 5254250830..657b945a6e 100644 --- a/include/pmacc/memory/buffers/MappedBufferIntern.hpp +++ b/include/pmacc/memory/buffers/MappedBufferIntern.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Axel Huebl, Benjamin Worpitz, +/* Copyright 2014-2021 Rene Widera, Axel Huebl, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. 
@@ -30,50 +30,50 @@ namespace pmacc { - -/** Implementation of the DeviceBuffer interface for cuda mapped memory - * - * For all pmacc tasks and functions this buffer looks like native device buffer - * but in real it is stored in host memory. - */ -template -class MappedBufferIntern : public DeviceBuffer -{ - /** IMPORTANT: if someone implements that a MappedBufferIntern can points to an other - * mapped buffer then `getDataSpace()` in `getHostDataBox()` and `getDeviceDataBox` - * must be changed to `getPhysicalMemorySize` - */ -public: - - typedef typename DeviceBuffer::DataBoxType DataBoxType; - - /** constructor + /** Implementation of the DeviceBuffer interface for cuda mapped memory * - * @param size extent for each dimension (in elements) + * For all pmacc tasks and functions this buffer looks like native device buffer + * but in real it is stored in host memory. */ - MappedBufferIntern(DataSpace size): - DeviceBuffer(size, size), - pointer(nullptr), ownPointer(true) + template + class MappedBufferIntern : public DeviceBuffer { -#if( PMACC_CUDA_ENABLED == 1 ) - CUDA_CHECK((cuplaError_t)cudaHostAlloc(&pointer, size.productOfComponents() * sizeof (TYPE), cudaHostAllocMapped)); + /** IMPORTANT: if someone implements that a MappedBufferIntern can points to an other + * mapped buffer then `getDataSpace()` in `getHostDataBox()` and `getDeviceDataBox` + * must be changed to `getPhysicalMemorySize` + */ + public: + typedef typename DeviceBuffer::DataBoxType DataBoxType; + + /** constructor + * + * @param size extent for each dimension (in elements) + */ + MappedBufferIntern(DataSpace size) + : DeviceBuffer(size, size) + , pointer(nullptr) + , ownPointer(true) + { +#if(PMACC_CUDA_ENABLED == 1) + CUDA_CHECK(( + cuplaError_t) cudaHostAlloc(&pointer, size.productOfComponents() * sizeof(TYPE), cudaHostAllocMapped)); #else - pointer = new TYPE[size.productOfComponents()]; + pointer = new TYPE[size.productOfComponents()]; #endif - reset(false); - } - - /** - * destructor - 
*/ - virtual ~MappedBufferIntern() - { - __startOperation(ITask::TASK_CUDA); - __startOperation(ITask::TASK_HOST); + reset(false); + } - if (pointer && ownPointer) + /** + * destructor + */ + virtual ~MappedBufferIntern() { -#if( PMACC_CUDA_ENABLED == 1 ) + __startOperation(ITask::TASK_DEVICE); + __startOperation(ITask::TASK_HOST); + + if(pointer && ownPointer) + { +#if(PMACC_CUDA_ENABLED == 1) /* cupla 0.2.0 does not support the function cudaHostAlloc to create mapped memory. * Therefore we need to call the native CUDA function cudaFreeHost to free memory. * Due to the renaming of cuda functions with cupla via macros we need to remove @@ -85,137 +85,139 @@ class MappedBufferIntern : public DeviceBuffer * https://github.com/ComputationalRadiationPhysics/alpaka/issues/296 * https://github.com/ComputationalRadiationPhysics/alpaka/issues/612 */ -# undef cudaFreeHost - CUDA_CHECK((cuplaError_t)cudaFreeHost(pointer)); +# undef cudaFreeHost + CUDA_CHECK((cuplaError_t) cudaFreeHost(pointer)); // re-introduce the cupla macro -# define cudaFreeHost(...) cuplaFreeHost(__VA_ARGS__) +# define cudaFreeHost(...) cuplaFreeHost(__VA_ARGS__) #else - __deleteArray(pointer); + __deleteArray(pointer); #endif + } } - } - /*! Get unchanged device pointer of memory - * @return device pointer to memory - */ - TYPE* getBasePointer() - { - __startOperation(ITask::TASK_HOST); - return (TYPE*) this->getCudaPitched().ptr; - } + /*! Get unchanged device pointer of memory + * @return device pointer to memory + */ + TYPE* getBasePointer() + { + __startOperation(ITask::TASK_HOST); + return (TYPE*) this->getCudaPitched().ptr; + } - /*! Get device pointer of memory - * - * This pointer is shifted by the offset, if this buffer points to other - * existing buffer - * - * @return device pointer to memory - */ - TYPE* getPointer() - { - __startOperation(ITask::TASK_HOST); - return (TYPE*) this->getCudaPitched().ptr; - } + /*! 
Get device pointer of memory + * + * This pointer is shifted by the offset, if this buffer points to other + * existing buffer + * + * @return device pointer to memory + */ + TYPE* getPointer() + { + __startOperation(ITask::TASK_HOST); + return (TYPE*) this->getCudaPitched().ptr; + } - void copyFrom(HostBuffer& other) - { - PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); - Environment<>::get().Factory().createTaskCopyHostToDevice(other, *this); - } + void copyFrom(HostBuffer& other) + { + PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); + Environment<>::get().Factory().createTaskCopyHostToDevice(other, *this); + } - void copyFrom(DeviceBuffer& other) - { - PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); - Environment<>::get().Factory().createTaskCopyDeviceToDevice(other, *this); - } + void copyFrom(DeviceBuffer& other) + { + PMACC_ASSERT(this->isMyDataSpaceGreaterThan(other.getCurrentDataSpace())); + Environment<>::get().Factory().createTaskCopyDeviceToDevice(other, *this); + } - void reset(bool preserveData = true) - { - __startOperation(ITask::TASK_HOST); - this->setCurrentSize(this->getDataSpace().productOfComponents()); - if (!preserveData) - memset(pointer, 0, this->getDataSpace().productOfComponents() * sizeof (TYPE)); - } + void reset(bool preserveData = true) + { + __startOperation(ITask::TASK_HOST); + this->setCurrentSize(this->getDataSpace().productOfComponents()); + if(!preserveData) + memset(pointer, 0, this->getDataSpace().productOfComponents() * sizeof(TYPE)); + } - void setValue(const TYPE& value) - { - __startOperation(ITask::TASK_HOST); - size_t current_size = this->getCurrentSize(); - for (size_t i = 0; i < current_size; i++) + void setValue(const TYPE& value) { - pointer[i] = value; + __startOperation(ITask::TASK_HOST); + size_t current_size = this->getCurrentSize(); + for(size_t i = 0; i < current_size; i++) + { + pointer[i] = value; + } } - } - bool 
hasCurrentSizeOnDevice() const - { - return false; - } + bool hasCurrentSizeOnDevice() const + { + return false; + } - virtual size_t* getCurrentSizeHostSidePointer() - { - return this->current_size; - } + virtual size_t* getCurrentSizeHostSidePointer() + { + return this->current_size; + } - size_t* getCurrentSizeOnDevicePointer() - { - return nullptr; - } + size_t* getCurrentSizeOnDevicePointer() + { + return nullptr; + } - DataSpace getOffset() const - { - return DataSpace(); - } + DataSpace getOffset() const + { + return DataSpace(); + } - void setCurrentSize(const size_t size) - { - Buffer::setCurrentSize(size); - } + void setCurrentSize(const size_t size) + { + Buffer::setCurrentSize(size); + } - const cudaPitchedPtr getCudaPitched() const - { - __startOperation(ITask::TASK_CUDA); - TYPE* dPointer; - cudaHostGetDevicePointer(&dPointer, pointer, 0); + const cuplaPitchedPtr getCudaPitched() const + { + __startOperation(ITask::TASK_DEVICE); + TYPE* dPointer; + cuplaHostGetDevicePointer(&dPointer, pointer, 0); + + /* on 1D memory we have no size for y, therefore we set y to 1 to + * get a valid cuplaPitchedPtr + */ + int size_y = 1; + if(DIM > DIM1) + size_y = this->data_space[1]; + + return make_cuplaPitchedPtr(dPointer, this->data_space.x() * sizeof(TYPE), this->data_space.x(), size_y); + } - /* on 1D memory we have no size for y, therefore we set y to 1 to - * get a valid cudaPitchedPtr - */ - int size_y=1; - if(DIM>DIM1) - size_y= this->data_space[1]; - - return make_cudaPitchedPtr(dPointer, - this->data_space.x() * sizeof (TYPE), - this->data_space.x(), - size_y - ); - } - - size_t getPitch() const - { - return this->data_space.x() * sizeof (TYPE); - } + size_t getPitch() const + { + return this->data_space.x() * sizeof(TYPE); + } - DataBoxType getHostDataBox() - { - __startOperation(ITask::TASK_HOST); - return DataBoxType(PitchedBox (pointer, DataSpace (), - this->data_space, this->data_space[0] * sizeof (TYPE))); - } + DataBoxType getHostDataBox() + { + 
__startOperation(ITask::TASK_HOST); + return DataBoxType(PitchedBox( + pointer, + DataSpace(), + this->data_space, + this->data_space[0] * sizeof(TYPE))); + } - DataBoxType getDataBox() - { - __startOperation(ITask::TASK_CUDA); - TYPE* dPointer; - cudaHostGetDevicePointer(&dPointer, pointer, 0); - return DataBoxType(PitchedBox (dPointer, DataSpace (), - this->data_space, this->data_space[0] * sizeof (TYPE))); - } - -private: - TYPE* pointer; - bool ownPointer; -}; - -} + DataBoxType getDataBox() + { + __startOperation(ITask::TASK_DEVICE); + TYPE* dPointer; + cuplaHostGetDevicePointer(&dPointer, pointer, 0); + return DataBoxType(PitchedBox( + dPointer, + DataSpace(), + this->data_space, + this->data_space[0] * sizeof(TYPE))); + } + + private: + TYPE* pointer; + bool ownPointer; + }; + +} // namespace pmacc diff --git a/include/pmacc/memory/buffers/MultiGridBuffer.hpp b/include/pmacc/memory/buffers/MultiGridBuffer.hpp index 22f46f41d5..f62dd4b0b3 100644 --- a/include/pmacc/memory/buffers/MultiGridBuffer.hpp +++ b/include/pmacc/memory/buffers/MultiGridBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -37,242 +37,241 @@ namespace pmacc { - -template - struct TypeDescriptionElement -{ - typedef Type_ Type; - static constexpr uint32_t communicationTag = communicationTag_; - static constexpr bool sizeOnDevice = sizeOnDevice_; - - -}; - -/** - * GridBuffer represents a DIM-dimensional buffer which exists on the host as well as on the device. - * - * GridBuffer combines a HostBuffer and a DeviceBuffer with equal sizes. - * Additionally, it allows sending data from and receiving data to these buffers. - * Buffers consist of core data which may be surrounded by border data. 
- * - * @tparam Type_ datatype for internal Host- and DeviceBuffer - * @tparam DIM dimension of the buffers - * @tparam BufferNames a class with a enum with the name "Names" and member with the name "Count" with number of elements in Names - * etc.: - * struct Mem - * { - * enum Names{VALUE1,VALUE2}; - * static constexpr uint32_t Count=2; - * }; - * @tparam BORDERTYPE optional type for border data in the buffers. TYPE is used by default. - */ -template < -typename Type_, -unsigned DIM, -class BufferNames, -class BORDERTYPE = Type_> -class MultiGridBuffer -{ -public: - - typedef Type_ Type; - typedef DataBox > DataBoxType; - typedef GridBuffer GridBufferType; - typedef typename BufferNames::Names NameType; - - /** - * Constructor. - * - * @param gridLayout layout of the buffers, including border-cells - * @param firstCommunicationTag optional value which can be used to tag ('name') this buffer in communications - * @param sizeOnDevice if true, size information exists on device, too. - */ - MultiGridBuffer(const GridLayout& gridLayout, bool sizeOnDevice = false) : blobDeviceBuffer(nullptr),blobHostBuffer(nullptr) + template + struct TypeDescriptionElement { - init(gridLayout, sizeOnDevice); - } + typedef Type_ Type; + static constexpr uint32_t communicationTag = communicationTag_; + static constexpr bool sizeOnDevice = sizeOnDevice_; + }; /** - * Constructor. + * GridBuffer represents a DIM-dimensional buffer which exists on the host as well as on the device. * - * @param dataSpace DataSpace representing buffer size without border-cells - * @param firstCommunicationTag optional value which can be used to tag ('name') this buffer in communications - * @param sizeOnDevice if true, size information exists on device, too. - */ - MultiGridBuffer(DataSpace& dataSpace, bool sizeOnDevice = false) : blobDeviceBuffer(nullptr),blobHostBuffer(nullptr) - { - init(GridLayout (dataSpace), sizeOnDevice); - } - - /** - * Add Exchange in MultiGridBuffer memory space. 
+ * GridBuffer combines a HostBuffer and a DeviceBuffer with equal sizes. + * Additionally, it allows sending data from and receiving data to these buffers. + * Buffers consist of core data which may be surrounded by border data. * - * An Exchange is added to this MultiGridBuffer. The exchange buffers use - * the same memory as this MultiGridBuffer. - * - * @param dataPlace place where received data are stored [GUARD | BORDER] - * if dataPlace=GUARD than copy other BORDER to my GUARD - * if dataPlace=BORDER than copy other GUARD to my BORDER - * @param receive a Mask which describes the directions for the exchange - * @param guardingCells number of guarding cells in each dimension - * @param firstCommunicationTag a object unique number to connect same objects from different nodes - * (MultiGridBuffer reserves all tags from [firstCommunicationTag;firstCommunicationTag+BufferNames::Count] - * @param sizeOnDevice if true, internal buffers have their size information on the device, too + * @tparam Type_ datatype for internal Host- and DeviceBuffer + * @tparam DIM dimension of the buffers + * @tparam BufferNames a class with a enum with the name "Names" and member with the name "Count" with number of + * elements in Names etc.: struct Mem + * { + * enum Names{VALUE1,VALUE2}; + * static constexpr uint32_t Count=2; + * }; + * @tparam BORDERTYPE optional type for border data in the buffers. TYPE is used by default. */ - void addExchange(uint32_t dataPlace, const Mask &receive, DataSpace guardingCells, uint32_t firstCommunicationTag, bool sizeOnDevice = false) + template + class MultiGridBuffer { - for (uint32_t i = 0; i < BufferNames::Count; ++i) + public: + typedef Type_ Type; + typedef DataBox> DataBoxType; + typedef GridBuffer GridBufferType; + typedef typename BufferNames::Names NameType; + + /** + * Constructor. 
+ * + * @param gridLayout layout of the buffers, including border-cells + * @param firstCommunicationTag optional value which can be used to tag ('name') this buffer in communications + * @param sizeOnDevice if true, size information exists on device, too. + */ + MultiGridBuffer(const GridLayout& gridLayout, bool sizeOnDevice = false) + : blobDeviceBuffer(nullptr) + , blobHostBuffer(nullptr) { - getGridBuffer(static_cast (i)).addExchange(dataPlace, receive, guardingCells, firstCommunicationTag + i, sizeOnDevice); + init(gridLayout, sizeOnDevice); } - } - /** - * Destructor. - */ - virtual ~MultiGridBuffer() - { - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Constructor. + * + * @param dataSpace DataSpace representing buffer size without border-cells + * @param firstCommunicationTag optional value which can be used to tag ('name') this buffer in communications + * @param sizeOnDevice if true, size information exists on device, too. + */ + MultiGridBuffer(DataSpace& dataSpace, bool sizeOnDevice = false) + : blobDeviceBuffer(nullptr) + , blobHostBuffer(nullptr) { - __delete(gridBuffers[i]); + init(GridLayout(dataSpace), sizeOnDevice); } - __delete(blobDeviceBuffer); - __delete(blobHostBuffer); - } - /** - * Resets both internal buffers. - * - * See DeviceBuffer::reset and HostBuffer::reset for details. - * - * @param preserveData determines if data on internal buffers should not be erased - */ - void reset(bool preserveData = true) - { - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Add Exchange in MultiGridBuffer memory space. + * + * An Exchange is added to this MultiGridBuffer. The exchange buffers use + * the same memory as this MultiGridBuffer. 
+ * + * @param dataPlace place where received data are stored [GUARD | BORDER] + * if dataPlace=GUARD than copy other BORDER to my GUARD + * if dataPlace=BORDER than copy other GUARD to my BORDER + * @param receive a Mask which describes the directions for the exchange + * @param guardingCells number of guarding cells in each dimension + * @param firstCommunicationTag a object unique number to connect same objects from different nodes + * (MultiGridBuffer reserves all tags from [firstCommunicationTag;firstCommunicationTag+BufferNames::Count] + * @param sizeOnDevice if true, internal buffers have their size information on the device, too + */ + void addExchange( + uint32_t dataPlace, + const Mask& receive, + DataSpace guardingCells, + uint32_t firstCommunicationTag, + bool sizeOnDevice = false) { - getGridBuffer(static_cast (i)).reset(preserveData); + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + getGridBuffer(static_cast(i)) + .addExchange(dataPlace, receive, guardingCells, firstCommunicationTag + i, sizeOnDevice); + } } - } - /** - * Starts sync data from own device buffer to neighboring device buffer. - * - * Asynchronously starts synchronization of data from internal DeviceBuffer using added - * Exchange buffers. - * - */ - EventTask asyncCommunication(EventTask serialEvent) - { - EventTask ev; - - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Destructor. + */ + virtual ~MultiGridBuffer() { - ev += getGridBuffer(static_cast (i)).asyncCommunication(serialEvent); + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + __delete(gridBuffers[i]); + } + __delete(blobDeviceBuffer); + __delete(blobHostBuffer); } - return ev; - } - /** - * Starts sync data from own device buffer to neighboring device buffer. - * - * Asynchronously starts synchronization of data from internal DeviceBuffer using added - * Exchange buffers. - * This operation runs sequentially to other code but uses asynchronous operations internally. 
- * - */ - EventTask communication() - { - EventTask ev; - EventTask serialEvent = __getTransactionEvent(); - - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Resets both internal buffers. + * + * See DeviceBuffer::reset and HostBuffer::reset for details. + * + * @param preserveData determines if data on internal buffers should not be erased + */ + void reset(bool preserveData = true) { - ev += getGridBuffer(static_cast (i)).asyncCommunication(serialEvent); + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + getGridBuffer(static_cast(i)).reset(preserveData); + } } - __setTransactionEvent(ev); - return ev; - } - - /** - * Asynchronously copies data from internal host to internal device buffer. - * - */ - void hostToDevice() - { - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Starts sync data from own device buffer to neighboring device buffer. + * + * Asynchronously starts synchronization of data from internal DeviceBuffer using added + * Exchange buffers. + * + */ + EventTask asyncCommunication(EventTask serialEvent) { - getGridBuffer(static_cast (i)).hostToDevice(); + EventTask ev; + + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + ev += getGridBuffer(static_cast(i)).asyncCommunication(serialEvent); + } + return ev; } - } - /** - * Asynchronously copies data from internal device to internal host buffer. - */ - void deviceToHost() - { - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Starts sync data from own device buffer to neighboring device buffer. + * + * Asynchronously starts synchronization of data from internal DeviceBuffer using added + * Exchange buffers. + * This operation runs sequentially to other code but uses asynchronous operations internally. 
+ * + */ + EventTask communication() { - getGridBuffer(static_cast (i)).deviceToHost(); + EventTask ev; + EventTask serialEvent = __getTransactionEvent(); + + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + ev += getGridBuffer(static_cast(i)).asyncCommunication(serialEvent); + } + __setTransactionEvent(ev); + return ev; } - } - - GridBuffer& getGridBuffer(typename BufferNames::Names name) - { - PMACC_ASSERT(name >= 0 && name < BufferNames::Count); - return *gridBuffers[name]; - } - - DataBoxType getHostDataBox() - { - __startOperation(ITask::TASK_HOST); - return DataBoxType(MultiBox (getGridBuffer(static_cast (0)).getHostBuffer().getBasePointer(), - DataSpace (), - getGridBuffer(static_cast (0)).getHostBuffer().getPhysicalMemorySize(), - getGridBuffer(static_cast (0)).getHostBuffer().getPhysicalMemorySize().x() * sizeof (Type))); - } - DataBoxType getDeviceDataBox() - { - __startOperation(ITask::TASK_CUDA); - return DataBoxType(MultiBox (getGridBuffer(static_cast (0)).getDeviceBuffer().getBasePointer(), - getGridBuffer(static_cast (0)).getDeviceBuffer().getOffset(), - getGridBuffer(static_cast (0)).getDeviceBuffer().getPhysicalMemorySize(), - getGridBuffer(static_cast (0)).getDeviceBuffer().getCudaPitched().pitch)); - } - -private: - - void init(GridLayout gridLayout, bool sizeOnDevice) - { - DataSpace blobOffset; - blobOffset[DIM - 1] = gridLayout.getDataSpace()[DIM - 1]; - - DataSpace blobSize = gridLayout.getDataSpace() + blobOffset * (BufferNames::Count - 1); - - blobDeviceBuffer = new DeviceBufferIntern (blobSize, false); - blobHostBuffer = new HostBufferIntern (blobSize); - - for (uint32_t i = 0; i < BufferNames::Count; ++i) + /** + * Asynchronously copies data from internal host to internal device buffer. 
+ * + */ + void hostToDevice() { - DataSpace offset = blobOffset*i; - gridBuffers[i] = new GridBuffer ( - *blobHostBuffer, offset, - *blobDeviceBuffer, offset, - gridLayout, sizeOnDevice); + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + getGridBuffer(static_cast(i)).hostToDevice(); + } } - } + /** + * Asynchronously copies data from internal device to internal host buffer. + */ + void deviceToHost() + { + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + getGridBuffer(static_cast(i)).deviceToHost(); + } + } + GridBuffer& getGridBuffer(typename BufferNames::Names name) + { + PMACC_ASSERT(name >= 0 && name < BufferNames::Count); + return *gridBuffers[name]; + } -protected: + DataBoxType getHostDataBox() + { + __startOperation(ITask::TASK_HOST); + return DataBoxType(MultiBox( + getGridBuffer(static_cast(0)).getHostBuffer().getBasePointer(), + DataSpace(), + getGridBuffer(static_cast(0)).getHostBuffer().getPhysicalMemorySize(), + getGridBuffer(static_cast(0)).getHostBuffer().getPhysicalMemorySize().x() * sizeof(Type))); + } - DeviceBufferIntern* blobDeviceBuffer; - HostBufferIntern* blobHostBuffer; - GridBufferType* gridBuffers[BufferNames::Count]; + DataBoxType getDeviceDataBox() + { + __startOperation(ITask::TASK_DEVICE); + return DataBoxType(MultiBox( + getGridBuffer(static_cast(0)).getDeviceBuffer().getBasePointer(), + getGridBuffer(static_cast(0)).getDeviceBuffer().getOffset(), + getGridBuffer(static_cast(0)).getDeviceBuffer().getPhysicalMemorySize(), + getGridBuffer(static_cast(0)).getDeviceBuffer().getCudaPitched().pitch)); + } -}; -} + private: + void init(GridLayout gridLayout, bool sizeOnDevice) + { + DataSpace blobOffset; + blobOffset[DIM - 1] = gridLayout.getDataSpace()[DIM - 1]; + + DataSpace blobSize = gridLayout.getDataSpace() + blobOffset * (BufferNames::Count - 1); + + blobDeviceBuffer = new DeviceBufferIntern(blobSize, false); + blobHostBuffer = new HostBufferIntern(blobSize); + + for(uint32_t i = 0; i < BufferNames::Count; ++i) + { + 
DataSpace offset = blobOffset * i; + gridBuffers[i] = new GridBuffer( + *blobHostBuffer, + offset, + *blobDeviceBuffer, + offset, + gridLayout, + sizeOnDevice); + } + } + protected: + DeviceBufferIntern* blobDeviceBuffer; + HostBufferIntern* blobHostBuffer; + GridBufferType* gridBuffers[BufferNames::Count]; + }; +} // namespace pmacc diff --git a/include/pmacc/memory/dataTypes/Mask.hpp b/include/pmacc/memory/dataTypes/Mask.hpp index 7da82d5693..8c2431579a 100644 --- a/include/pmacc/memory/dataTypes/Mask.hpp +++ b/include/pmacc/memory/dataTypes/Mask.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, Wolfgang Hoenig, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, Wolfgang Hoenig, * Alexander Grund * * This file is part of PMacc. @@ -29,7 +29,6 @@ namespace pmacc { - /** * Mask is used to describe in which directions data must be * sent/received or where a grid node has neighbors. @@ -37,14 +36,12 @@ namespace pmacc class Mask { public: - /** * Constructor. * * Sets this mask to 0 (nothing). */ - Mask() : - bitMask(0u) + Mask() : bitMask(0u) { } @@ -55,8 +52,7 @@ namespace pmacc * * @param ex directions for this mask */ - Mask(ExchangeType ex) : - bitMask(1u << ex) + Mask(ExchangeType ex) : bitMask(1u << ex) { } @@ -67,8 +63,7 @@ namespace pmacc * * @param ex directions for this mask */ - Mask(uint32_t ex) : - bitMask(1u << ex) + Mask(uint32_t ex) : bitMask(1u << ex) { } @@ -90,7 +85,7 @@ namespace pmacc /** * Gives uint32_t value of this mask. 
*/ - Mask & operator=(uint32_t other) + Mask& operator=(uint32_t other) { bitMask = other; return *this; @@ -104,7 +99,7 @@ namespace pmacc * @param other Mask with directions to join * @return the newly created mask */ - Mask operator+(const Mask &other) const + Mask operator+(const Mask& other) const { Mask result; result.bitMask = bitMask | other.bitMask; @@ -119,7 +114,7 @@ namespace pmacc * @param other Mask with directions to intersect with * @return the newly created mask */ - Mask operator&(const Mask &other) const + Mask operator&(const Mask& other) const { Mask result; result.bitMask = bitMask & other.bitMask; @@ -139,18 +134,18 @@ namespace pmacc */ HDINLINE bool containsExchangeType(uint32_t ex) const { - for (uint32_t i = 1; i < 27; i++) //first bit in mask is 1u<= 3) + while(tmp_ex >= 3) { tmp_ex /= 3; tmp /= 3; } - if (tmp % 3 == tmp_ex) + if(tmp % 3 == tmp_ex) return true; } } @@ -182,9 +177,9 @@ namespace pmacc Mask getMirroredMask() const { uint32_t tmp = 0; - for (uint32_t i = 1; i < 27; i++) //first bit in mask is 1u<= traits::NumberOfExchanges::value) + if(ex >= traits::NumberOfExchanges::value) throw std::runtime_error("parameter exceeds allowed maximum"); Mask mask(ex); uint32_t tmp = 0; - if (mask.containsExchangeType(RIGHT)) + if(mask.containsExchangeType(RIGHT)) tmp += LEFT; - if (mask.containsExchangeType(LEFT)) + if(mask.containsExchangeType(LEFT)) tmp += RIGHT; - if (mask.containsExchangeType(BOTTOM)) + if(mask.containsExchangeType(BOTTOM)) tmp += TOP; - if (mask.containsExchangeType(TOP)) + if(mask.containsExchangeType(TOP)) tmp += BOTTOM; - if (mask.containsExchangeType(FRONT)) + if(mask.containsExchangeType(FRONT)) tmp += BACK; - if (mask.containsExchangeType(BACK)) + if(mask.containsExchangeType(BACK)) tmp += FRONT; return (ExchangeType) tmp; @@ -237,11 +232,11 @@ namespace pmacc * @return DataSpace with relative offsets */ template - static HDINLINE DataSpace getRelativeDirections( uint32_t direction) + static HDINLINE DataSpace 
getRelativeDirections(uint32_t direction) { DataSpace tmp; - for( uint32_t d = 0; d < DIM; ++d ) + for(uint32_t d = 0; d < DIM; ++d) { const int dim_direction(direction % 3); tmp[d] = (dim_direction == 2 ? -1 : dim_direction); @@ -251,12 +246,10 @@ namespace pmacc } protected: - /** * mask which is a combination of the type \see ExchangeType */ uint32_t bitMask; - }; /** special implementation for `DIM1` @@ -264,9 +257,9 @@ namespace pmacc * optimization: no modulo is used */ template<> - HDINLINE DataSpace Mask::getRelativeDirections( uint32_t direction) + HDINLINE DataSpace Mask::getRelativeDirections(uint32_t direction) { - return (direction == 2 ? DataSpace (-1) : DataSpace (direction)); + return (direction == 2 ? DataSpace(-1) : DataSpace(direction)); } -} +} // namespace pmacc diff --git a/include/pmacc/memory/shared/Allocate.hpp b/include/pmacc/memory/shared/Allocate.hpp index d60c5e8b00..a8ef472773 100644 --- a/include/pmacc/memory/shared/Allocate.hpp +++ b/include/pmacc/memory/shared/Allocate.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,87 +28,60 @@ namespace pmacc { -namespace memory -{ -namespace shared -{ - - /** allocate shared memory - * - * shared memory is always uninitialized - * - * @tparam T_uniqueId unique id for this object - * (is needed if more than one instance of shared memory in one kernel is used) - * @tparam T_Type type of the stored object - */ - template< - uint32_t T_uniqueId, - typename T_Type - > - struct Allocate + namespace memory { - /** get a shared memory - * - * @return reference to shared memory - */ - template< typename T_Acc > - static DINLINE T_Type & - get( T_Acc const & acc ) + namespace shared { - auto& smem = ::alpaka::block::shared::st::allocVar< - T_Type, - T_uniqueId - >( acc ); - return smem; - } - }; + /** allocate shared memory + * + * shared memory is always uninitialized + * + * @tparam T_uniqueId unique id for this object + * (is needed if more than one instance of shared memory in one kernel is used) + * @tparam T_Type type of the stored object + */ + template + struct Allocate + { + /** get a shared memory + * + * @return reference to shared memory + */ + template + static DINLINE T_Type& get(T_Acc const& acc) + { + auto& smem = ::alpaka::declareSharedVar(acc); + return smem; + } + }; - /** allocate shared memory - * - * shared memory is always uninitialized - * - * @tparam T_uniqueId unique id for this object - * (is needed if more than one instance of shared memory in one kernel is used) - * @tparam T_Type type of the stored object - * @return reference to shared memory - * - * @{ - */ - template< - uint32_t T_uniqueId, - typename T_Type, - typename T_Acc - > - DINLINE T_Type& - allocate( T_Acc const & acc ) - { - return Allocate< - T_uniqueId, - T_Type - >::get( acc ); - } + /** allocate shared memory + * + * shared memory is always uninitialized + * + * @tparam T_uniqueId unique id for this object + * (is needed if more than one instance of shared memory in one kernel is used) + * @tparam T_Type type of the stored object + * @return 
reference to shared memory + * + * @{ + */ + template + DINLINE T_Type& allocate(T_Acc const& acc) + { + return Allocate::get(acc); + } - /* @param instance of the type to store (is not to initialize the shared memory) */ - template< - uint32_t T_uniqueId, - typename T_Type, - typename T_Acc - > - DINLINE T_Type& - allocate( - T_Acc const & acc, - T_Type const & - ) - { - return Allocate< - T_uniqueId, - T_Type - >::get( ); - } - /** @} */ + /* @param instance of the type to store (is not to initialize the shared memory) */ + template + DINLINE T_Type& allocate(T_Acc const& acc, T_Type const&) + { + return Allocate::get(); + } + /** @} */ -} // namespace shared -} // namespace memory + } // namespace shared + } // namespace memory } // namespace pmacc /** allocate shared memory @@ -149,4 +122,4 @@ namespace shared * @param varName name of the variable * @param ... type of the variable */ -#define PMACC_SMEM( acc, varName, ... ) auto & varName = pmacc::memory::shared::allocate< __COUNTER__, __VA_ARGS__ >( acc ) +#define PMACC_SMEM(acc, varName, ...) auto& varName = pmacc::memory::shared::allocate<__COUNTER__, __VA_ARGS__>(acc) diff --git a/include/pmacc/meta/AllCombinations.hpp b/include/pmacc/meta/AllCombinations.hpp index 68879715a9..21399063c0 100644 --- a/include/pmacc/meta/AllCombinations.hpp +++ b/include/pmacc/meta/AllCombinations.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2014-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -38,148 +38,135 @@ namespace pmacc { -namespace bmpl = boost::mpl; + namespace bmpl = boost::mpl; -namespace detail -{ -/** Create tuples out of the elements of N sequences - * - * Combines all elements of N given sequences in T_MplSeq into N-tuples. - * If the number of elements in each sequence is S0, S1, ... S(N-1) - * than the resulting sequence will contain S0 * S1 * ... S(N-1) tuples. 
- * - * @tparam T_MplSeq sequence of input sequences - * @tparam T_TmpResult temporary result - * @tparam T_isEmpty true if T_MplSeq is empty else false - */ -template, -bool T_isEmpty = bmpl::empty::value -> -struct AllCombinations; - -/** implementation for inner recursive creation - */ -template -struct AllCombinations -{ - typedef T_MplSeq MplSeq; - typedef T_TmpResult TmpResult; - - static constexpr uint32_t rangeVectorSize = bmpl::size::value; - typedef typename bmpl::at > ::type LastElement; - typedef bmpl::empty IsLastElementEmpty; - typedef typename MakeSeq::type LastElementAsSequence; - typedef typename bmpl::pop_back::type ShrinkedRangeVector; - - /* copy last given sequence to a mpl::vector to be sure that we can later on - * call mpl::transform even if the input sequence is mpl::range_c + namespace detail + { + /** Create tuples out of the elements of N sequences + * + * Combines all elements of N given sequences in T_MplSeq into N-tuples. + * If the number of elements in each sequence is S0, S1, ... S(N-1) + * than the resulting sequence will contain S0 * S1 * ... S(N-1) tuples. 
+ * + * @tparam T_MplSeq sequence of input sequences + * @tparam T_TmpResult temporary result + * @tparam T_isEmpty true if T_MplSeq is empty else false + */ + template< + typename T_MplSeq, + typename T_TmpResult = bmpl::vector0<>, + bool T_isEmpty = bmpl::empty::value> + struct AllCombinations; + + /** implementation for inner recursive creation + */ + template + struct AllCombinations + { + typedef T_MplSeq MplSeq; + typedef T_TmpResult TmpResult; + + static constexpr uint32_t rangeVectorSize = bmpl::size::value; + typedef typename bmpl::at>::type LastElement; + typedef bmpl::empty IsLastElementEmpty; + typedef typename MakeSeq::type LastElementAsSequence; + typedef typename bmpl::pop_back::type ShrinkedRangeVector; + + /* copy last given sequence to a mpl::vector to be sure that we can later on + * call mpl::transform even if the input sequence is mpl::range_c + */ + typedef typename bmpl::copy>>::type TmpVector; + + /** Assign to each element in a sequence of CT::Vector(s) a type at a given + * component position + * + * @tparam T_ComponentPos position of the component to be changed (type must be + * bmpl::integral_c) + * @tparam T_Element value (type) which should replace the component at position T_Component + * in the CT::Vector elements + */ + template + struct AssignToAnyElementInVector + { + typedef TmpResult InVector; + typedef T_Element Element; + + typedef typename bmpl:: + transform>::type type; + }; + + typedef typename bmpl::transform< + TmpVector, + AssignToAnyElementInVector, bmpl::_1>>::type NestedSeq; + + typedef typename MakeSeqFromNestedSeq::type OneSeq; + + typedef typename detail::AllCombinations::type ResultIfNotEmpty; + typedef typename bmpl::if_, ResultIfNotEmpty>::type type; + }; + + /** recursive end implementation + */ + template + struct AllCombinations + { + typedef T_TmpResult type; + }; + + } // namespace detail + + + /** Create tuples out of the elements of N sequences + * + * Combines all elements of N given sequences in 
T_MplSeq into N-tuples. + * If the number of elements in each sequence is S0, S1, ... S(N-1) + * than the resulting sequence will contain S0 * S1 * ... S(N-1) tuples. + * + * example: + * + * sequence == [ ] + * tuple == ( ) + * + * T_MplSeq = [[1,2],[1],[4,3]] + * combined to + * AllCombinations::type = [(1,1,4),(1,1,3),(2,1,4),(2,1,3)] + * + * @tparam T_MplSeq N-dimensional sequence with input values + * or single type (e.g. `bmpl::integral_c`) + * (if `T_MplSeq` is only one type it will be transformed to a sequence) + * @typedef AllCombinations::type + * MplSequence of N-tuples */ - typedef typename bmpl::copy > >::type TmpVector; - - /** Assign to each element in a sequence of CT::Vector(s) a type at a given - * component position - * - * @tparam T_ComponentPos position of the component to be changed (type must be bmpl::integral_c) - * @tparam T_Element value (type) which should replace the component at position T_Component - * in the CT::Vector elements - */ - template< - typename T_ComponentPos, - typename T_Element - > - struct AssignToAnyElementInVector + template + struct AllCombinations { - typedef TmpResult InVector; - typedef T_Element Element; - - typedef typename bmpl::transform< - InVector, - pmacc::math::CT::Assign< - bmpl::_1, - T_ComponentPos, - Element - > - >::type type; - }; - - typedef typename bmpl::transform< - TmpVector, - AssignToAnyElementInVector< - bmpl::integral_c, - bmpl::_1 - > - >::type NestedSeq; + /* if T_MplSeq is no sequence it is a single type, we put this type in + * a sequence because all next algorithms can only work with sequences */ + typedef typename MakeSeq::type MplSeq; - typedef typename MakeSeqFromNestedSeq::type OneSeq; + static constexpr uint32_t rangeVectorSize = bmpl::size::value; + typedef typename bmpl::at>::type LastElement; + typedef bmpl::empty IsLastElementEmpty; + typedef typename MakeSeq::type LastElementAsSequence; - typedef typename detail::AllCombinations::type ResultIfNotEmpty; - typedef typename 
bmpl::if_,ResultIfNotEmpty>::type type; -}; + typedef typename bmpl::pop_back::type ShrinkedRangeVector; + /* copy last given sequence to a mpl::vector to be sure that we can later on + * call mpl::transform even if the input sequence is mpl::range_c + */ + typedef typename bmpl::copy>>::type TmpVector; -/** recursive end implementation - */ -template -struct AllCombinations -{ - typedef T_TmpResult type; -}; -} //detail - - -/** Create tuples out of the elements of N sequences - * - * Combines all elements of N given sequences in T_MplSeq into N-tuples. - * If the number of elements in each sequence is S0, S1, ... S(N-1) - * than the resulting sequence will contain S0 * S1 * ... S(N-1) tuples. - * - * example: - * - * sequence == [ ] - * tuple == ( ) - * - * T_MplSeq = [[1,2],[1],[4,3]] - * combined to - * AllCombinations::type = [(1,1,4),(1,1,3),(2,1,4),(2,1,3)] - * - * @tparam T_MplSeq N-dimensional sequence with input values - * or single type (e.g. `bmpl::integral_c`) - * (if `T_MplSeq` is only one type it will be transformed to a sequence) - * @typedef AllCombinations::type - * MplSequence of N-tuples - */ -template -struct AllCombinations -{ - /* if T_MplSeq is no sequence it is a single type, we put this type in - * a sequence because all next algorithms can only work with sequences */ - typedef typename MakeSeq::type MplSeq; - - static constexpr uint32_t rangeVectorSize = bmpl::size::value; - typedef typename bmpl::at > ::type LastElement; - typedef bmpl::empty IsLastElementEmpty; - typedef typename MakeSeq::type LastElementAsSequence; - - typedef typename bmpl::pop_back::type ShrinkedRangeVector; - /* copy last given sequence to a mpl::vector to be sure that we can later on - * call mpl::transform even if the input sequence is mpl::range_c - */ - typedef typename bmpl::copy > >::type TmpVector; - - - - /* transform all elements in the vector to math::CT::vector<> */ - typedef math::CT::Vector<> EmptyVector; - typedef typename bmpl::transform< - TmpVector, 
- pmacc::math::CT::Assign, bmpl::_1> - >::type FirstList; + /* transform all elements in the vector to math::CT::vector<> */ + typedef math::CT::Vector<> EmptyVector; + typedef typename bmpl::transform< + TmpVector, + pmacc::math::CT::Assign, bmpl::_1>>::type + FirstList; - /* result type: MplSequence of N-tuples */ - typedef typename detail::AllCombinations::type ResultIfNotEmpty; - typedef typename bmpl::if_,ResultIfNotEmpty>::type type; -}; + /* result type: MplSequence of N-tuples */ + typedef typename detail::AllCombinations::type ResultIfNotEmpty; + typedef typename bmpl::if_, ResultIfNotEmpty>::type type; + }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/ForEach.hpp b/include/pmacc/meta/ForEach.hpp index 72a6468301..2236ab3a4a 100644 --- a/include/pmacc/meta/ForEach.hpp +++ b/include/pmacc/meta/ForEach.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -34,158 +34,114 @@ namespace pmacc { -namespace meta -{ -namespace detail -{ - /** call the functor were itBegin points to - * - * \tparam itBegin iterator to an element in a mpl sequence - * \tparam itEnd iterator to the end of a mpl sequence - * \tparam isEnd true if itBegin == itEnd, else false - */ - template< - typename itBegin, - typename itEnd, - bool isEnd = boost::is_same< - itBegin, - itEnd - >::value - > - struct CallFunctorOfIterator + namespace meta { - typedef typename boost::mpl::next< itBegin >::type nextIt; - typedef typename boost::mpl::deref< itBegin >::type Functor; - typedef CallFunctorOfIterator< - nextIt, - itEnd - > NextCall; - - PMACC_NO_NVCC_HDWARNING - template< typename ... T_Types > - HDINLINE void - operator( )( T_Types && ... ts ) const + namespace detail { - Functor( )( std::forward< T_Types >( ts ) ... ); - NextCall( )( ts ... ); - } - - PMACC_NO_NVCC_HDWARNING - template< typename... 
T_Types > - HDINLINE void - operator( )( T_Types && ... ts ) - { - Functor( )( std::forward< T_Types >( ts ) ... ); - NextCall( )( ts ... ); - } - }; - - /** Recursion end of ForEach */ - template< - typename itBegin, - typename itEnd> - struct CallFunctorOfIterator< - itBegin, - itEnd, - true - > - { - PMACC_NO_NVCC_HDWARNING - template< typename ... T_Types > - HDINLINE void - operator()( T_Types && ... ) const - { - - } - - PMACC_NO_NVCC_HDWARNING - template< typename ... T_Types > - HDINLINE void - operator()( T_Types && ... ) + /** call the functor were itBegin points to + * + * \tparam itBegin iterator to an element in a mpl sequence + * \tparam itEnd iterator to the end of a mpl sequence + * \tparam isEnd true if itBegin == itEnd, else false + */ + template::value> + struct CallFunctorOfIterator + { + typedef typename boost::mpl::next::type nextIt; + typedef typename boost::mpl::deref::type Functor; + typedef CallFunctorOfIterator NextCall; + + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Types&&... ts) const + { + Functor()(std::forward(ts)...); + NextCall()(ts...); + } + + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Types&&... ts) + { + Functor()(std::forward(ts)...); + NextCall()(ts...); + } + }; + + /** Recursion end of ForEach */ + template + struct CallFunctorOfIterator + { + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Types&&...) const + { + } + + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Types&&...) + { + } + }; + + } // namespace detail + + /** Compile-Time for each for Boost::MPL Type Lists + * + * \tparam T_MPLSeq A mpl sequence that can be accessed by mpl::begin, mpl::end, mpl::next + * \tparam T_Functor An unary lambda functor with a HDINLINE void operator()(...) method + * _1 is substituted by Accessor's result using boost::mpl::apply with elements from T_MPLSeq. 
+ * The maximum number of parameters for the operator() is limited by + * PMACC_MAX_FUNCTOR_OPERATOR_PARAMS + * \tparam T_Accessor An unary lambda operation + * + * Example: + * MPLSeq = boost::mpl::vector + * Functor = any unary lambda functor + * Accessor = lambda operation identity + * + * definition: F(X) means boost::apply + * + * call: ForEach()(42); + * unrolled code: Functor(Accessor(int))(42); + * Functor(Accessor(float))(42); + */ + template> + struct ForEach { + template + struct ReplacePlaceholder : bmpl::apply1::type> + { + }; - } - }; - -} // namespace detail - - /** Compile-Time for each for Boost::MPL Type Lists - * - * \tparam T_MPLSeq A mpl sequence that can be accessed by mpl::begin, mpl::end, mpl::next - * \tparam T_Functor An unary lambda functor with a HDINLINE void operator()(...) method - * _1 is substituted by Accessor's result using boost::mpl::apply with elements from T_MPLSeq. - * The maximum number of parameters for the operator() is limited by - * PMACC_MAX_FUNCTOR_OPERATOR_PARAMS - * \tparam T_Accessor An unary lambda operation - * - * Example: - * MPLSeq = boost::mpl::vector - * Functor = any unary lambda functor - * Accessor = lambda operation identity - * - * definition: F(X) means boost::apply - * - * call: ForEach()(42); - * unrolled code: Functor(Accessor(int))(42); - * Functor(Accessor(float))(42); - */ - template< - typename T_MPLSeq, - typename T_Functor, - typename T_Accessor = meta::accessors::Identity< > - > - struct ForEach - { - - template< typename X > - struct ReplacePlaceholder : bmpl::apply1< - T_Functor, - typename bmpl::apply1< - T_Accessor, - X - >::type - > - { - }; + typedef typename bmpl::transform>::type SolvedFunctors; - typedef typename bmpl::transform< - T_MPLSeq, - ReplacePlaceholder< bmpl::_1 > - >::type SolvedFunctors; + typedef typename boost::mpl::begin::type begin; + typedef typename boost::mpl::end::type end; - typedef typename boost::mpl::begin< SolvedFunctors >::type begin; - typedef typename 
boost::mpl::end< SolvedFunctors >::type end; + typedef detail::CallFunctorOfIterator NextCall; - typedef detail::CallFunctorOfIterator< - begin, - end - > NextCall; + /* this functor does nothing */ + typedef detail::CallFunctorOfIterator Functor; - /* this functor does nothing */ - typedef detail::CallFunctorOfIterator< - end, - end - > Functor; + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Types&&... ts) const + { + Functor()(std::forward(ts)...); + NextCall()(ts...); + } - PMACC_NO_NVCC_HDWARNING - template< typename ... T_Types > - HDINLINE void - operator( )( T_Types && ... ts ) const - { - Functor()( std::forward< T_Types >( ts ) ... ); - NextCall()( ts ... ); - } - - PMACC_NO_NVCC_HDWARNING - template< typename ... T_Types > - HDINLINE void - operator( )( T_Types && ... ts ) - { - Functor( )( std::forward< T_Types >( ts ) ... ); - NextCall( )( ts ... ); - } - }; + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Types&&... ts) + { + Functor()(std::forward(ts)...); + NextCall()(ts...); + } + }; -} // namespace meta + } // namespace meta } // namespace pmacc diff --git a/include/pmacc/meta/GetKeyFromAlias.hpp b/include/pmacc/meta/GetKeyFromAlias.hpp index 1a9c5341d0..af52d63867 100644 --- a/include/pmacc/meta/GetKeyFromAlias.hpp +++ b/include/pmacc/meta/GetKeyFromAlias.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. * @@ -33,44 +33,38 @@ namespace pmacc { - -/** - * Returns the key type from an alias - * - * \tparam T_MPLSeq Sequence of keys to search - * \tparam T_Key Key or alias of a key in the sequence - * \tparam T_KeyNotFoundPolicy Binary meta-function that is called like (T_MPLSeq, T_Key) - * when T_Key is not found in the sequence. 
Default is to return bmpl::void_ - */ -template -> -struct GetKeyFromAlias -{ -private: - typedef T_KeyNotFoundPolicy KeyNotFoundPolicy; - /*create a map where Key is a undeclared alias and value is real type*/ - typedef typename SeqToMap >::type AliasMap; - /*create a map where Key and value is real type*/ - typedef typename SeqToMap >::type KeyMap; - /*combine both maps*/ - typedef bmpl::inserter< KeyMap, bmpl::insert > Map_inserter; - typedef typename bmpl::copy< - AliasMap, - Map_inserter - >::type FullMap; - /* search for given key, - * - we get the real type if key found - * - else we get boost::mpl::void_ + /** + * Returns the key type from an alias + * + * \tparam T_MPLSeq Sequence of keys to search + * \tparam T_Key Key or alias of a key in the sequence + * \tparam T_KeyNotFoundPolicy Binary meta-function that is called like (T_MPLSeq, T_Key) + * when T_Key is not found in the sequence. Default is to return bmpl::void_ */ - typedef typename bmpl::at::type MapType; -public: - /* Check for KeyNotFound and calculate final type. (Uses lazy evaluation) */ - typedef typename bmpl::if_< - boost::is_same, - bmpl::apply, - bmpl::identity >::type::type type; -}; + template> + struct GetKeyFromAlias + { + private: + typedef T_KeyNotFoundPolicy KeyNotFoundPolicy; + /*create a map where Key is a undeclared alias and value is real type*/ + typedef typename SeqToMap>::type AliasMap; + /*create a map where Key and value is real type*/ + typedef typename SeqToMap>::type KeyMap; + /*combine both maps*/ + typedef bmpl::inserter> Map_inserter; + typedef typename bmpl::copy::type FullMap; + /* search for given key, + * - we get the real type if key found + * - else we get boost::mpl::void_ + */ + typedef typename bmpl::at::type MapType; + + public: + /* Check for KeyNotFound and calculate final type. 
(Uses lazy evaluation) */ + typedef typename bmpl::if_< + boost::is_same, + bmpl::apply, + bmpl::identity>::type::type type; + }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/String.hpp b/include/pmacc/meta/String.hpp index f0f285e7c8..a70f8f9714 100644 --- a/include/pmacc/meta/String.hpp +++ b/include/pmacc/meta/String.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. * @@ -26,92 +26,72 @@ namespace pmacc { -namespace meta -{ - /** get character of an C-string - * - * @tparam T_len length of the string - * - * @param cstr input string - * @param idx index of the character - * @return if x < T_len character at index idx, else '0' - */ - template< - int T_len - > - constexpr auto - elem_at( - char const ( & cstr )[ T_len ], - size_t const idx - ) - -> char - { - return idx < T_len ? cstr[ idx ] : 0; - } - - /** compile time string - * - * The size of the instance is 1 byte. - */ - template< char ... T_c > - struct String + namespace meta { - /** get stored string */ - static auto - str() - -> std::string + /** get character of an C-string + * + * @tparam T_len length of the string + * + * @param cstr input string + * @param idx index of the character + * @return if x < T_len character at index idx, else '0' + */ + template + constexpr auto elem_at(char const (&cstr)[T_len], size_t const idx) -> char { - return std::string( - std::array< - char, - sizeof...( T_c ) + 1 - >( { - T_c ..., - // at terminal zero to support empty strings - 0 - } ).data( ) - ); + return idx < T_len ? cstr[idx] : 0; } - }; + + /** compile time string + * + * The size of the instance is 1 byte. 
+ */ + template + struct String + { + /** get stored string */ + static auto str() -> std::string + { + return std::string(std::array({T_c..., + // at terminal zero to support empty strings + 0}) + .data()); + } + }; -#define PMACC_CHAR_AT_N(z, n, name ) pmacc::meta::elem_at< sizeof(name) >( name, n ), +#define PMACC_CHAR_AT_N(z, n, name) pmacc::meta::elem_at(name, n), -/** create a compile time string type - * - * Support strings with up to 64 characters. - * Longer strings are cropped to 64 characters. - * - * usage example: - * @code{.cpp} - * // create an instance of the compile time string - * auto particleName = PMACC_CSTRING( "electrons" ){}; - * // create a C++ type (can be used as template parameter) - * using Electrons = PMACC_CSTRING( "electrons" ); - * @endcode - */ + /** create a compile time string type + * + * Support strings with up to 64 characters. + * Longer strings are cropped to 64 characters. + * + * usage example: + * @code{.cpp} + * // create an instance of the compile time string + * auto particleName = PMACC_CSTRING( "electrons" ){}; + * // create a C++ type (can be used as template parameter) + * using Electrons = PMACC_CSTRING( "electrons" ); + * @endcode + */ -#define PMACC_CSTRING( str ) \ - /* // PMACC_CSTRING("example") is transformed in \ - * pmacc::meta::String< \ - * pmacc::meta::elem_at< sizeof("example") >( sizeof("example", 0 ), \ - * pmacc::meta::elem_at< sizeof("example") >( sizeof("example", 1 ), \ - * ... 
\ - * pmacc::meta::elem_at< sizeof("example") >( sizeof("example", 63 ), \ - * 0 \ - * > \ - */ \ - pmacc::meta::String< \ - BOOST_PP_REPEAT_FROM_TO( \ - 0, \ - /* support up to 64 charactres */ \ - 64, \ - PMACC_CHAR_AT_N, \ - str \ - ) \ - /* add a end zero because PMACC_CHAR_AT_N end with a comma */ \ - 0 \ - > +#define PMACC_CSTRING(str) \ + /* // PMACC_CSTRING("example") is transformed in \ + * pmacc::meta::String< \ + * pmacc::meta::elem_at< sizeof("example") >( sizeof("example", 0 ), \ + * pmacc::meta::elem_at< sizeof("example") >( sizeof("example", 1 ), \ + * ... \ + * pmacc::meta::elem_at< sizeof("example") >( sizeof("example", 63 ), \ + * 0 \ + * > \ + */ \ + pmacc::meta::String -} // namespace meta + } // namespace meta } // namespace pmacc diff --git a/include/pmacc/meta/accessors/First.hpp b/include/pmacc/meta/accessors/First.hpp index de971482cb..2cf253999c 100644 --- a/include/pmacc/meta/accessors/First.hpp +++ b/include/pmacc/meta/accessors/First.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,26 +26,24 @@ namespace pmacc { -namespace meta -{ - -namespace accessors -{ - -/** Get first type of the given type - * - * \tparam T type from which we return the first held type - * - * T must have defined ::first - */ -template -struct First -{ - typedef typename T::first type; -}; - -}//namespace accessors - -}//namespace meta - -}//namespace pmacc + namespace meta + { + namespace accessors + { + /** Get first type of the given type + * + * \tparam T type from which we return the first held type + * + * T must have defined ::first + */ + template + struct First + { + typedef typename T::first type; + }; + + } // namespace accessors + + } // namespace meta + +} // namespace pmacc diff --git a/include/pmacc/meta/accessors/Identity.hpp b/include/pmacc/meta/accessors/Identity.hpp index 12dbbbcee9..f7c1f6c64c 100644 --- a/include/pmacc/meta/accessors/Identity.hpp +++ b/include/pmacc/meta/accessors/Identity.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -27,25 +27,22 @@ namespace pmacc { -namespace meta -{ - -namespace accessors -{ - -/** Get the type of a given type without changes - * - * \tparam T in type - * - */ -template -struct Identity : bmpl::identity -{ - -}; - -}//namespace accessors - -}//namespace meta - -}//namespace pmacc + namespace meta + { + namespace accessors + { + /** Get the type of a given type without changes + * + * \tparam T in type + * + */ + template + struct Identity : bmpl::identity + { + }; + + } // namespace accessors + + } // namespace meta + +} // namespace pmacc diff --git a/include/pmacc/meta/accessors/Second.hpp b/include/pmacc/meta/accessors/Second.hpp index 2b870f4a7a..fa2d1c8b93 100644 --- a/include/pmacc/meta/accessors/Second.hpp +++ b/include/pmacc/meta/accessors/Second.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,26 +26,24 @@ namespace pmacc { -namespace meta -{ - -namespace accessors -{ - -/** Get second type of the given type - * - * \tparam T type from which we return the second held type - * - * T must have defined ::second - */ -template -struct Second -{ - typedef typename T::second type; -}; - -}//namespace accessors - -}//namespace meta - -}//namespace pmacc + namespace meta + { + namespace accessors + { + /** Get second type of the given type + * + * \tparam T type from which we return the second held type + * + * T must have defined ::second + */ + template + struct Second + { + typedef typename T::second type; + }; + + } // namespace accessors + + } // namespace meta + +} // namespace pmacc diff --git a/include/pmacc/meta/accessors/Type.hpp b/include/pmacc/meta/accessors/Type.hpp index 0870ceb3f3..8a30460431 100644 --- a/include/pmacc/meta/accessors/Type.hpp +++ b/include/pmacc/meta/accessors/Type.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PMacc. 
* @@ -27,22 +27,22 @@ namespace pmacc { -namespace meta -{ -namespace accessors -{ - /** Get ::type member of the given type - * - * @tparam T type from which we return the type held in ::type - * - * T must have defined ::type - */ - template< typename T = bmpl::_1 > - struct Type + namespace meta { - using type = typename T::type; - }; + namespace accessors + { + /** Get ::type member of the given type + * + * @tparam T type from which we return the type held in ::type + * + * T must have defined ::type + */ + template + struct Type + { + using type = typename T::type; + }; -} // namespace accessors -} // namespace meta + } // namespace accessors + } // namespace meta } // namespace pmacc diff --git a/include/pmacc/meta/conversion/JoinToSeq.hpp b/include/pmacc/meta/conversion/JoinToSeq.hpp index bbe8bcdd19..fe9ad36fe0 100644 --- a/include/pmacc/meta/conversion/JoinToSeq.hpp +++ b/include/pmacc/meta/conversion/JoinToSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -30,24 +30,21 @@ namespace pmacc { + /** Join both input types to one boost mpl sequence + * + * @tparam T_1 a boost mpl sequence or single type + * @tparam T_2 a boost mpl sequence or single type + */ -/** Join both input types to one boost mpl sequence - * - * @tparam T_1 a boost mpl sequence or single type - * @tparam T_2 a boost mpl sequence or single type - */ + template> + struct JoinToSeq + { + private: + typedef typename ToSeq::type Seq1; + typedef typename ToSeq::type Seq2; -template > -struct JoinToSeq -{ -private: - typedef typename ToSeq::type Seq1; - typedef typename ToSeq::type Seq2; -public: - typedef typename bmpl::copy< - Seq2, - bmpl::back_inserter< Seq1> - >::type type; -}; + public: + typedef typename bmpl::copy>::type type; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/MakeSeq.hpp b/include/pmacc/meta/conversion/MakeSeq.hpp index 9724fb5eaf..fd5b9c98b5 100644 --- a/include/pmacc/meta/conversion/MakeSeq.hpp +++ b/include/pmacc/meta/conversion/MakeSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -28,30 +28,27 @@ namespace pmacc { - -/** combine all input types to one sequence - * - * Note: if the input type is a sequence itself, its elements will be unfolded - * and added separately - * - * @tparam T_Args a boost mpl sequence or single type - * - * @code - * using MyType = typename MakeSeq< A, B >::type - * using MyType2 = typename MakeSeq< boost::mpl::vector, C >::type - * @endcode - * - */ -template< typename... T_Args > -struct MakeSeq -{ - typedef typename MakeSeqFromNestedSeq< - bmpl::vector< T_Args... > - >::type type; -}; - -/** short hand definition for @see MakeSeq<> */ -template< typename... T_Args > -using MakeSeq_t = typename MakeSeq< T_Args... 
>::type; - -} //namespace pmacc + /** combine all input types to one sequence + * + * Note: if the input type is a sequence itself, its elements will be unfolded + * and added separately + * + * @tparam T_Args a boost mpl sequence or single type + * + * @code + * using MyType = typename MakeSeq< A, B >::type + * using MyType2 = typename MakeSeq< boost::mpl::vector, C >::type + * @endcode + * + */ + template + struct MakeSeq + { + typedef typename MakeSeqFromNestedSeq>::type type; + }; + + /** short hand definition for @see MakeSeq<> */ + template + using MakeSeq_t = typename MakeSeq::type; + +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/MakeSeqFromNestedSeq.hpp b/include/pmacc/meta/conversion/MakeSeqFromNestedSeq.hpp index 4bcc3431d3..2d15997fb7 100644 --- a/include/pmacc/meta/conversion/MakeSeqFromNestedSeq.hpp +++ b/include/pmacc/meta/conversion/MakeSeqFromNestedSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -29,26 +29,21 @@ namespace pmacc { + /** combine all elements of the input type to a single vector + * + * If elements of the input sequence are a sequence themself, all of their + * elements will be added to the resulting sequence + * + * @tparam T_In a boost mpl sequence or single type + */ + template + struct MakeSeqFromNestedSeq + { + private: + typedef typename ToSeq::type Seq; -/** combine all elements of the input type to a single vector - * - * If elements of the input sequence are a sequence themself, all of their - * elements will be added to the resulting sequence - * - * @tparam T_In a boost mpl sequence or single type - */ -template -struct MakeSeqFromNestedSeq -{ -private: - typedef typename ToSeq::type Seq; - -public: - typedef typename bmpl::fold< - Seq, - bmpl::vector0<>, - JoinToSeq - >::type type; -}; + public: + typedef typename bmpl::fold, JoinToSeq>::type type; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/OperateOnSeq.hpp b/include/pmacc/meta/conversion/OperateOnSeq.hpp index 565d07564a..ccc7a308b7 100644 --- a/include/pmacc/meta/conversion/OperateOnSeq.hpp +++ b/include/pmacc/meta/conversion/OperateOnSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. 
* @@ -34,35 +34,26 @@ namespace pmacc { - -/** run an unary operator on each element of a sequence - * - * @tparam T_MPLSeq any boost mpl sequence - * @tparam T_UnaryOperator unary operator to translate type from the sequence - * to a mpl pair - * @tparam T_Accessor an unary lambda operator that is used before the type - * from the sequence is passed to T_UnaryOperator - * @return ::type bmpl::vector - */ -template -> -struct OperateOnSeq -{ - - template - struct Op :bmpl::apply1::type > + /** run an unary operator on each element of a sequence + * + * @tparam T_MPLSeq any boost mpl sequence + * @tparam T_UnaryOperator unary operator to translate type from the sequence + * to a mpl pair + * @tparam T_Accessor an unary lambda operator that is used before the type + * from the sequence is passed to T_UnaryOperator + * @return ::type bmpl::vector + */ + template> + struct OperateOnSeq { + template + struct Op : bmpl::apply1::type> + { + }; + + typedef T_MPLSeq MPLSeq; + typedef bmpl::back_inserter> Inserter; + typedef typename bmpl::transform, Inserter>::type type; }; - typedef T_MPLSeq MPLSeq; - typedef bmpl::back_inserter< bmpl::vector<> > Inserter; - typedef typename bmpl::transform< - MPLSeq, - Op, - Inserter - >::type type; -}; - -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/RemoveFromSeq.hpp b/include/pmacc/meta/conversion/RemoveFromSeq.hpp index 8f63d88dc3..8f16641137 100644 --- a/include/pmacc/meta/conversion/RemoveFromSeq.hpp +++ b/include/pmacc/meta/conversion/RemoveFromSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -30,25 +30,21 @@ namespace pmacc { - -/* remove types from a sequence - * - * @tparam T_MPLSeqSrc source sequence from were we delete types - * @tparam T_MPLSeqObjectsToRemove sequence with types which shuld be deleted - */ -template< -typename T_MPLSeqSrc, -typename T_MPLSeqObjectsToRemove -> -struct RemoveFromSeq -{ - template - struct hasId + /* remove types from a sequence + * + * @tparam T_MPLSeqSrc source sequence from were we delete types + * @tparam T_MPLSeqObjectsToRemove sequence with types which shuld be deleted + */ + template + struct RemoveFromSeq { - typedef bmpl::contains type; - }; + template + struct hasId + { + typedef bmpl::contains type; + }; - typedef typename bmpl::remove_if< T_MPLSeqSrc, hasId >::type type; -}; + typedef typename bmpl::remove_if>::type type; + }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/ResolveAliases.hpp b/include/pmacc/meta/conversion/ResolveAliases.hpp index 7586c218c4..3db857941b 100644 --- a/include/pmacc/meta/conversion/ResolveAliases.hpp +++ b/include/pmacc/meta/conversion/ResolveAliases.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Alexander Grund * * This file is part of PMacc. 
* @@ -32,37 +32,32 @@ namespace pmacc { - -/** Translate all pmacc alias types to full specialized types - * - * Use lookup sequence to translate types - * The policy is used if the type from T_MPLSeq is not in T_MPLSeqLookup a compile time error is triggered - * - * @tparam T_MPLSeq source sequence with types to translate - * @tparam T_MPLSeqLookup lookup sequence to translate aliases - */ -template< - typename T_MPLSeq, - typename T_MPLSeqLookup, - typename T_AliasNotFoundPolicy = errorHandlerPolicies::ThrowValueNotFound -> -struct ResolveAliases -{ - typedef T_MPLSeq MPLSeq; - typedef T_MPLSeqLookup MPLSeqLookup; - typedef T_AliasNotFoundPolicy AliasNotFoundPolicy; - typedef bmpl::back_inserter< bmpl::vector<> > Inserter; - - template - struct GetKeyFromAliasAccessor + /** Translate all pmacc alias types to full specialized types + * + * Use lookup sequence to translate types + * The policy is used if the type from T_MPLSeq is not in T_MPLSeqLookup a compile time error is triggered + * + * @tparam T_MPLSeq source sequence with types to translate + * @tparam T_MPLSeqLookup lookup sequence to translate aliases + */ + template< + typename T_MPLSeq, + typename T_MPLSeqLookup, + typename T_AliasNotFoundPolicy = errorHandlerPolicies::ThrowValueNotFound> + struct ResolveAliases { - typedef typename GetKeyFromAlias::type type; + typedef T_MPLSeq MPLSeq; + typedef T_MPLSeqLookup MPLSeqLookup; + typedef T_AliasNotFoundPolicy AliasNotFoundPolicy; + typedef bmpl::back_inserter> Inserter; + + template + struct GetKeyFromAliasAccessor + { + typedef typename GetKeyFromAlias::type type; + }; + + typedef typename bmpl::transform>::type type; }; - typedef typename bmpl::transform< - MPLSeq, - GetKeyFromAliasAccessor - >::type type; -}; - -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/ResolveAndRemoveFromSeq.hpp b/include/pmacc/meta/conversion/ResolveAndRemoveFromSeq.hpp index 92713a8c25..f181e18b6e 100644 --- 
a/include/pmacc/meta/conversion/ResolveAndRemoveFromSeq.hpp +++ b/include/pmacc/meta/conversion/ResolveAndRemoveFromSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Alexander Grund +/* Copyright 2014-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -29,22 +29,19 @@ namespace pmacc { + /** Resolve and remove types from a sequence + * + * @tparam T_MPLSeqSrc source sequence from were we delete types + * @tparam T_MPLSeqObjectsToRemove sequence with types which should be deleted (pmacc aliases are allowed) + */ + template + struct ResolveAndRemoveFromSeq + { + typedef T_MPLSeqSrc MPLSeqSrc; + typedef T_MPLSeqObjectsToRemove MPLSeqObjectsToRemove; + typedef typename ResolveAliases::type + ResolvedSeqWithObjectsToRemove; + typedef typename RemoveFromSeq::type type; + }; -/** Resolve and remove types from a sequence - * - * @tparam T_MPLSeqSrc source sequence from were we delete types - * @tparam T_MPLSeqObjectsToRemove sequence with types which should be deleted (pmacc aliases are allowed) - */ -template< -typename T_MPLSeqSrc, -typename T_MPLSeqObjectsToRemove -> -struct ResolveAndRemoveFromSeq -{ - typedef T_MPLSeqSrc MPLSeqSrc; - typedef T_MPLSeqObjectsToRemove MPLSeqObjectsToRemove; - typedef typename ResolveAliases::type ResolvedSeqWithObjectsToRemove; - typedef typename RemoveFromSeq::type type; -}; - -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/SeqToMap.hpp b/include/pmacc/meta/conversion/SeqToMap.hpp index a42f9fbd3c..80889b9cc5 100644 --- a/include/pmacc/meta/conversion/SeqToMap.hpp +++ b/include/pmacc/meta/conversion/SeqToMap.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -37,35 +37,26 @@ namespace pmacc { - -/** convert boost mpl sequence to a mpl map - * - * @tparam T_MPLSeq any boost mpl sequence - * @tparam T_UnaryOperator unary operator to translate type from the sequence - * to a mpl pair - * @tparam T_Accessor An unary lambda operator which is used before the type - * from the sequence is passed to T_UnaryOperator - * @return ::type mpl map - */ -template -> -struct SeqToMap -{ - - template - struct Op :bmpl::apply1::type > + /** convert boost mpl sequence to a mpl map + * + * @tparam T_MPLSeq any boost mpl sequence + * @tparam T_UnaryOperator unary operator to translate type from the sequence + * to a mpl pair + * @tparam T_Accessor An unary lambda operator which is used before the type + * from the sequence is passed to T_UnaryOperator + * @return ::type mpl map + */ + template> + struct SeqToMap { + template + struct Op : bmpl::apply1::type> + { + }; + + typedef T_MPLSeq MPLSeq; + typedef bmpl::inserter, bmpl::insert> Map_inserter; + typedef typename bmpl::transform, Map_inserter>::type type; }; - typedef T_MPLSeq MPLSeq; - typedef bmpl::inserter< bmpl::map<>, bmpl::insert > Map_inserter; - typedef typename bmpl::transform< - MPLSeq, - Op , - Map_inserter - >::type type; -}; - -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/ToSeq.hpp b/include/pmacc/meta/conversion/ToSeq.hpp index d40c324891..dbd397ce68 100644 --- a/include/pmacc/meta/conversion/ToSeq.hpp +++ b/include/pmacc/meta/conversion/ToSeq.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -29,15 +29,14 @@ namespace pmacc { + /** cast type to boost mpl vector + * @return ::type if T_Type is sequence then identity of T_Type + * else boost::mpl::vector + */ + template + struct ToSeq + { + typedef typename bmpl::if_, T_Type, bmpl::vector1>::type type; + }; -/** cast type to boost mpl vector - * @return ::type if T_Type is sequence then identity of T_Type - * else boost::mpl::vector - */ -template -struct ToSeq -{ - typedef typename bmpl::if_,T_Type,bmpl::vector1 >::type type; -}; - -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/TypeToAliasPair.hpp b/include/pmacc/meta/conversion/TypeToAliasPair.hpp index bc5cc8f7be..e96f558595 100644 --- a/include/pmacc/meta/conversion/TypeToAliasPair.hpp +++ b/include/pmacc/meta/conversion/TypeToAliasPair.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -28,31 +28,27 @@ namespace pmacc { - -/** create boost mpl pair - * - * If T_Type is a pmacc alias than first is set to anonym alias name - * and second is set to T_Type. - * If T_Type is no alias than TypeToPair is used. - * - * @tparam T_Type any type - * @resturn ::type - */ -template -struct TypeToAliasPair -{ - typedef typename TypeToPair::type type; -}; - -/** specialisation if T_Type is a pmacc alias*/ -template class T_Alias,typename T_Type> -struct TypeToAliasPair< T_Alias > -{ - typedef - bmpl::pair< T_Alias , - T_Alias > - type; -}; - - -}//namespace pmacc + /** create boost mpl pair + * + * If T_Type is a pmacc alias than first is set to anonym alias name + * and second is set to T_Type. + * If T_Type is no alias than TypeToPair is used. 
+ * + * @tparam T_Type any type + * @resturn ::type + */ + template + struct TypeToAliasPair + { + typedef typename TypeToPair::type type; + }; + + /** specialisation if T_Type is a pmacc alias*/ + template class T_Alias, typename T_Type> + struct TypeToAliasPair> + { + typedef bmpl::pair, T_Alias> type; + }; + + +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/TypeToPair.hpp b/include/pmacc/meta/conversion/TypeToPair.hpp index 9265d9742e..f2dc965b81 100644 --- a/include/pmacc/meta/conversion/TypeToPair.hpp +++ b/include/pmacc/meta/conversion/TypeToPair.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -27,23 +27,16 @@ namespace pmacc { - - - -/** create boost mpl pair - * - * @tparam T_Type any type - * @resturn ::type boost mpl pair where first and second is set to T_Type - */ -template -struct TypeToPair -{ - typedef - bmpl::pair< T_Type, - T_Type > - type; -}; - - - -}//namespace pmacc + /** create boost mpl pair + * + * @tparam T_Type any type + * @resturn ::type boost mpl pair where first and second is set to T_Type + */ + template + struct TypeToPair + { + typedef bmpl::pair type; + }; + + +} // namespace pmacc diff --git a/include/pmacc/meta/conversion/TypeToPointerPair.hpp b/include/pmacc/meta/conversion/TypeToPointerPair.hpp index c1fe30a994..910a142b52 100644 --- a/include/pmacc/meta/conversion/TypeToPointerPair.hpp +++ b/include/pmacc/meta/conversion/TypeToPointerPair.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,48 +28,47 @@ namespace pmacc { + /** Wrapper to use any type as identifier + * + * Wrap a type thus we can call default constructor on every class + * This is needed to support that any type can used as identifier in for math::MapTuple + */ + template + struct TypeAsIdentifier + { + typedef T_Type type; + }; -/** Wrapper to use any type as identifier - * - * Wrap a type thus we can call default constructor on every class - * This is needed to support that any type can used as identifier in for math::MapTuple - */ -template -struct TypeAsIdentifier -{ - typedef T_Type type; -}; + /** Unary functor to wrap any type with TypeAsIdentifier + * + * @tparam T_Type to to wrap + */ + template + struct MakeIdentifier + { + typedef TypeAsIdentifier type; + }; -/** Unary functor to wrap any type with TypeAsIdentifier - * - * @tparam T_Type to to wrap - */ -template -struct MakeIdentifier -{ - typedef TypeAsIdentifier type; -}; + /** Pass through of an already existing Identifier + * + * Avoids double-wrapping of an Identifier + */ + template + struct MakeIdentifier> + { + typedef TypeAsIdentifier type; + }; -/** Pass through of an already existing Identifier - * - * Avoids double-wrapping of an Identifier - */ -template -struct MakeIdentifier > -{ - typedef TypeAsIdentifier type; -}; - -/** create boost mpl pair ,PointerOfType> - * - * @tparam T_Type any type - * @return ::type boost::mpl::pair,PointerOfType> - */ -template -struct TypeToPointerPair -{ - typedef T_Type* TypePtr; - typedef bmpl::pair< typename MakeIdentifier::type , TypePtr > type; -}; + /** create boost mpl pair ,PointerOfType> + * + * @tparam T_Type any type + * @return ::type boost::mpl::pair,PointerOfType> + */ + template + struct TypeToPointerPair + { + typedef T_Type* TypePtr; + typedef bmpl::pair::type, TypePtr> type; + }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/meta/errorHandlerPolicies/ReturnType.hpp b/include/pmacc/meta/errorHandlerPolicies/ReturnType.hpp index 
f3bfaa8b51..d39aac572d 100644 --- a/include/pmacc/meta/errorHandlerPolicies/ReturnType.hpp +++ b/include/pmacc/meta/errorHandlerPolicies/ReturnType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -25,21 +25,20 @@ namespace pmacc { -namespace errorHandlerPolicies -{ - -/** Returns the given type - * Binary meta function that takes any boost mpl sequence and a type - */ -template -struct ReturnType -{ - template - struct apply + namespace errorHandlerPolicies { - typedef T_ReturnType type; - }; -}; + /** Returns the given type + * Binary meta function that takes any boost mpl sequence and a type + */ + template + struct ReturnType + { + template + struct apply + { + typedef T_ReturnType type; + }; + }; -} // namespace errorHandlerPolicies + } // namespace errorHandlerPolicies } // namespace pmacc diff --git a/include/pmacc/meta/errorHandlerPolicies/ReturnValue.hpp b/include/pmacc/meta/errorHandlerPolicies/ReturnValue.hpp index cefda11bda..88c98581f3 100644 --- a/include/pmacc/meta/errorHandlerPolicies/ReturnValue.hpp +++ b/include/pmacc/meta/errorHandlerPolicies/ReturnValue.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -25,20 +25,19 @@ namespace pmacc { -namespace errorHandlerPolicies -{ - -/** Returns the second parameter (normally the value that the sequence was searched for - * Binary meta function that takes any boost mpl sequence and a type - */ -struct ReturnValue -{ - template - struct apply + namespace errorHandlerPolicies { - typedef T_Value type; - }; -}; + /** Returns the second parameter (normally the value that the sequence was searched for + * Binary meta function that takes any boost mpl sequence and a type + */ + struct ReturnValue + { + template + struct apply + { + typedef T_Value type; + }; + }; -} // namespace errorHandlerPolicies + } // namespace errorHandlerPolicies } // namespace pmacc diff --git a/include/pmacc/meta/errorHandlerPolicies/ThrowValueNotFound.hpp b/include/pmacc/meta/errorHandlerPolicies/ThrowValueNotFound.hpp index fdf14d7d41..649e48c919 100644 --- a/include/pmacc/meta/errorHandlerPolicies/ThrowValueNotFound.hpp +++ b/include/pmacc/meta/errorHandlerPolicies/ThrowValueNotFound.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. * @@ -26,29 +26,28 @@ namespace pmacc { -namespace errorHandlerPolicies -{ - -/** Throws an assertion that the value was not found in the sequence - * Binary meta function that takes any boost mpl sequence and a type - */ -struct ThrowValueNotFound -{ - template - struct apply + namespace errorHandlerPolicies { - /* The compiler is allowed to evaluate an expression that does not depend on a template parameter - * even if the class is never instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. - * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html - * - * A workaround is to add a template dependency to the expression. - * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. 
+ /** Throws an assertion that the value was not found in the sequence + * Binary meta function that takes any boost mpl sequence and a type */ - PMACC_CASSERT_MSG_TYPE(value_not_found_in_seq, T_Value, false && ( sizeof(T_MPLSeq) != 0 ) ); - typedef bmpl::void_ type; - }; -}; + struct ThrowValueNotFound + { + template + struct apply + { + /* The compiler is allowed to evaluate an expression that does not depend on a template parameter + * even if the class is never instantiated. In that case static assert is always + * evaluated (e.g. with clang), this results in an error if the condition is false. + * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + */ + PMACC_CASSERT_MSG_TYPE(value_not_found_in_seq, T_Value, false && (sizeof(T_MPLSeq) != 0)); + typedef bmpl::void_ type; + }; + }; -} // namespace errorHandlerPolicies + } // namespace errorHandlerPolicies } // namespace pmacc diff --git a/include/pmacc/misc/splitString.hpp b/include/pmacc/misc/splitString.hpp index fe59a0ec3a..dd964d3f03 100644 --- a/include/pmacc/misc/splitString.hpp +++ b/include/pmacc/misc/splitString.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,37 +28,26 @@ namespace pmacc { -namespace misc -{ - /** split a string in a vector of strings - * - * Based on Stack Overflow post: - * source: https://stackoverflow.com/a/28142357 - * author: Marcin - * date: Jan 25 '15 - * - * @param input string to split - * @param regex separator between two elements - */ - HINLINE std::vector< std::string > splitString( - std::string const & input, - std::string const & delimiter = "," - ) + namespace misc { - std::regex re( delimiter ); - // passing -1 as the submatch index parameter performs splitting - std::sregex_token_iterator first{ - input.begin(), - input.end(), - re, - -1 - }; - std::sregex_token_iterator last; + /** split a string in a vector of strings + * + * Based on Stack Overflow post: + * source: https://stackoverflow.com/a/28142357 + * author: Marcin + * date: Jan 25 '15 + * + * @param input string to split + * @param regex separator between two elements + */ + HINLINE std::vector splitString(std::string const& input, std::string const& delimiter = ",") + { + std::regex re(delimiter); + // passing -1 as the submatch index parameter performs splitting + std::sregex_token_iterator first{input.begin(), input.end(), re, -1}; + std::sregex_token_iterator last; - return { - first, - last - }; - } -} // namespace misc + return {first, last}; + } + } // namespace misc } // namespace pmacc diff --git a/include/pmacc/mpi/GetMPI_Op.hpp b/include/pmacc/mpi/GetMPI_Op.hpp index 8baf3b789f..5d49369795 100644 --- a/include/pmacc/mpi/GetMPI_Op.hpp +++ b/include/pmacc/mpi/GetMPI_Op.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -28,8 +28,7 @@ namespace pmacc { namespace mpi { - template MPI_Op getMPI_Op(); } -} +} // namespace pmacc diff --git a/include/pmacc/mpi/GetMPI_StructAsArray.hpp b/include/pmacc/mpi/GetMPI_StructAsArray.hpp index a63db93b19..93c5fd830a 100644 --- a/include/pmacc/mpi/GetMPI_StructAsArray.hpp +++ b/include/pmacc/mpi/GetMPI_StructAsArray.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -26,26 +26,23 @@ namespace pmacc { -namespace mpi -{ -namespace def -{ - -template -struct GetMPI_StructAsArray; + namespace mpi + { + namespace def + { + template + struct GetMPI_StructAsArray; -}//namespace intern + } // namespace def -template -pmacc::mpi::MPI_StructAsArray getMPI_StructAsArray() -{ - return def::GetMPI_StructAsArray ()(); -} + template + pmacc::mpi::MPI_StructAsArray getMPI_StructAsArray() + { + return def::GetMPI_StructAsArray()(); + } -} //namespace mpi + } // namespace mpi -}//namespace pmacc +} // namespace pmacc #include "pmacc/mpi/GetMPI_StructAsArray.tpp" - - diff --git a/include/pmacc/mpi/GetMPI_StructAsArray.tpp b/include/pmacc/mpi/GetMPI_StructAsArray.tpp index 1c05c140f0..d4f6c8d836 100644 --- a/include/pmacc/mpi/GetMPI_StructAsArray.tpp +++ b/include/pmacc/mpi/GetMPI_StructAsArray.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. 
* @@ -25,93 +25,83 @@ namespace pmacc { -namespace mpi -{ -namespace def -{ - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_INT, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_UNSIGNED, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_LONG, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_UNSIGNED_LONG, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_LONG_LONG, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_UNSIGNED_LONG_LONG, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const + namespace mpi { - return MPI_StructAsArray(MPI_FLOAT, 1); - } -}; - -template<> -struct GetMPI_StructAsArray -{ - - MPI_StructAsArray operator()() const - { - return MPI_StructAsArray(MPI_DOUBLE, 1); - } -}; - -} //namespace def -}//namespace mpi - -}//namespace pmacc - + namespace def + { + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_INT, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_UNSIGNED, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_LONG, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_UNSIGNED_LONG, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + 
return MPI_StructAsArray(MPI_LONG_LONG, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_UNSIGNED_LONG_LONG, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_FLOAT, 1); + } + }; + + template<> + struct GetMPI_StructAsArray + { + MPI_StructAsArray operator()() const + { + return MPI_StructAsArray(MPI_DOUBLE, 1); + } + }; + + } // namespace def + } // namespace mpi + +} // namespace pmacc diff --git a/include/pmacc/mpi/MPIReduce.hpp b/include/pmacc/mpi/MPIReduce.hpp index 4d05b6a5fe..bc0cd19fae 100644 --- a/include/pmacc/mpi/MPIReduce.hpp +++ b/include/pmacc/mpi/MPIReduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -33,166 +33,158 @@ namespace pmacc { -namespace mpi -{ - -/** reduce data over selected mpi ranks */ -struct MPIReduce -{ - - MPIReduce() : mpiRank(-1), numRanks(0), comm(MPI_COMM_NULL), isMPICommInitialized(false) + namespace mpi { + /** reduce data over selected mpi ranks */ + struct MPIReduce + { + MPIReduce() : mpiRank(-1), numRanks(0), comm(MPI_COMM_NULL), isMPICommInitialized(false) + { + } - } + virtual ~MPIReduce() + { + if(isMPICommInitialized) + { + MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&comm)); + } + } - virtual ~MPIReduce() - { - if (isMPICommInitialized) - { - MPI_CHECK_NO_EXCEPT(MPI_Comm_free(&comm)); - } - } - - /* defines if the result of the MPI operation is valid - * - * @tparam MPIMethod type of the reduction method - * @param method used reduction method e.g., - * reduceMethods::AllReduce, reduceMethods::Reduce - * @return if resut of operator() is valid*/ - template - bool hasResult(const MPIMethod & method) - { - if (!isMPICommInitialized) - participate(true); - return method.hasResult(mpiRank); - } 
- - /** defines if the result of the MPI operation is valid - * - * The reduction method reduceMethods::Reduce is used. - * - * @return if result of operator() is valid - */ - bool hasResult() - { - if (!isMPICommInitialized) - participate(true); - return this->hasResult(::pmacc::mpi::reduceMethods::AllReduce()); - } - - /* Activate participation for reduce algorithm. - * Must called from any mpi process. This function use global blocking mpi calls. - * @param isActive true if mpi rank should be part of reduce operation, else false - */ - void participate(bool isActive) - { - /*free old communicator of init is called again*/ - if (isMPICommInitialized) - { - MPI_CHECK(MPI_Comm_free(&comm)); - mpiRank = -1; - numRanks = 0; - isMPICommInitialized = false; - } - - int countRanks; - MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &countRanks)); - std::vector reduceRank(countRanks); - std::vector groupRanks(countRanks); - MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank)); - - if (!isActive) - mpiRank = -1; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather(&mpiRank, 1, MPI_INT, &reduceRank[0], 1, MPI_INT, MPI_COMM_WORLD)); - - for (int i = 0; i < countRanks; ++i) - { - if (reduceRank[i] != -1) + /* defines if the result of the MPI operation is valid + * + * @tparam MPIMethod type of the reduction method + * @param method used reduction method e.g., + * reduceMethods::AllReduce, reduceMethods::Reduce + * @return if resut of operator() is valid*/ + template + bool hasResult(const MPIMethod& method) { - groupRanks[numRanks] = reduceRank[i]; - numRanks++ ; + if(!isMPICommInitialized) + participate(true); + return method.hasResult(mpiRank); } - } - MPI_Group group = MPI_GROUP_NULL; - MPI_Group newgroup = MPI_GROUP_NULL; - MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &group)); - MPI_CHECK(MPI_Group_incl(group, numRanks, &groupRanks[0], &newgroup)); + /** defines if the result of the 
MPI operation is valid + * + * The reduction method reduceMethods::Reduce is used. + * + * @return if result of operator() is valid + */ + bool hasResult() + { + if(!isMPICommInitialized) + participate(true); + return this->hasResult(::pmacc::mpi::reduceMethods::AllReduce()); + } - MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, newgroup, &comm)); + /* Activate participation for reduce algorithm. + * Must called from any mpi process. This function use global blocking mpi calls. + * @param isActive true if mpi rank should be part of reduce operation, else false + */ + void participate(bool isActive) + { + /*free old communicator of init is called again*/ + if(isMPICommInitialized) + { + MPI_CHECK(MPI_Comm_free(&comm)); + mpiRank = -1; + numRanks = 0; + isMPICommInitialized = false; + } + + int countRanks; + MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &countRanks)); + std::vector reduceRank(countRanks); + std::vector groupRanks(countRanks); + MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &mpiRank)); + + if(!isActive) + mpiRank = -1; + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather(&mpiRank, 1, MPI_INT, &reduceRank[0], 1, MPI_INT, MPI_COMM_WORLD)); + + for(int i = 0; i < countRanks; ++i) + { + if(reduceRank[i] != -1) + { + groupRanks[numRanks] = reduceRank[i]; + numRanks++; + } + } + + MPI_Group group = MPI_GROUP_NULL; + MPI_Group newgroup = MPI_GROUP_NULL; + MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &group)); + MPI_CHECK(MPI_Group_incl(group, numRanks, &groupRanks[0], &newgroup)); + + MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, newgroup, &comm)); + + if(mpiRank != -1) + { + MPI_CHECK(MPI_Comm_rank(comm, &mpiRank)); + isMPICommInitialized = true; + } + MPI_CHECK(MPI_Group_free(&group)); + MPI_CHECK(MPI_Group_free(&newgroup)); + } - if (mpiRank != -1) - { - MPI_CHECK(MPI_Comm_rank(comm, &mpiRank)); - isMPICommInitialized = true; - } - MPI_CHECK(MPI_Group_free(&group)); - 
MPI_CHECK(MPI_Group_free(&newgroup)); - } - - /* Reduce elements on cpu memory - * call hasResult to see if returned value is valid - * - * @param func binary functor for reduce which takes two arguments, first argument is the source and get the new reduced value. - * Functor must specialize the function getMPI_Op. - * @param dest buffer for result data - * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one dimension access) - * @param n number of elements to reduce - * @param method mpi method for reduce - * - */ - template - HINLINE void operator()(Functor func, - Type* dest, - Type* src, - const size_t n, - const ReduceMethod method) - { - if (!isMPICommInitialized) - participate(true); - typedef Type ValueType; - - method(func, - dest, - src, - n * ::pmacc::mpi::getMPI_StructAsArray ().sizeMultiplier, - ::pmacc::mpi::getMPI_StructAsArray ().dataType, - ::pmacc::mpi::getMPI_Op (), - comm); - } - - /* Reduce elements on cpu memory - * the default reduce method is allReduce which means that any host get the reduced value back - * - * @param func binary functor for reduce which takes two arguments, first argument is the source and get the new reduced value. - * Functor must specialize the function getMPI_Op. - * @param dest buffer for result data - * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one dimension access) - * @param n number of elements to reduce - * - * @return reduced value - */ - template - HINLINE void operator()(Functor func, - Type* dest, - Type* src, - const size_t n) - { - if (!isMPICommInitialized) - participate(true); - this->operator ()(func, dest, src, n, ::pmacc::mpi::reduceMethods::AllReduce()); - } + /* Reduce elements on cpu memory + * call hasResult to see if returned value is valid + * + * @param func binary functor for reduce which takes two arguments, first argument is the source and get + * the new reduced value. 
Functor must specialize the function getMPI_Op. + * @param dest buffer for result data + * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one + * dimension access) + * @param n number of elements to reduce + * @param method mpi method for reduce + * + */ + template + HINLINE void operator()(Functor func, Type* dest, Type* src, const size_t n, const ReduceMethod method) + { + if(!isMPICommInitialized) + participate(true); + typedef Type ValueType; + + method( + func, + dest, + src, + n * ::pmacc::mpi::getMPI_StructAsArray().sizeMultiplier, + ::pmacc::mpi::getMPI_StructAsArray().dataType, + ::pmacc::mpi::getMPI_Op(), + comm); + } + /* Reduce elements on cpu memory + * the default reduce method is allReduce which means that any host get the reduced value back + * + * @param func binary functor for reduce which takes two arguments, first argument is the source and get + * the new reduced value. Functor must specialize the function getMPI_Op. + * @param dest buffer for result data + * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one + * dimension access) + * @param n number of elements to reduce + * + * @return reduced value + */ + template + HINLINE void operator()(Functor func, Type* dest, Type* src, const size_t n) + { + if(!isMPICommInitialized) + participate(true); + this->operator()(func, dest, src, n, ::pmacc::mpi::reduceMethods::AllReduce()); + } -private: - MPI_Comm comm; - int mpiRank; - int numRanks; - bool isMPICommInitialized; -}; -} // namespace mpi + private: + MPI_Comm comm; + int mpiRank; + int numRanks; + bool isMPICommInitialized; + }; + } // namespace mpi } // namespace pmacc diff --git a/include/pmacc/mpi/MPI_StructAsArray.hpp b/include/pmacc/mpi/MPI_StructAsArray.hpp index c5af978b04..94c0b59fb3 100644 --- a/include/pmacc/mpi/MPI_StructAsArray.hpp +++ b/include/pmacc/mpi/MPI_StructAsArray.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, 
Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -31,12 +31,11 @@ namespace pmacc { struct MPI_StructAsArray { - MPI_StructAsArray(MPI_Datatype type, uint32_t factor) : dataType(type), sizeMultiplier(factor) { } MPI_Datatype dataType; uint32_t sizeMultiplier; }; - } -} + } // namespace mpi +} // namespace pmacc diff --git a/include/pmacc/mpi/SeedPerRank.hpp b/include/pmacc/mpi/SeedPerRank.hpp index d4804ab0a1..6e7acf6591 100644 --- a/include/pmacc/mpi/SeedPerRank.hpp +++ b/include/pmacc/mpi/SeedPerRank.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Alexander Grund +/* Copyright 2014-2021 Axel Huebl, Alexander Grund * * This file is part of PMacc. * @@ -28,53 +28,52 @@ namespace pmacc { -namespace mpi -{ - /** Calculate a Seed per Rank - * - * This functor derives a unique seed for each MPI rank (or GPU) from - * a given global seed in a deterministic manner. - * - * \tparam T_DIM Dimensionality of the simulation (1-3 D) - */ - template - struct SeedPerRank + namespace mpi { - /** Functor implementation + /** Calculate a Seed per Rank * - * This method provides a guaranteed unique number per MPI rank - * (or GPU). When a (only locally unique) localSeed parameter is used - * it is furthermore guaranteed that this number does not collide - * with an other seed. + * This functor derives a unique seed for each MPI rank (or GPU) from + * a given global seed in a deterministic manner. * - * \param localSeed Initial seed to vary two identical simulations - * can have been xor'ed with e.g. a unique species id - * to get an unique seed per species - * \return uint32_t seed + * \tparam T_DIM Dimensionality of the simulation (1-3 D) */ - uint32_t - operator()( uint32_t localSeed ) + template + struct SeedPerRank { - auto& gc = pmacc::Environment::get().GridController(); - - uint32_t rank = gc.getGlobalRank( ); - /* We put the rank into the upper bits to allow values which start - * from zero (e.g. 
cellIdxs, time steps) to be used as additional seed contributors - * Those would then write to the lower bits leaving the upper bits alone - * which still results in globally unique seeds - */ - uint32_t globalUniqueSeed = reverseBits(rank); - /* localSeed often contains a counted number, so we rotate it by some bits to not "destroy" - * the counted rank that is already there. Also it is not reversed to get a different pattern + /** Functor implementation + * + * This method provides a guaranteed unique number per MPI rank + * (or GPU). When a (only locally unique) localSeed parameter is used + * it is furthermore guaranteed that this number does not collide + * with an other seed. + * + * \param localSeed Initial seed to vary two identical simulations + * can have been xor'ed with e.g. a unique species id + * to get an unique seed per species + * \return uint32_t seed */ - localSeed = (localSeed << 16) | (localSeed >> (sizeof(uint32_t) * CHAR_BIT - 16)); - globalUniqueSeed ^= localSeed; - /* For any globally constant localSeed globalUniqueSeed is now guaranteed - * to be globally unique - */ - return globalUniqueSeed; - } - }; + uint32_t operator()(uint32_t localSeed) + { + auto& gc = pmacc::Environment::get().GridController(); + + uint32_t rank = gc.getGlobalRank(); + /* We put the rank into the upper bits to allow values which start + * from zero (e.g. cellIdxs, time steps) to be used as additional seed contributors + * Those would then write to the lower bits leaving the upper bits alone + * which still results in globally unique seeds + */ + uint32_t globalUniqueSeed = reverseBits(rank); + /* localSeed often contains a counted number, so we rotate it by some bits to not "destroy" + * the counted rank that is already there. 
Also it is not reversed to get a different pattern + */ + localSeed = (localSeed << 16) | (localSeed >> (sizeof(uint32_t) * CHAR_BIT - 16)); + globalUniqueSeed ^= localSeed; + /* For any globally constant localSeed globalUniqueSeed is now guaranteed + * to be globally unique + */ + return globalUniqueSeed; + } + }; -} /* namespace mpi */ -} /* namespace picongpu */ + } /* namespace mpi */ +} // namespace pmacc diff --git a/include/pmacc/mpi/reduceMethods/AllReduce.hpp b/include/pmacc/mpi/reduceMethods/AllReduce.hpp index bbd298f251..4c91bb481b 100644 --- a/include/pmacc/mpi/reduceMethods/AllReduce.hpp +++ b/include/pmacc/mpi/reduceMethods/AllReduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,37 +27,35 @@ namespace pmacc { -namespace mpi -{ - -namespace reduceMethods -{ - -struct AllReduce -{ - - HINLINE bool hasResult(int mpiRank) const + namespace mpi { - return mpiRank != -1; - } - - template - HINLINE void operator()(Functor, Type* dest, Type* src, const size_t count, MPI_Datatype type, MPI_Op op, MPI_Comm comm) const - { - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allreduce((void*) src, - (void*) dest, - count, - type, - op, comm)); - } -}; - -} /*namespace reduceMethods*/ - -} /*namespace mpi*/ + namespace reduceMethods + { + struct AllReduce + { + HINLINE bool hasResult(int mpiRank) const + { + return mpiRank != -1; + } + + template + HINLINE void operator()( + Functor, + Type* dest, + Type* src, + const size_t count, + MPI_Datatype type, + MPI_Op op, + MPI_Comm comm) const + { + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allreduce((void*) src, (void*) dest, count, type, op, comm)); + } + }; + + } /*namespace reduceMethods*/ + + } /*namespace mpi*/ } 
/*namespace pmacc*/ - - diff --git a/include/pmacc/mpi/reduceMethods/Reduce.hpp b/include/pmacc/mpi/reduceMethods/Reduce.hpp index 542c908fd9..3d8ad894ac 100644 --- a/include/pmacc/mpi/reduceMethods/Reduce.hpp +++ b/include/pmacc/mpi/reduceMethods/Reduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,38 +27,36 @@ namespace pmacc { -namespace mpi -{ - -namespace reduceMethods -{ - -struct Reduce -{ - - HINLINE bool hasResult(int mpiRank) const + namespace mpi { - return mpiRank == 0; - } - - template - HINLINE void operator()(Functor, Type* dest, Type* src, const size_t count, MPI_Datatype type, MPI_Op op, MPI_Comm comm) const - { - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - - MPI_CHECK(MPI_Reduce((void*) src, - (void*) dest, - count, - type, - op, 0, comm)); - } -}; - -} /*namespace reduceMethods*/ - -} /*namespace mpi*/ + namespace reduceMethods + { + struct Reduce + { + HINLINE bool hasResult(int mpiRank) const + { + return mpiRank == 0; + } + + template + HINLINE void operator()( + Functor, + Type* dest, + Type* src, + const size_t count, + MPI_Datatype type, + MPI_Op op, + MPI_Comm comm) const + { + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + + MPI_CHECK(MPI_Reduce((void*) src, (void*) dest, count, type, op, 0, comm)); + } + }; + + } /*namespace reduceMethods*/ + + } /*namespace mpi*/ } /*namespace pmacc*/ - - diff --git a/include/pmacc/nvidia/atomic.hpp b/include/pmacc/nvidia/atomic.hpp index 134f7989c6..f7673538dc 100644 --- a/include/pmacc/nvidia/atomic.hpp +++ b/include/pmacc/nvidia/atomic.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Alexander Grund +/* Copyright 2015-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. 
* @@ -21,182 +21,246 @@ #pragma once - #include "pmacc/types.hpp" -#if( PMACC_CUDA_ENABLED == 1 ) -# include "pmacc/nvidia/warp.hpp" -#endif +#include "pmacc/memory/Array.hpp" +#include "pmacc/nvidia/warp.hpp" + +#include +#include + #include + +#include #include namespace pmacc { -namespace nvidia -{ + namespace nvidia + { + namespace detail + { + /** optimized atomic operation without return value + * + * For some backends PMacc is using optimized intrinsics to perform this operation. + * + * @tparam T_Op atomic alpaka operation type + * @tparam T_Acc alpaka accelerator context type + * @tparam T_Type value type + * @tparam T_Hierarchy alpaka hierarchy type of the atomic operation + */ + template + struct AtomicOpNoRet + { + /** perform the atomic operation + * + * @param acc alpaka accelerator context + * @param ptr pointer to destination memory + * @param value input value + * @param hierarchy alpaka hierarchy scope for atomics + */ + DINLINE void operator()( + T_Acc const& acc, + T_Type* ptr, + T_Type const value, + T_Hierarchy const& hierarchy) + { + ::alpaka::atomicOp(acc, ptr, value, hierarchy); + } + }; - namespace detail { +#if(!defined(__CUDA__) && ALPAKA_ACC_GPU_HIP_ENABLED == 1) + /** HIP backend specialization for atomic add + * + * Uses the intrinsic atomicAddNoRet available for AMD gpus only. + * Not compatible with HIP-nvcc. 
+ */ + template + struct AtomicOpNoRet<::alpaka::AtomicAdd, alpaka::AccGpuHipRt, float, T_Hierarchy> + { + DINLINE void operator()( + alpaka::AccGpuHipRt const& acc, + float* ptr, + float const value, + T_Hierarchy const& hierarchy) + { + ::atomicAddNoRet(ptr, value); + } + }; +#endif - template - struct AtomicAllInc - { - template< typename T_Acc, typename T_Hierarchy > - HDINLINE T_Type - operator()(const T_Acc& acc, T_Type* ptr, const T_Hierarchy& hierarchy) + template + struct AtomicAllInc { - return ::alpaka::atomic::atomicOp<::alpaka::atomic::op::Add>(acc, ptr, T_Type(1), hierarchy); - } - }; - -#if PMACC_CUDA_ARCH >= 300 - /** - * Trait that returns whether an optimized version of AtomicAllInc - * exists for Kepler architectures (and up) - */ - template - struct AtomicAllIncIsOptimized - { - enum{ - value = boost::is_same::value || - boost::is_same::value || - boost::is_same::value || - boost::is_same::value || - boost::is_same::value + template + HDINLINE T_Type operator()(const T_Acc& acc, T_Type* ptr, const T_Hierarchy& hierarchy) + { + return ::alpaka::atomicOp<::alpaka::AtomicAdd>(acc, ptr, T_Type(1), hierarchy); + } }; - }; - /** - * AtomicAllInc for Kepler and up - * Defaults to unoptimized version for unsupported types - */ - template::value> - struct AtomicAllIncKepler: public AtomicAllInc - {}; +#if CUPLA_DEVICE_COMPILE == 1 + /** + * Trait that returns whether an optimized version of AtomicAllInc + * exists for Kepler architectures (and up) + */ + template + struct AtomicAllIncIsOptimized + { + enum + { + value = boost::is_same::value || boost::is_same::value + || boost::is_same::value || boost::is_same::value + || boost::is_same::value + }; + }; - /** - * Optimized version - * - * This warp aggregated atomic increment implementation based on nvidia parallel forall example - * http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-optimized-filtering-warp-aggregated-atomics/ - * (author: Andrew Adinetz, date: October 1th, 2014) - * - */ - template 
- struct AtomicAllIncKepler - { - template< typename T_Acc, typename T_Hierarchy > - HDINLINE T_Type - operator()(const T_Acc& acc,T_Type* ptr, const T_Hierarchy& hierarchy) + /** + * AtomicAllInc for Kepler and up + * Defaults to unoptimized version for unsupported types + */ + template::value> + struct AtomicAllIncKepler : public AtomicAllInc { - /* Get a bitmask with 1 for each thread in the warp, that executes this */ -#if(__CUDACC_VER_MAJOR__ >= 9) - const int mask = __activemask(); -#else - const int mask = __ballot(1); -#endif - /* select the leader */ - const int leader = __ffs(mask) - 1; - T_Type result; - const int laneId = getLaneId(); - /* Get the start value for this warp */ - if (laneId == leader) - result = ::alpaka::atomic::atomicOp<::alpaka::atomic::op::Add>(acc,ptr, static_cast(__popc(mask)), hierarchy); - result = warpBroadcast(result, leader); - /* Add offset per thread */ - return result + static_cast(__popc(mask & ((1 << laneId) - 1))); - } - }; - - /** - * Optimized version for int64. 
- * As CUDA atomicAdd does not support int64 directly we just cast it - * and call the uint64 implementation - */ - template<> - struct AtomicAllIncKepler - { - template< typename T_Acc, typename T_Hierarchy > - HDINLINE long long int - operator()(const T_Acc& acc, long long int* ptr, const T_Hierarchy&, const T_Hierarchy& hierarchy ) + }; + + /** + * Optimized version + * + * This warp aggregated atomic increment implementation based on nvidia parallel forall example + * http://devblogs.nvidia.com/parallelforall/cuda-pro-tip-optimized-filtering-warp-aggregated-atomics/ + * (author: Andrew Adinetz, date: October 1th, 2014) + * + */ + template + struct AtomicAllIncKepler { - return static_cast( - AtomicAllIncKepler()( + template + HDINLINE T_Type operator()(const T_Acc& acc, T_Type* ptr, const T_Hierarchy& hierarchy) + { + const auto mask = alpaka::warp::activemask(acc); + const auto leader = alpaka::ffs(acc, static_cast>(mask)) - 1; + + T_Type result; + const int laneId = getLaneId(); + /* Get the start value for this warp */ + if(laneId == leader) + result = ::alpaka::atomicOp<::alpaka::AtomicAdd>( acc, - reinterpret_cast(ptr), - hierarchy - ) - ); - } - }; + ptr, + static_cast(alpaka::popcount(acc, mask)), + hierarchy); + result = warpBroadcast(result, leader); + /* Add offset per thread */ + return result + + static_cast( + alpaka::popcount(acc, mask & ((static_cast(1u) << laneId) - 1u))); + } + }; - template - struct AtomicAllInc: public AtomicAllIncKepler - {}; -#endif /* PMACC_CUDA_ARCH >= 300 */ + /** + * Optimized version for int64. 
+ * As CUDA atomicAdd does not support int64 directly we just cast it + * and call the uint64 implementation + */ + template<> + struct AtomicAllIncKepler + { + template + HDINLINE long long int operator()( + const T_Acc& acc, + long long int* ptr, + const T_Hierarchy&, + const T_Hierarchy& hierarchy) + { + return static_cast(AtomicAllIncKepler()( + acc, + reinterpret_cast(ptr), + hierarchy)); + } + }; - } // namespace detail + template + struct AtomicAllInc : public AtomicAllIncKepler + { + }; +#endif // CUPLA_DEVICE_COMPILE == 1 -/** optimized atomic increment - * - * - only optimized if PTX ISA >=3.0 - * - this atomic uses warp aggregation to speedup the operation compared to cuda `atomicInc()` - * - cuda `atomicAdd()` is used if the compute architecture does not support warp aggregation - * - all participate threads must change the same pointer (ptr) else the result is unspecified - * - * @param ptr pointer to memory (must be the same address for all threads in a block) - * - */ -template -HDINLINE -T atomicAllInc(const T_Acc& acc, T *ptr, const T_Hierarchy& hierarchy) -{ - return detail::AtomicAllInc= 300) >()(acc, ptr, hierarchy); -} + } // namespace detail -template -HDINLINE -T atomicAllInc(T *ptr) -{ -#ifdef __CUDA_ARCH__ - return atomicAllInc(alpaka::atomic::AtomicCudaBuiltIn(), ptr, ::alpaka::hierarchy::Grids()); -#else - // assume that we can use the standard library atomics if we are not on gpu - return atomicAllInc(alpaka::atomic::AtomicStdLibLock<16>(), ptr, ::alpaka::hierarchy::Grids()); -#endif -} + /** optimized atomic increment + * + * - only optimized if PTX ISA >=3.0 + * - this atomic uses warp aggregation to speedup the operation compared to cuda `atomicInc()` + * - cuda `atomicAdd()` is used if the compute architecture does not support warp aggregation + * - all participate threads must change the same pointer (ptr) else the result is unspecified + * + * @param ptr pointer to memory (must be the same address for all threads in a block) + * + 
*/ + template + HDINLINE T atomicAllInc(const T_Acc& acc, T* ptr, const T_Hierarchy& hierarchy) + { + return detail::AtomicAllInc= 300 || BOOST_COMP_HIP)>()(acc, ptr, hierarchy); + } -/** optimized atomic value exchange - * - * - only optimized if PTX ISA >=2.0 - * - this atomic uses warp vote function to speedup the operation - * compared to cuda `atomicExch()` - * - cuda `atomicExch()` is used if the compute architecture not supports - * warps vote functions - * - all participate threads must change the same - * pointer (ptr) and set the same value, else the - * result is unspecified - * - * @param ptr pointer to memory (must be the same address for all threads in a block) - * @param value new value (must be the same for all threads in a block) - */ -template -DINLINE void -atomicAllExch(const T_Acc& acc, T_Type* ptr, const T_Type value, const T_Hierarchy& hierarchy) -{ -#if (__CUDA_ARCH__ >= 200) -# if(__CUDACC_VER_MAJOR__ >= 9) - const int mask = __activemask(); -# else - const int mask = __ballot(1); -# endif - // select the leader - const int leader = __ffs(mask) - 1; - // leader does the update - if (getLaneId() == leader) -#endif - ::alpaka::atomic::atomicOp<::alpaka::atomic::op::Exch>(acc, ptr, value, hierarchy); -} + template + HDINLINE T atomicAllInc(T* ptr) + { + /* Dirty hack to call an alpaka accelerator based function. + * Members of the fakeAcc will be uninitialized and must not be accessed. + * + * The id provider for particles is the only code where atomicAllInc is used without an accelerator. 
+ * @todo remove the unsafe faked accelerator + */ + pmacc::memory::Array fakeAcc; + return atomicAllInc(fakeAcc[0], ptr, ::alpaka::hierarchy::Grids()); + } + /** optimized atomic value exchange + * + * - only optimized if PTX ISA >=2.0 + * - this atomic uses warp vote function to speedup the operation + * compared to cuda `atomicExch()` + * - cuda `atomicExch()` is used if the compute architecture not supports + * warps vote functions + * - all participate threads must change the same + * pointer (ptr) and set the same value, else the + * result is unspecified + * + * @param ptr pointer to memory (must be the same address for all threads in a block) + * @param value new value (must be the same for all threads in a block) + */ + template + DINLINE void atomicAllExch(const T_Acc& acc, T_Type* ptr, const T_Type value, const T_Hierarchy& hierarchy) + { + const auto mask = alpaka::warp::activemask(acc); + const auto leader = alpaka::ffs(acc, static_cast>(mask)) - 1; + +#if CUPLA_DEVICE_COMPILE == 1 + if(getLaneId() == leader) +#endif + ::alpaka::atomicOp<::alpaka::AtomicExch>(acc, ptr, value, hierarchy); + } -} //namespace nvidia -} //namespace pmacc + /** optimized atomic operation without return value + * + * Executes an alpaka atomic operation but without giving the old value back. + * For some backends PMacc is using optimized intrinsics to perform this operation. 
+ * + * @tparam T_Op atomic alpaka operation type + * @tparam T_Acc alpaka accelerator context type + * @tparam T_Type value type + * @tparam T_Hierarchy alpaka hierarchy type of the atomic operation + * @param acc alpaka accelerator context + * @param ptr pointer to memory + * @param value input value + * @param hierarchy alpaka hierarchy scope for atomics + */ + template + DINLINE void atomicOpNoRet(T_Acc const& acc, T_Type* ptr, T_Type const value, T_Hierarchy const& hierarchy) + { + return detail::AtomicOpNoRet{}(acc, ptr, value, hierarchy); + } + } // namespace nvidia +} // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Add.hpp b/include/pmacc/nvidia/functors/Add.hpp index ee4b391857..52a0d13b58 100644 --- a/include/pmacc/nvidia/functors/Add.hpp +++ b/include/pmacc/nvidia/functors/Add.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -26,36 +26,36 @@ namespace pmacc { -namespace nvidia -{ -namespace functors -{ - struct Add + namespace nvidia { - template - HDINLINE void operator()(Dst & dst, const Src & src) const + namespace functors { - dst += src; - } + struct Add + { + template + HDINLINE void operator()(Dst& dst, const Src& src) const + { + dst += src; + } - template - HDINLINE void operator()(const T_Acc &, Dst & dst, const Src & src) const - { - dst += src; - } - }; -} // namespace functors -} // namespace nvidia + template + HDINLINE void operator()(const T_Acc&, Dst& dst, const Src& src) const + { + dst += src; + } + }; + } // namespace functors + } // namespace nvidia } // namespace pmacc namespace pmacc { -namespace mpi -{ - template<> - HINLINE MPI_Op getMPI_Op() + namespace mpi { - return MPI_SUM; - } -} // namespace mpi + template<> + HINLINE MPI_Op getMPI_Op() + { + return MPI_SUM; + } + } // namespace mpi } // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Assign.hpp 
b/include/pmacc/nvidia/functors/Assign.hpp index 796e02c65c..18cc959784 100644 --- a/include/pmacc/nvidia/functors/Assign.hpp +++ b/include/pmacc/nvidia/functors/Assign.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -25,24 +25,24 @@ namespace pmacc { -namespace nvidia -{ -namespace functors -{ - struct Assign + namespace nvidia { - template - HDINLINE void operator()(Dst & dst, const Src & src) const + namespace functors { - dst = src; - } + struct Assign + { + template + HDINLINE void operator()(Dst& dst, const Src& src) const + { + dst = src; + } - template - HDINLINE void operator()(const T_Acc &, Dst & dst, const Src & src) const - { - dst = src; - } - }; -} // namespace functors -} // namespace nvidia + template + HDINLINE void operator()(const T_Acc&, Dst& dst, const Src& src) const + { + dst = src; + } + }; + } // namespace functors + } // namespace nvidia } // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Atomic.hpp b/include/pmacc/nvidia/functors/Atomic.hpp new file mode 100644 index 0000000000..7969fdb1cc --- /dev/null +++ b/include/pmacc/nvidia/functors/Atomic.hpp @@ -0,0 +1,74 @@ +/* Copyright 2020-2021 Rene Widera + * + * This file is part of PMacc. + * + * PMacc is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PMacc is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. 
+ * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with PMacc. + * If not, see . + */ + +#pragma once + +#include "pmacc/types.hpp" + +#include "pmacc/nvidia/atomic.hpp" + +namespace pmacc +{ + namespace nvidia + { + namespace functors + { + /** Addition of two values + * + * @tparam T_AlpakaOperation alpaka atomic operation [::alpaka::op] + * @tparam T_AlpakaHierarchy alpaka atomic hierarchy [::alpaka::hierarchy] + */ + template + struct Atomic + { + /** Execute generic atomic operation */ + template + HDINLINE void operator()(T_Acc const& acc, T_Dst& dst, T_Src const& src) const + { + atomicOpNoRet(acc, &dst, src, T_AlpakaHierarchy{}); + } + + /** Execute atomic operation for pmacc::math::Vector */ + template< + typename T_Acc, + typename T_Type, + int T_dim, + typename T_DstAccessor, + typename T_DstNavigator, + template + class T_DstStorage, + typename T_SrcAccessor, + typename T_SrcNavigator, + template + class T_SrcStorage> + HDINLINE void operator()( + T_Acc const& acc, + pmacc::math::Vector& dst, + pmacc::math::Vector const& src) const + { + for(int i = 0; i < T_dim; ++i) + atomicOpNoRet(acc, &dst[i], src[i], T_AlpakaHierarchy{}); + } + }; + + } // namespace functors + } // namespace nvidia +} // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Max.hpp b/include/pmacc/nvidia/functors/Max.hpp index 355fd8ddc1..47dd4c5cbd 100644 --- a/include/pmacc/nvidia/functors/Max.hpp +++ b/include/pmacc/nvidia/functors/Max.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -28,36 +28,36 @@ namespace pmacc { -namespace nvidia -{ -namespace functors -{ - struct Max + namespace nvidia { - template - DINLINE void operator()(Dst & dst, const Src & src) const + namespace functors { - dst = algorithms::math::max(dst, src); - } + struct Max + { + template + DINLINE void operator()(Dst& dst, const Src& src) const + { + dst = math::max(dst, src); + } - template - DINLINE void operator()(const T_Acc &, Dst & dst, const Src & src) const - { - dst = algorithms::math::max(dst, src); - } - }; -} // namespace functors -} // namespace nvidia + template + DINLINE void operator()(const T_Acc&, Dst& dst, const Src& src) const + { + dst = math::max(dst, src); + } + }; + } // namespace functors + } // namespace nvidia } // namespace pmacc namespace pmacc { -namespace mpi -{ - template<> - HINLINE MPI_Op getMPI_Op() + namespace mpi { - return MPI_MAX; - } -} // namespace mpi + template<> + HINLINE MPI_Op getMPI_Op() + { + return MPI_MAX; + } + } // namespace mpi } // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Min.hpp b/include/pmacc/nvidia/functors/Min.hpp index 6431843a55..dfec74251e 100644 --- a/include/pmacc/nvidia/functors/Min.hpp +++ b/include/pmacc/nvidia/functors/Min.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -29,36 +29,36 @@ namespace pmacc { -namespace nvidia -{ -namespace functors -{ - struct Min + namespace nvidia { - template - DINLINE void operator()(Dst & dst, const Src & src) const + namespace functors { - dst = algorithms::math::max(dst, src); - } + struct Min + { + template + DINLINE void operator()(Dst& dst, const Src& src) const + { + dst = math::min(dst, src); + } - template - DINLINE void operator()(const T_Acc &, Dst & dst, const Src & src) const - { - dst = algorithms::math::max(dst, src); - } - }; -} // namespace functors -} // namespace nvidia + template + DINLINE void operator()(const T_Acc&, Dst& dst, const Src& src) const + { + dst = math::min(dst, src); + } + }; + } // namespace functors + } // namespace nvidia } // namespace pmacc namespace pmacc { -namespace mpi -{ - template<> - HINLINE MPI_Op getMPI_Op() + namespace mpi { - return MPI_MIN; - } -} // namespace mpi + template<> + HINLINE MPI_Op getMPI_Op() + { + return MPI_MIN; + } + } // namespace mpi } // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Mul.hpp b/include/pmacc/nvidia/functors/Mul.hpp index 8a67aa192b..8ccafd42ee 100644 --- a/include/pmacc/nvidia/functors/Mul.hpp +++ b/include/pmacc/nvidia/functors/Mul.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PMacc. 
* @@ -26,38 +26,36 @@ namespace pmacc { -namespace nvidia -{ -namespace functors -{ - struct Mul + namespace nvidia { - template - HDINLINE void - operator()( Dst& dst, const Src& src ) const + namespace functors { - dst *= src; - } + struct Mul + { + template + HDINLINE void operator()(Dst& dst, const Src& src) const + { + dst *= src; + } - template - HDINLINE void - operator()( const T_Acc &, Dst& dst, const Src& src ) const - { - dst *= src; - } - }; -} // namespace functors -} // namespace nvidia + template + HDINLINE void operator()(const T_Acc&, Dst& dst, const Src& src) const + { + dst *= src; + } + }; + } // namespace functors + } // namespace nvidia } // namespace pmacc namespace pmacc { -namespace mpi -{ - template<> - HINLINE MPI_Op getMPI_Op() + namespace mpi { - return MPI_PROD; - } -} // namespace mpi + template<> + HINLINE MPI_Op getMPI_Op() + { + return MPI_PROD; + } + } // namespace mpi } // namespace pmacc diff --git a/include/pmacc/nvidia/functors/Sub.hpp b/include/pmacc/nvidia/functors/Sub.hpp index 9b62ad0c86..cc0822efe4 100644 --- a/include/pmacc/nvidia/functors/Sub.hpp +++ b/include/pmacc/nvidia/functors/Sub.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PMacc. 
* @@ -25,26 +25,24 @@ namespace pmacc { -namespace nvidia -{ -namespace functors -{ - struct Sub + namespace nvidia { - template - HDINLINE void - operator()( Dst& dst, const Src& src ) const + namespace functors { - dst -= src; - } + struct Sub + { + template + HDINLINE void operator()(Dst& dst, const Src& src) const + { + dst -= src; + } - template - HDINLINE void - operator()( const T_Acc &, Dst& dst, const Src& src ) const - { - dst -= src; - } - }; -} // namespace functors -} // namespace nvidia + template + HDINLINE void operator()(const T_Acc&, Dst& dst, const Src& src) const + { + dst -= src; + } + }; + } // namespace functors + } // namespace nvidia } // namespace pmacc diff --git a/include/pmacc/nvidia/gpuEntryFunction.hpp b/include/pmacc/nvidia/gpuEntryFunction.hpp index e5b44a133a..716f490b97 100644 --- a/include/pmacc/nvidia/gpuEntryFunction.hpp +++ b/include/pmacc/nvidia/gpuEntryFunction.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Felix Rene Widera +/* Copyright 2016-2021 Felix Rene Widera * * This file is part of PMacc. * @@ -28,32 +28,25 @@ namespace pmacc { -namespace nvidia -{ - - /** - * - * @tparam T_KernelFunctor type of the functor for device execution - */ - template< typename T_KernelFunctor > - struct PMaccKernel + namespace nvidia { /** * - * @param acc functor for device execution - * @param args arguments for the functor + * @tparam T_KernelFunctor type of the functor for device execution */ - template< - typename T_Acc, - typename ... T_Args - > - DINLINE void operator()( - T_Acc const acc, - T_Args ... args - ) const + template + struct PMaccKernel { - T_KernelFunctor{}( acc, args ... ); - } - }; -} //namespace nvidia -} //namespace pmacc + /** + * + * @param acc functor for device execution + * @param args arguments for the functor + */ + template + DINLINE void operator()(T_Acc const acc, T_Args... 
args) const + { + T_KernelFunctor{}(acc, args...); + } + }; + } // namespace nvidia +} // namespace pmacc diff --git a/include/pmacc/nvidia/memory/MemoryInfo.hpp b/include/pmacc/nvidia/memory/MemoryInfo.hpp index c87152f95a..a4af3ee54c 100644 --- a/include/pmacc/nvidia/memory/MemoryInfo.hpp +++ b/include/pmacc/nvidia/memory/MemoryInfo.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Rene Widera * * This file is part of PMacc. * @@ -28,106 +28,99 @@ namespace pmacc { - -namespace nvidia -{ -namespace memory -{ - -/** - * Provides convenience methods for querying memory information. - * Singleton class. - */ -class MemoryInfo -{ -public: - /** - * Returns information about device memory. - * - * @param free amount of free memory in bytes. can be nullptr - * @param total total amount of memory in bytes. can be nullptr. (nullptr by default) - */ - void getMemoryInfo(size_t *free, size_t *total = nullptr) + namespace nvidia { - size_t freeInternal = 0; - size_t totalInternal = 0; - - CUDA_CHECK(cudaMemGetInfo(&freeInternal, &totalInternal)); - - if (free != nullptr) - { - if (reservedMem > freeInternal) - freeInternal = 0; - else - freeInternal -= reservedMem; - - *free = freeInternal; - } - if (total != nullptr) + namespace memory { - if (reservedMem > totalInternal) - totalInternal = 0; - else - totalInternal -= reservedMem; - - *total = totalInternal; - } - } - - /** Returns true if the memory pool is shared by host and device */ - bool isSharedMemoryPool() - { -#if( PMACC_CUDA_ENABLED != 1 ) - return true; + /** + * Provides convenience methods for querying memory information. + * Singleton class. + */ + class MemoryInfo + { + public: + /** + * Returns information about device memory. + * + * @param free amount of free memory in bytes. can be nullptr + * @param total total amount of memory in bytes. can be nullptr. 
(nullptr by default) + */ + void getMemoryInfo(size_t* free, size_t* total = nullptr) + { + size_t freeInternal = 0; + size_t totalInternal = 0; + + CUDA_CHECK(cuplaMemGetInfo(&freeInternal, &totalInternal)); + + if(free != nullptr) + { + if(reservedMem > freeInternal) + freeInternal = 0; + else + freeInternal -= reservedMem; + + *free = freeInternal; + } + if(total != nullptr) + { + if(reservedMem > totalInternal) + totalInternal = 0; + else + totalInternal -= reservedMem; + + *total = totalInternal; + } + } + + /** Returns true if the memory pool is shared by host and device */ + bool isSharedMemoryPool() + { +#if(PMACC_CUDA_ENABLED != 1 && ALPAKA_ACC_GPU_HIP_ENABLED != 1) + return true; #else - size_t freeInternal = 0; - size_t freeAtStart = 0; + size_t freeInternal = 0; + size_t freeAtStart = 0; - getMemoryInfo(&freeAtStart); + getMemoryInfo(&freeAtStart); - /* alloc 90%, since allocating 100% is a bit risky on a SoC-like device */ - size_t allocSth = size_t( 0.9 * double(freeAtStart) ); - uint8_t* c = new uint8_t[allocSth]; - memset(c, 0, allocSth); + /* alloc 90%, since allocating 100% is a bit risky on a SoC-like device */ + size_t allocSth = size_t(0.9 * double(freeAtStart)); + uint8_t* c = new uint8_t[allocSth]; + memset(c, 0, allocSth); - getMemoryInfo(&freeInternal); - delete [] c; + getMemoryInfo(&freeInternal); + delete[] c; - /* if we allocated 90% of available mem, we should have "lost" more - * than 50% of memory, even with fluctuations from the OS */ - if( double(freeInternal)/double(freeAtStart) < 0.5 ) - return true; + /* if we allocated 90% of available mem, we should have "lost" more + * than 50% of memory, even with fluctuations from the OS */ + if(double(freeInternal) / double(freeAtStart) < 0.5) + return true; - return false; + return false; #endif - } - - void setReservedMemory(size_t reservedMem) - { - this->reservedMem = reservedMem; - } - -protected: - size_t reservedMem; - -private: - - friend struct detail::Environment; - - static 
MemoryInfo& getInstance() - { - static MemoryInfo instance; - return instance; - } - - MemoryInfo() : - reservedMem(0) - { - - } -}; -} //namespace memory -} //namespace nvidia -} //namespace pmacc - - + } + + void setReservedMemory(size_t reservedMem) + { + this->reservedMem = reservedMem; + } + + protected: + size_t reservedMem; + + private: + friend struct detail::Environment; + + static MemoryInfo& getInstance() + { + static MemoryInfo instance; + return instance; + } + + MemoryInfo() : reservedMem(0) + { + } + }; + } // namespace memory + } // namespace nvidia +} // namespace pmacc diff --git a/include/pmacc/nvidia/reduce/Reduce.hpp b/include/pmacc/nvidia/reduce/Reduce.hpp index 1b3448aef7..4d6fcc0a38 100644 --- a/include/pmacc/nvidia/reduce/Reduce.hpp +++ b/include/pmacc/nvidia/reduce/Reduce.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -37,551 +37,440 @@ namespace pmacc { -namespace nvidia -{ -namespace reduce -{ - -namespace kernel -{ - /** reduce elements within a buffer - * - * @tparam type element type within the buffer - * @tparam T_blockSize minimum number of elements which will be reduced - * within a CUDA block - * @tparam T_numWorkers number of workers - */ - template< - typename Type, - uint32_t T_blockSize, - uint32_t T_numWorkers - > - struct Reduce + namespace nvidia { - - /** reduce buffer - * - * This method can be used to reduce a chunk of an array. - * This method is a **collective** method and needs to be called by all - * threads within a CUDA block. 
- * - * @tparam T_SrcBuffer type of the buffer - * @tparam T_DestBuffer type of result buffer - * @tparam T_Functor type of the binary functor to reduce two elements to the intermediate buffer - * @tparam T_DestFunctor type of the binary functor to reduce two elements to @destBuffer - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param srcBuffer a class or a pointer with the `operator[](size_t)` (one dimensional access) - * @param bufferSize number of elements in @p srcBuffer - * @param destBuffer a class or a pointer with the `operator[](size_t)` (one dimensional access), - * number of elements within the buffer must be at least one - * @param func binary functor for reduce which takes two arguments, - * first argument is the source and get the new reduced value. - * @param destFunc binary functor for reduce which takes two arguments, - * first argument is the source and get the new reduced value. - * - * @result void intermediate results are stored in @destBuffer, - * the final result is stored in the first slot of @destBuffer - * if the operator is called with one CUDA block - */ - template< - typename T_SrcBuffer, - typename T_DestBuffer, - typename T_Functor, - typename T_DestFunctor, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_SrcBuffer const & srcBuffer, - uint32_t const bufferSize, - T_DestBuffer destBuffer, - T_Functor func, - T_DestFunctor destFunc - ) const - { - using namespace mappings::threads; - - constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; - - uint32_t const numGlobalVirtualThreadCount = gridDim.x * T_blockSize; - WorkerCfg< numWorkers > workerCfg( workerIdx ); - - sharedMemExtern(s_mem,Type); - - this->operator()( - acc, - workerCfg, - numGlobalVirtualThreadCount, - srcBuffer, - bufferSize, - func, - s_mem, - blockIdx.x - ); - - using MasterOnly = IdxConfig< - 1, - numWorkers - >; - - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t 
const, - uint32_t const - ) - { - destFunc( - acc, - destBuffer[ blockIdx.x ], - s_mem[ 0 ] - ); - } - ); - } - - /** reduce a buffer - * - * This method can be used to reduce a chunk of an array. - * This method is a **collective** method and needs to be called by all - * threads within a cuda block. - * - * @tparam T_SrcBuffer type of the buffer - * @tparam T_Functor type of the binary functor to reduce two elements - * @tparam T_SharedBuffer type of the shared memory buffer - * @tparam T_WorkerCfg worker configuration type - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param workerCfg lockstep worker configuration - * @param numReduceThreads Number of threads which working together to reduce the array. - * For a reduction within a block the value must be equal to T_blockSize - * @param srcBuffer a class or a pointer with the `operator[](size_t)` (one dimensional access) - * @param bufferSize number of elements in @p srcBuffer - * @param func binary functor for reduce which takes two arguments, - * first argument is the source and get the new reduced value. 
- * @param sharedMem shared memory buffer with storage for `linearThreadIdxInBlock` elements, - * buffer must implement `operator[](size_t)` (one dimensional access) - * @param blockIndex index of the cuda block, - * for a global reduce: `blockIdx.x`, - * for a reduce within a block: `0` - * - * @result void the result is stored in the first slot of @p sharedMem - */ - template< - typename T_SrcBuffer, - typename T_Functor, - typename T_SharedBuffer, - typename T_WorkerCfg, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - T_WorkerCfg const workerCfg, - size_t const numReduceThreads, - T_SrcBuffer const & srcBuffer, - size_t const bufferSize, - T_Functor const & func, - T_SharedBuffer & sharedMem, - size_t const blockIndex = 0u - ) const + namespace reduce { - using namespace mappings::threads; - - using VirtualWorkerCfg = IdxConfig< - T_blockSize, - T_WorkerCfg::numWorkers - >; - - pmacc::memory::CtxArray< - uint32_t, - VirtualWorkerCfg - > - linearReduceThreadIdxCtx( - workerCfg.getWorkerIdx( ), - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - return blockIndex * T_blockSize + linearIdx; - } - ); - - pmacc::memory::CtxArray< - bool, - VirtualWorkerCfg - > - isActiveCtx( - workerCfg.getWorkerIdx(), - [&]( - uint32_t const, - uint32_t const idx - ) + namespace kernel + { + /** reduce elements within a buffer + * + * @tparam type element type within the buffer + * @tparam T_blockSize minimum number of elements which will be reduced + * within a CUDA block + * @tparam T_numWorkers number of workers + */ + template + struct Reduce { - return linearReduceThreadIdxCtx[ idx ] < bufferSize; - } - ); + /** reduce buffer + * + * This method can be used to reduce a chunk of an array. + * This method is a **collective** method and needs to be called by all + * threads within a CUDA block. 
+ * + * @tparam T_SrcBuffer type of the buffer + * @tparam T_DestBuffer type of result buffer + * @tparam T_Functor type of the binary functor to reduce two elements to the intermediate buffer + * @tparam T_DestFunctor type of the binary functor to reduce two elements to @destBuffer + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param srcBuffer a class or a pointer with the `operator[](size_t)` (one dimensional access) + * @param bufferSize number of elements in @p srcBuffer + * @param destBuffer a class or a pointer with the `operator[](size_t)` (one dimensional access), + * number of elements within the buffer must be at least one + * @param func binary functor for reduce which takes two arguments, + * first argument is the source and get the new reduced value. + * @param destFunc binary functor for reduce which takes two arguments, + * first argument is the source and get the new reduced value. + * + * @result void intermediate results are stored in @destBuffer, + * the final result is stored in the first slot of @destBuffer + * if the operator is called with one CUDA block + */ + template< + typename T_SrcBuffer, + typename T_DestBuffer, + typename T_Functor, + typename T_DestFunctor, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_SrcBuffer const& srcBuffer, + uint32_t const bufferSize, + T_DestBuffer destBuffer, + T_Functor func, + T_DestFunctor destFunc) const + { + using namespace mappings::threads; - ForEachIdx< VirtualWorkerCfg > forEachVirtualThread( workerCfg.getWorkerIdx() ); + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t const workerIdx = cupla::threadIdx(acc).x; - forEachVirtualThread( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( isActiveCtx[ idx ] ) + uint32_t const numGlobalVirtualThreadCount = cupla::gridDim(acc).x * T_blockSize; + WorkerCfg workerCfg(workerIdx); + + sharedMemExtern(s_mem, Type); + + this->operator()( + acc, + workerCfg, + 
numGlobalVirtualThreadCount, + srcBuffer, + bufferSize, + func, + s_mem, + cupla::blockIdx(acc).x); + + using MasterOnly = IdxConfig<1, numWorkers>; + + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + destFunc(acc, destBuffer[cupla::blockIdx(acc).x], s_mem[0]); + }); + } + + /** reduce a buffer + * + * This method can be used to reduce a chunk of an array. + * This method is a **collective** method and needs to be called by all + * threads within a cupla block. + * + * @tparam T_SrcBuffer type of the buffer + * @tparam T_Functor type of the binary functor to reduce two elements + * @tparam T_SharedBuffer type of the shared memory buffer + * @tparam T_WorkerCfg worker configuration type + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param workerCfg lockstep worker configuration + * @param numReduceThreads Number of threads which working together to reduce the array. + * For a reduction within a block the value must be equal to T_blockSize + * @param srcBuffer a class or a pointer with the `operator[](size_t)` (one dimensional access) + * @param bufferSize number of elements in @p srcBuffer + * @param func binary functor for reduce which takes two arguments, + * first argument is the source and get the new reduced value. 
+ * @param sharedMem shared memory buffer with storage for `linearThreadIdxInBlock` elements, + * buffer must implement `operator[](size_t)` (one dimensional access) + * @param blockIndex index of the cupla block, + * for a global reduce: `cupla::blockIdx(acc).x`, + * for a reduce within a block: `0` + * + * @result void the result is stored in the first slot of @p sharedMem + */ + template< + typename T_SrcBuffer, + typename T_Functor, + typename T_SharedBuffer, + typename T_WorkerCfg, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_WorkerCfg const workerCfg, + size_t const numReduceThreads, + T_SrcBuffer const& srcBuffer, + size_t const bufferSize, + T_Functor const& func, + T_SharedBuffer& sharedMem, + size_t const blockIndex = 0u) const { - /*fill shared mem*/ - Type r_value = srcBuffer[ linearReduceThreadIdxCtx[ idx ] ]; - /*reduce not read global memory to shared*/ - uint32_t i = linearReduceThreadIdxCtx[ idx ] + numReduceThreads; - while( i < bufferSize ) + using namespace mappings::threads; + + using VirtualWorkerCfg = IdxConfig; + + pmacc::memory::CtxArray linearReduceThreadIdxCtx( + workerCfg.getWorkerIdx(), + [&](uint32_t const linearIdx, uint32_t const) { + return blockIndex * T_blockSize + linearIdx; + }); + + pmacc::memory::CtxArray isActiveCtx( + workerCfg.getWorkerIdx(), + [&](uint32_t const, uint32_t const idx) { + return linearReduceThreadIdxCtx[idx] < bufferSize; + }); + + ForEachIdx forEachVirtualThread(workerCfg.getWorkerIdx()); + + forEachVirtualThread([&](uint32_t const linearIdx, uint32_t const idx) { + if(isActiveCtx[idx]) + { + /*fill shared mem*/ + Type r_value = srcBuffer[linearReduceThreadIdxCtx[idx]]; + /*reduce not read global memory to shared*/ + uint32_t i = linearReduceThreadIdxCtx[idx] + numReduceThreads; + while(i < bufferSize) + { + func(acc, r_value, srcBuffer[i]); + i += numReduceThreads; + } + sharedMem[linearIdx] = r_value; + } + }); + + cupla::__syncthreads(acc); + /*now reduce shared memory*/ + 
uint32_t chunk_count = T_blockSize; + + while(chunk_count != 1u) { - func( - acc, - r_value, - srcBuffer[ i ] - ); - i += numReduceThreads; + /* Half number of chunks (rounded down) */ + uint32_t active_threads = chunk_count / 2u; + + /* New chunks is half number of chunks rounded up for uneven counts + * --> linearThreadIdxInBlock == 0 will reduce the single element for + * an odd number of values at the end + */ + chunk_count = (chunk_count + 1u) / 2u; + + forEachVirtualThread([&](uint32_t const linearIdx, uint32_t const idx) { + isActiveCtx[idx] = (linearReduceThreadIdxCtx[idx] < bufferSize) + && !(linearIdx != 0u && linearIdx >= active_threads); + if(isActiveCtx[idx]) + func(acc, sharedMem[linearIdx], sharedMem[linearIdx + chunk_count]); + + cupla::__syncthreads(acc); + }); } - sharedMem[ linearIdx ] = r_value; } - } - ); - - __syncthreads( ); - /*now reduce shared memory*/ - uint32_t chunk_count = T_blockSize; + }; + } // namespace kernel - while( chunk_count != 1u ) + class Reduce { - /* Half number of chunks (rounded down) */ - uint32_t active_threads = chunk_count / 2u; - - /* New chunks is half number of chunks rounded up for uneven counts - * --> linearThreadIdxInBlock == 0 will reduce the single element for - * an odd number of values at the end + public: + /* Constructor + * Don't create a instance before you have set you cupla device! 
+ * @param byte how many bytes in global gpu memory can reserved for the reduce algorithm + * @param sharedMemByte limit the usage of shared memory per block on gpu */ - chunk_count = ( chunk_count + 1u ) / 2u; + HINLINE Reduce(const uint32_t byte, const uint32_t sharedMemByte = 4 * 1024) + : byte(byte) + , sharedMemByte(sharedMemByte) + , reduceBuffer(nullptr) + { + reduceBuffer = new GridBuffer(DataSpace(byte)); + } - forEachVirtualThread( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) + /* Reduce elements in global gpu memory + * + * @param func binary functor for reduce which takes two arguments, first argument is the source and + * get the new reduced value. Functor must specialize the function getMPI_Op. + * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one + * dimensional access) + * @param n number of elements to reduce + * + * @return reduced value + */ + template + HINLINE typename traits::GetValueType::ValueType operator()(Functor func, Src src, uint32_t n) + { + /* - the result of a functor can be a reference or a const value + * - it is not allowed to create const or reference memory + * thus we remove `references` and `const` qualifiers */ + typedef typename boost::remove_const< + typename boost::remove_reference::ValueType>::type>::type + Type; + + uint32_t blockcount = optimalThreadsPerBlock(n, sizeof(Type)); + + uint32_t n_buffer = byte / sizeof(Type); + + uint32_t threads = n_buffer * blockcount + * 2; /* x2 is used thus we can use all byte in Buffer, after we calculate threads/2 */ + + + if(threads > n) + threads = n; + Type* dest = (Type*) reduceBuffer->getDeviceBuffer().getBasePointer(); + + uint32_t blocks = threads / 2 / blockcount; + if(blocks == 0) + blocks = 1; + callReduceKernel( + blocks, + blockcount, + blockcount * sizeof(Type), + src, + n, + dest, + func, + pmacc::nvidia::functors::Assign()); + n = blocks; + blockcount = optimalThreadsPerBlock(n, sizeof(Type)); + blocks = 
n / 2 / blockcount; + if(blocks == 0 && n > 1) + blocks = 1; + + + while(blocks != 0) { - isActiveCtx[ idx ] = ( linearReduceThreadIdxCtx[ idx ] < bufferSize ) && - !( - linearIdx != 0u && - linearIdx >= active_threads - ); - if( isActiveCtx[ idx ] ) - func( - acc, - sharedMem[ linearIdx ], - sharedMem[ linearIdx + chunk_count ] - ); - - __syncthreads(); - } - ); - } - } - }; -} // namespace kernel - - class Reduce - { - public: - - /* Constructor - * Don't create a instance before you have set you cuda device! - * @param byte how many bytes in global gpu memory can reserved for the reduce algorithm - * @param sharedMemByte limit the usage of shared memory per block on gpu - */ - HINLINE Reduce(const uint32_t byte, const uint32_t sharedMemByte = 4 * 1024) : - byte(byte), sharedMemByte(sharedMemByte), reduceBuffer(nullptr) - { - - reduceBuffer = new GridBuffer (DataSpace (byte)); - } - - /* Reduce elements in global gpu memory - * - * @param func binary functor for reduce which takes two arguments, first argument is the source and get the new reduced value. - * Functor must specialize the function getMPI_Op. 
- * @param src a class or a pointer where the reduce algorithm can access the value by operator [] (one dimensional access) - * @param n number of elements to reduce - * - * @return reduced value - */ - template - HINLINE typename traits::GetValueType::ValueType operator()(Functor func, Src src, uint32_t n) - { - /* - the result of a functor can be a reference or a const value - * - it is not allowed to create const or reference memory - * thus we remove `references` and `const` qualifiers */ - typedef typename boost::remove_const< - typename boost::remove_reference< - typename traits::GetValueType::ValueType - >::type - >::type Type; - - uint32_t blockcount = optimalThreadsPerBlock(n, sizeof (Type)); - - uint32_t n_buffer = byte / sizeof (Type); - - uint32_t threads = n_buffer * blockcount * 2; /* x2 is used thus we can use all byte in Buffer, after we calculate threads/2 */ - - - - if (threads > n) threads = n; - Type* dest = (Type*) reduceBuffer->getDeviceBuffer().getBasePointer(); + if(blocks > 1) + { + uint32_t blockOffset = ceil((double) blocks / blockcount); + uint32_t useBlocks = blocks - blockOffset; + uint32_t problemSize = n - (blockOffset * blockcount); + Type* srcPtr = dest + (blockOffset * blockcount); + + callReduceKernel( + useBlocks, + blockcount, + blockcount * sizeof(Type), + srcPtr, + problemSize, + dest, + func, + func); + blocks = blockOffset * blockcount; + } + else + { + callReduceKernel( + blocks, + blockcount, + blockcount * sizeof(Type), + dest, + n, + dest, + func, + pmacc::nvidia::functors::Assign()); + } - uint32_t blocks = threads / 2 / blockcount; - if (blocks == 0) blocks = 1; - callReduceKernel< Type >(blocks, blockcount, blockcount * sizeof (Type), - src, n, dest, func, pmacc::nvidia::functors::Assign()); - n = blocks; - blockcount = optimalThreadsPerBlock(n, sizeof (Type)); - blocks = n / 2 / blockcount; - if (blocks == 0 && n > 1) blocks = 1; + n = blocks; + blockcount = optimalThreadsPerBlock(n, sizeof(Type)); + blocks = n / 2 
/ blockcount; + if(blocks == 0 && n > 1) + blocks = 1; + } + reduceBuffer->deviceToHost(); + __getTransactionEvent().waitForFinished(); + return *((Type*) (reduceBuffer->getHostBuffer().getBasePointer())); + } - while (blocks != 0) - { - if (blocks > 1) + virtual ~Reduce() { - uint32_t blockOffset = ceil((double) blocks / blockcount); - uint32_t useBlocks = blocks - blockOffset; - uint32_t problemSize = n - (blockOffset * blockcount); - Type* srcPtr = dest + (blockOffset * blockcount); - - callReduceKernel< Type >(useBlocks, blockcount, blockcount * sizeof (Type), - srcPtr, problemSize, dest, func, func); - blocks = blockOffset*blockcount; + __delete(reduceBuffer); } - else - { - callReduceKernel< Type >(blocks, blockcount, blockcount * sizeof (Type), - dest, n, dest, func, pmacc::nvidia::functors::Assign()); + private: + /* calculate number of threads per block + * @param threads maximal number of threads per block + * @return number of threads per block + */ + HINLINE uint32_t getThreadsPerBlock(uint32_t threads) + { + /// \todo this list is not complete + /// extend it and maybe check for sm_version + /// and add possible threads accordingly. + /// maybe this function should be exported + /// to a more general nvidia class, too. + if(threads >= 512) + return 512; + if(threads >= 256) + return 256; + if(threads >= 128) + return 128; + if(threads >= 64) + return 64; + if(threads >= 32) + return 32; + if(threads >= 16) + return 16; + if(threads >= 8) + return 8; + if(threads >= 4) + return 4; + if(threads >= 2) + return 2; + + return 1; } - n = blocks; - blockcount = optimalThreadsPerBlock(n, sizeof (Type)); - blocks = n / 2 / blockcount; - if (blocks == 0 && n > 1) blocks = 1; - } - - reduceBuffer->deviceToHost(); - __getTransactionEvent().waitForFinished(); - return *((Type*) (reduceBuffer->getHostBuffer().getBasePointer())); - } + /* start the reduce kernel + * + * The minimal number of elements reduced within a CUDA block is chosen at + * compile time. 
+ */ + template + HINLINE void callReduceKernel( + uint32_t blocks, + uint32_t threads, + uint32_t sharedMemSize, + T_Args&&... args) + { + if(threads >= 512u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<512u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 256u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<256u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 128u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<128u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 64u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<64u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 32u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<32u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 16u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<16u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 8u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<8u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 4u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<4u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else if(threads >= 2u) + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<2u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + else + { + constexpr uint32_t numWorkers = traits::GetNumWorkers<1u>::value; + PMACC_KERNEL(kernel::Reduce{}) + (blocks, numWorkers, sharedMemSize)(args...); + } + } - virtual ~Reduce() - { - 
__delete(reduceBuffer); - } - private: + /*calculate optimal number of threads per block with respect to shared memory limitations + * @param n number of elements to reduce + * @param sizePerElement size in bytes per elements + * @return optimal count of threads per block to solve the problem + */ + HINLINE uint32_t optimalThreadsPerBlock(uint32_t n, uint32_t sizePerElement) + { + uint32_t const sharedBorder = sharedMemByte / sizePerElement; + return getThreadsPerBlock(std::min(sharedBorder, n)); + } - /* calculate number of threads per block - * @param threads maximal number of threads per block - * @return number of threads per block - */ - HINLINE uint32_t getThreadsPerBlock(uint32_t threads) - { - /// \todo this list is not complete - /// extend it and maybe check for sm_version - /// and add possible threads accordingly. - /// maybe this function should be exported - /// to a more general nvidia class, too. - if (threads >= 512) return 512; - if (threads >= 256) return 256; - if (threads >= 128) return 128; - if (threads >= 64) return 64; - if (threads >= 32) return 32; - if (threads >= 16) return 16; - if (threads >= 8) return 8; - if (threads >= 4) return 4; - if (threads >= 2) return 2; - - return 1; - } - - - /* start the reduce kernel - * - * The minimal number of elements reduced within a CUDA block is chosen at - * compile time. - */ - template< typename Type, typename ... T_Args > - HINLINE void callReduceKernel( - uint32_t blocks, - uint32_t threads, - uint32_t sharedMemSize, - T_Args && ... args - ) - { - if(threads >= 512u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 512u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 512u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 256u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 256u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 256u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... 
- ); - } - else if(threads >= 128u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 128u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 128u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 64u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 64u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 64u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 32u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 32u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 32u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 16u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 16u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 16u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 8u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 8u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 8u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 4u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 4u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 4u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else if(threads >= 2u) - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 2u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 2u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... - ); - } - else - { - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 1u - >::value; - PMACC_KERNEL( kernel::Reduce< Type, 1u, numWorkers >{ } )( - blocks, - numWorkers, - sharedMemSize - )( - args ... 
- ); - } - } - - - /*calculate optimal number of threads per block with respect to shared memory limitations - * @param n number of elements to reduce - * @param sizePerElement size in bytes per elements - * @return optimal count of threads per block to solve the problem - */ - HINLINE uint32_t optimalThreadsPerBlock(uint32_t n, uint32_t sizePerElement) - { - uint32_t const sharedBorder = sharedMemByte / sizePerElement; - return getThreadsPerBlock(std::min(sharedBorder, n)); - } - - /*global gpu buffer for reduce steps*/ - GridBuffer *reduceBuffer; - /*buffer size limit in bytes on gpu*/ - uint32_t byte; - /*shared memory limit in byte for one block*/ - uint32_t sharedMemByte; - - }; -} -} -} + /*global gpu buffer for reduce steps*/ + GridBuffer* reduceBuffer; + /*buffer size limit in bytes on gpu*/ + uint32_t byte; + /*shared memory limit in byte for one block*/ + uint32_t sharedMemByte; + }; + } // namespace reduce + } // namespace nvidia +} // namespace pmacc diff --git a/include/pmacc/nvidia/rng/RNG.hpp b/include/pmacc/nvidia/rng/RNG.hpp deleted file mode 100644 index 2a47f857c9..0000000000 --- a/include/pmacc/nvidia/rng/RNG.hpp +++ /dev/null @@ -1,94 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. 
- * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - - -#pragma once - -#include "pmacc/types.hpp" - -namespace pmacc -{ -namespace nvidia -{ -namespace rng -{ - - /* create a random number generator on gpu - * \tparam RngMethod method to generate random number - * \tparam Distribution functor for distribution - */ - template - class RNG : public RNGMethod - { - public: - - typedef RNGMethod MethodType; - typedef Distribution DistributionType; - typedef RNG This; - - HDINLINE RNG() - { - } - - /* - * \param rngMethod instance of generator - * \param distribution instance of distribution functor - */ - DINLINE RNG(const RNGMethod& rng_method, const Distribution& rng_operation) : - RNGMethod(rng_method), op(rng_operation) - { - } - - HDINLINE RNG(const This& other) : - RNGMethod(static_cast(other)), op(other.op) - { - } - - /* default method to generate a random number - * @return random number - */ - DINLINE typename Distribution::Type operator()() - { - return this->op(this->getState()); - } - - private: - PMACC_ALIGN(op, Distribution); - }; - - /* create a random number generator on gpu - * \tparam RngMethod method to generate random number - * \tparam Distribution functor for distribution - * - * \param rngMethod instance of generator - * \param distribution instance of distribution functor - * \return class which can used to generate random numbers - */ - template - DINLINE typename pmacc::nvidia::rng::RNG create(const RngMethod & rngMethod, - const Distribution & distribution) - { - return pmacc::nvidia::rng::RNG(rngMethod, distribution); - } - -} // namespace rng -} // namespace nvidia -} // namespace pmacc diff --git a/include/pmacc/nvidia/rng/distributions/Normal_float.hpp b/include/pmacc/nvidia/rng/distributions/Normal_float.hpp deleted file mode 100644 index 2f09df70a7..0000000000 --- a/include/pmacc/nvidia/rng/distributions/Normal_float.hpp +++ 
/dev/null @@ -1,80 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - - -#pragma once - -#include "pmacc/types.hpp" - -namespace pmacc -{ -namespace nvidia -{ -namespace rng -{ -namespace distributions -{ -namespace detail -{ - /*Return normally distributed floats with mean 0.0f and standard deviation 1.0f - */ - template< typename T_Acc> - class Normal_float - { - public: - typedef float Type; - private: - using Dist = - decltype( - ::alpaka::rand::distribution::createNormalReal( - alpaka::core::declval())); - PMACC_ALIGN(dist, Dist); - public: - HDINLINE Normal_float() - { - } - - HDINLINE Normal_float(const T_Acc& acc) : dist(::alpaka::rand::distribution::createNormalReal(acc)) - { - } - - template - DINLINE Type operator()(RNGState& state) - { - return dist(state); - } - - }; -} // namespace detail - - struct Normal_float - { - template< typename T_Acc> - static HDINLINE detail::Normal_float< T_Acc > - get( T_Acc const & acc) - { - return detail::Normal_float< T_Acc >( acc ); - } - }; -} // namespace distributions -} // namespace rng -} // namespace nvidia -} // namespace pmacc diff --git a/include/pmacc/nvidia/rng/distributions/Uniform_float.hpp 
b/include/pmacc/nvidia/rng/distributions/Uniform_float.hpp deleted file mode 100644 index b757c9f04a..0000000000 --- a/include/pmacc/nvidia/rng/distributions/Uniform_float.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - - -#pragma once - -#include "pmacc/types.hpp" - -namespace pmacc -{ -namespace nvidia -{ -namespace rng -{ -namespace distributions -{ -namespace detail -{ - /*create a random float number from [0.0,1.0) - */ - template< typename T_Acc> - class Uniform_float - { - public: - typedef float Type; - private: - using Dist = - decltype( - ::alpaka::rand::distribution::createUniformReal( - alpaka::core::declval())); - PMACC_ALIGN(dist, Dist); - public: - - HDINLINE Uniform_float() - { - } - - HDINLINE Uniform_float(const T_Acc& acc) : dist(::alpaka::rand::distribution::createUniformReal(acc)) - { - } - - template - DINLINE Type operator()(RNGState& state) - { - // (0.f, 1.0f] - const Type raw = dist(state); - - /// \warn hack, are is that really ok? I say, yes, since - /// it shifts just exactly one number. 
Axel - /// - /// Note: (1.0f - raw) does not work, since - /// nvidia seems to return denormalized - /// floats around 0.f (thats not as they - /// state it out in their documentation) - // [0.f, 1.0f) - const Type r = raw * static_cast( raw != Type(1.0) ); - return r; - } - - }; -} // namespace detail - - struct Uniform_float - { - template< typename T_Acc> - static HDINLINE detail::Uniform_float< T_Acc > - get( T_Acc const & acc) - { - return detail::Uniform_float< T_Acc >( acc ); - } - }; -} // namespace distributions -} // namespace rng -} // namespace nvidia -} // namespace pmacc diff --git a/include/pmacc/nvidia/rng/distributions/Uniform_int32.hpp b/include/pmacc/nvidia/rng/distributions/Uniform_int32.hpp deleted file mode 100644 index e0569ecb26..0000000000 --- a/include/pmacc/nvidia/rng/distributions/Uniform_int32.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . 
- */ - - -#pragma once - -#include "pmacc/types.hpp" - -namespace pmacc -{ -namespace nvidia -{ -namespace rng -{ -namespace distributions -{ -namespace detail -{ - /*create a 32Bit random int number - * Range: [INT_MIN,INT_MAX] - */ - template< typename T_Acc> - class Uniform_int32 - { - public: - typedef int32_t Type; - - private: - typedef uint32_t RngType; - using Dist = - decltype( - ::alpaka::rand::distribution::createUniformUint( - alpaka::core::declval())); - PMACC_ALIGN(dist, Dist); - public: - HDINLINE Uniform_int() - { - } - - HDINLINE Uniform_int(const T_Acc& acc) : dist(::alpaka::rand::distribution::createUniformUint(acc)) - { - } - - template - DINLINE Type operator()(RNGState& state) - { - /*curand create a random 32Bit int value*/ - return static_cast(dist(state)); - } - }; -} // namespace detail - - struct Normal_float - { - template< typename T_Acc> - static HDINLINE detail::Uniform_int32< T_Acc > - get( T_Acc const & acc) - { - return detail::Uniform_int32< T_Acc >( acc ); - } - }; -} // namespace distributions -} // namespace rng -} // namespace nvidia -} // namespace pmacc diff --git a/include/pmacc/nvidia/rng/methods/Xor.hpp b/include/pmacc/nvidia/rng/methods/Xor.hpp deleted file mode 100644 index f48e6c3714..0000000000 --- a/include/pmacc/nvidia/rng/methods/Xor.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - - -#pragma once - -#include "pmacc/types.hpp" - -namespace pmacc -{ -namespace nvidia -{ -namespace rng -{ -namespace methods -{ - - template< typename T_Acc > - class Xor - { - private: - using Gen = - decltype( - ::alpaka::rand::generator::createDefault( - alpaka::core::declval(), - alpaka::core::declval(), - alpaka::core::declval())); - PMACC_ALIGN(gen, Gen); - public: - typedef Gen StateType; - typedef T_Acc Acc; - - HDINLINE Xor() : gen (0) - { - } - - DINLINE Xor(const T_Acc& acc, uint32_t seed, uint32_t subsequence = 0) - { - gen = ::alpaka::rand::generator::createDefault(acc, seed, subsequence); - } - - HDINLINE Xor(const Xor& other): gen(other.gen) - { - - } - - protected: - - DINLINE StateType& getState() - { - return gen; - } - }; -} // namespace methods -} // namespace rng -} // namespace nvidia -} // namespace pmacc diff --git a/include/pmacc/nvidia/warp.hpp b/include/pmacc/nvidia/warp.hpp index 48c58dbad6..d15bd93f2f 100644 --- a/include/pmacc/nvidia/warp.hpp +++ b/include/pmacc/nvidia/warp.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Alexander Grund +/* Copyright 2015-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. 
* @@ -21,96 +21,99 @@ #pragma once +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) -#include "pmacc/types.hpp" +# include "pmacc/types.hpp" namespace pmacc { -namespace nvidia -{ - + namespace nvidia + { /** get lane id of a thread within a warp * * id is in range [0,WAPRSIZE-1] * required PTX ISA >=1.3 */ -#if (__CUDA_ARCH__ >= 130) -DINLINE uint32_t getLaneId() -{ - uint32_t id; - asm("mov.u32 %0, %%laneid;" : "=r" (id)); - return id; -} -#endif +# if(__CUDA_ARCH__ >= 130) + DINLINE uint32_t getLaneId() + { + uint32_t id; + asm("mov.u32 %0, %%laneid;" : "=r"(id)); + return id; + } +# elif BOOST_COMP_HIP + DINLINE uint32_t getLaneId() + { + return __lane_id(); + } +# endif -#if (__CUDA_ARCH__ >= 300) -/** broadcast data within a warp - * - * required PTX ISA >=3.0 - * - * @param data value to broadcast - * @param srcLaneId lane id of the source thread - * @return value send by the source thread - * - * \{ - */ -//! broadcast a 32bit integer -DINLINE int32_t warpBroadcast(const int32_t data, const int32_t srcLaneId) -{ -#if(__CUDACC_VER_MAJOR__ >= 9) - return __shfl_sync(__activemask(), data, srcLaneId); -#else - return __shfl(data, srcLaneId); -#endif -} +# if(__CUDA_ARCH__ >= 300 || BOOST_COMP_HIP) + /** broadcast data within a warp + * + * required PTX ISA >=3.0 + * + * @param data value to broadcast + * @param srcLaneId lane id of the source thread + * @return value send by the source thread + * + * \{ + */ + //! broadcast a 32bit integer + DINLINE int32_t warpBroadcast(const int32_t data, const int32_t srcLaneId) + { +# if(__CUDACC_VER_MAJOR__ >= 9) + return __shfl_sync(__activemask(), data, srcLaneId); +# else + return __shfl(data, srcLaneId); +# endif + } -//! Broadcast a 64bit integer by using 2 32bit broadcasts -DINLINE int64_cu warpBroadcast(int64_cu data, const int32_t srcLaneId) -{ - int32_t* const pData = reinterpret_cast(&data); - pData[0] = warpBroadcast(pData[0], srcLaneId); - pData[1] = warpBroadcast(pData[1], srcLaneId); - return data; -} + //! 
Broadcast a 64bit integer by using 2 32bit broadcasts + DINLINE int64_cu warpBroadcast(int64_cu data, const int32_t srcLaneId) + { + int32_t* const pData = reinterpret_cast(&data); + pData[0] = warpBroadcast(pData[0], srcLaneId); + pData[1] = warpBroadcast(pData[1], srcLaneId); + return data; + } -//! Broadcast a 32bit unsigned int -DINLINE uint32_t warpBroadcast(const uint32_t data, const int32_t srcLaneId) -{ - return static_cast( - warpBroadcast(static_cast(data), srcLaneId) - ); -} + //! Broadcast a 32bit unsigned int + DINLINE uint32_t warpBroadcast(const uint32_t data, const int32_t srcLaneId) + { + return static_cast(warpBroadcast(static_cast(data), srcLaneId)); + } -//!Broadcast a 64bit unsigned int -DINLINE uint64_cu warpBroadcast(const uint64_cu data, const int32_t srcLaneId) -{ - return static_cast( - warpBroadcast(static_cast(data), srcLaneId) - ); -} + //! Broadcast a 64bit unsigned int + DINLINE uint64_cu warpBroadcast(const uint64_cu data, const int32_t srcLaneId) + { + return static_cast(warpBroadcast(static_cast(data), srcLaneId)); + } -//! Broadcast a 32bit float -DINLINE float warpBroadcast(const float data, const int32_t srcLaneId) -{ -#if(__CUDACC_VER_MAJOR__ >= 9) - return __shfl_sync(__activemask(), data, srcLaneId); -#else - return __shfl(data, srcLaneId); -#endif -} + //! Broadcast a 32bit float + DINLINE float warpBroadcast(const float data, const int32_t srcLaneId) + { +# if(__CUDACC_VER_MAJOR__ >= 9) + return __shfl_sync(__activemask(), data, srcLaneId); +# else + return __shfl(data, srcLaneId); +# endif + } -//! Broadcast a 64bit float by using 2 32bit broadcasts -DINLINE double warpBroadcast(double data, const int32_t srcLaneId) -{ - float* const pData = reinterpret_cast(&data); - pData[0] = warpBroadcast(pData[0], srcLaneId); - pData[1] = warpBroadcast(pData[1], srcLaneId); - return data; -} + //! 
Broadcast a 64bit float by using 2 32bit broadcasts + DINLINE double warpBroadcast(double data, const int32_t srcLaneId) + { + float* const pData = reinterpret_cast(&data); + pData[0] = warpBroadcast(pData[0], srcLaneId); + pData[1] = warpBroadcast(pData[1], srcLaneId); + return data; + } //! @} -#endif +# endif -} //namespace nvidia -} //namespace pmacc + } // namespace nvidia +} // namespace pmacc + +#endif diff --git a/include/pmacc/particles/AsyncCommunicationImpl.hpp b/include/pmacc/particles/AsyncCommunicationImpl.hpp index 8edbc0ed81..eaa2b3d3bd 100644 --- a/include/pmacc/particles/AsyncCommunicationImpl.hpp +++ b/include/pmacc/particles/AsyncCommunicationImpl.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Alexander Grund +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -27,25 +27,27 @@ #include "pmacc/particles/ParticlesBase.hpp" #include -namespace pmacc{ - +namespace pmacc +{ /** * Trait that should return true if T is a particle species */ template struct IsParticleSpecies { - enum{ value = boost::is_same::value }; + enum + { + value = boost::is_same::value + }; }; - namespace communication { - + namespace communication + { template - struct AsyncCommunicationImpl::value> > + struct AsyncCommunicationImpl::value>> { template - EventTask - operator()(T_Particles& par, EventTask event) const + EventTask operator()(T_Particles& par, EventTask event) const { EventTask ret; __startTransaction(event); @@ -59,5 +61,5 @@ namespace pmacc{ } }; - } // namespace communication -} // namespace pmacc + } // namespace communication +} // namespace pmacc diff --git a/include/pmacc/particles/IdProvider.def b/include/pmacc/particles/IdProvider.def index 25392d8a88..1e25e08559 100644 --- a/include/pmacc/particles/IdProvider.def +++ b/include/pmacc/particles/IdProvider.def @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -26,7 +26,6 @@ namespace pmacc { - /** * Provider for globally unique ids (even across ranks) * Implemented for use in static contexts which allows e.g. calling from CUDA kernels @@ -35,7 +34,8 @@ namespace pmacc class IdProvider { public: - struct State{ + struct State + { /** Next id to be returned */ uint64_t nextId; /** First id used */ diff --git a/include/pmacc/particles/IdProvider.hpp b/include/pmacc/particles/IdProvider.hpp index 07bd569f1b..5f17c70631 100644 --- a/include/pmacc/particles/IdProvider.hpp +++ b/include/pmacc/particles/IdProvider.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. * @@ -31,10 +31,15 @@ namespace pmacc { - - namespace idDetail { - + namespace idDetail + { DEVICEONLY uint64_cu nextId; +#ifdef ALPAKA_ACC_ANY_BT_OACC_ENABLED +# pragma acc declare device_resident(::pmacc::idDetail::nextId) +#endif +#ifdef ALPAKA_ACC_ANY_BT_OMP5_ENABLED +# pragma omp declare target(::pmacc::idDetail::nextId) +#endif struct KernelSetNextId { @@ -63,7 +68,7 @@ namespace pmacc } }; - } // namespace idDetail + } // namespace idDetail template uint64_t IdProvider::m_maxNumProc; @@ -94,9 +99,8 @@ namespace pmacc m_startId = state.startId; if(m_maxNumProc < state.maxNumProc) m_maxNumProc = state.maxNumProc; - log("(Re-)Initialized IdProvider with id=%1%/%2% and maxNumProc=%3%/%4%") - % state.nextId % state.startId - % state.maxNumProc % m_maxNumProc; + log("(Re-)Initialized IdProvider with id=%1%/%2% and maxNumProc=%3%/%4%") % state.nextId + % state.startId % state.maxNumProc % m_maxNumProc; } template @@ -127,7 +131,8 @@ namespace pmacc * when counting the bits from 1 = right most bit * So first we calculate n, then remove the lowest bits of the next id so we have only the n upper bits * If any of them is non-zero, it is an overflow and we can have duplicate ids. 
- * If not, then all ids are probably unique (still a chance, the id is overflown so much, that detection is impossible) + * If not, then all ids are probably unique (still a chance, the id is overflown so much, that detection is + * impossible) */ uint64_t tmp = curState.maxNumProc - 1; int32_t bitsToCheck = 0; @@ -182,4 +187,4 @@ namespace pmacc return static_cast(newIdBuf.getHostBuffer().getDataBox()(0)); } -} // namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/Identifier.hpp b/include/pmacc/particles/Identifier.hpp index 5de6032581..3f6ccb0e75 100644 --- a/include/pmacc/particles/Identifier.hpp +++ b/include/pmacc/particles/Identifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Alexander Grund, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Alexander Grund, Axel Huebl * * This file is part of PMacc. * diff --git a/include/pmacc/particles/ParticleDescription.hpp b/include/pmacc/particles/ParticleDescription.hpp index ae7aefc421..801088db09 100644 --- a/include/pmacc/particles/ParticleDescription.hpp +++ b/include/pmacc/particles/ParticleDescription.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -29,99 +29,97 @@ namespace pmacc { - -/** ParticleDescription defines attributes, methods and flags of a particle - * - * This class holds no runtime data. - * The class holds information about the name, attributes, flags and methods of a - * particle. - * - * @tparam T_Name name of described particle (e.g. electron, ion) - * type must be a boost::mpl::string - * @tparam T_SuperCellSize compile time size of a super cell - * @tparam T_ValueTypeSeq sequence or single type with value_identifier - * @tparam T_Flags sequence or single type with identifier to add flags on a frame - * @tparam T_MethodsList sequence or single class with particle methods - * (e.g. calculate mass, gamma, ...) - * (e.g. useSolverXY, calcRadiation, ...) 
- * @tparam T_FrameExtensionList sequence or single class with frame extensions - * - extension must be an unary template class that supports bmpl::apply1<> - * - type of the final frame is applied to each extension class - * (this allows pointers and references to a frame itself) - * - the final frame that uses ParticleDescription inherits from all - * extension classes - */ -template< -typename T_Name, -typename T_SuperCellSize, -typename T_ValueTypeSeq, -typename T_Flags = bmpl::vector0<>, -typename T_HandleGuardRegion = HandleGuardRegion, -typename T_MethodsList = bmpl::vector0<>, -typename T_FrameExtensionList = bmpl::vector0<> -> -struct ParticleDescription -{ - typedef T_Name Name; - typedef T_SuperCellSize SuperCellSize; - typedef typename ToSeq::type ValueTypeSeq; - typedef typename ToSeq::type FlagsList; - typedef T_HandleGuardRegion HandleGuardRegion; - typedef typename ToSeq::type MethodsList; - typedef typename ToSeq::type FrameExtensionList; - typedef ParticleDescription< - Name, - SuperCellSize, - ValueTypeSeq, - FlagsList, - HandleGuardRegion, - MethodsList, - FrameExtensionList - > ThisType; - -}; + /** ParticleDescription defines attributes, methods and flags of a particle + * + * This class holds no runtime data. + * The class holds information about the name, attributes, flags and methods of a + * particle. + * + * @tparam T_Name name of described particle (e.g. electron, ion) + * type must be a boost::mpl::string + * @tparam T_SuperCellSize compile time size of a super cell + * @tparam T_ValueTypeSeq sequence or single type with value_identifier + * @tparam T_Flags sequence or single type with identifier to add flags on a frame + * @tparam T_MethodsList sequence or single class with particle methods + * (e.g. calculate mass, gamma, ...) + * (e.g. useSolverXY, calcRadiation, ...) 
+ * @tparam T_FrameExtensionList sequence or single class with frame extensions + * - extension must be an unary template class that supports bmpl::apply1<> + * - type of the final frame is applied to each extension class + * (this allows pointers and references to a frame itself) + * - the final frame that uses ParticleDescription inherits from all + * extension classes + */ + template< + typename T_Name, + typename T_SuperCellSize, + typename T_ValueTypeSeq, + typename T_Flags = bmpl::vector0<>, + typename T_HandleGuardRegion + = HandleGuardRegion, + typename T_MethodsList = bmpl::vector0<>, + typename T_FrameExtensionList = bmpl::vector0<>> + struct ParticleDescription + { + typedef T_Name Name; + typedef T_SuperCellSize SuperCellSize; + typedef typename ToSeq::type ValueTypeSeq; + typedef typename ToSeq::type FlagsList; + typedef T_HandleGuardRegion HandleGuardRegion; + typedef typename ToSeq::type MethodsList; + typedef typename ToSeq::type FrameExtensionList; + typedef ParticleDescription< + Name, + SuperCellSize, + ValueTypeSeq, + FlagsList, + HandleGuardRegion, + MethodsList, + FrameExtensionList> + ThisType; + }; -/** Get ParticleDescription with a new ValueTypeSeq - * - * @tparam T_OldParticleDescription base description - * @tparam T_NewValueTypeSeq new boost mpl sequence with value types - * @treturn ::type new ParticleDescription - */ -template -struct ReplaceValueTypeSeq -{ - typedef T_OldParticleDescription OldParticleDescription; - typedef ParticleDescription< - typename OldParticleDescription::Name, - typename OldParticleDescription::SuperCellSize, - typename ToSeq::type, - typename OldParticleDescription::FlagsList, - typename OldParticleDescription::HandleGuardRegion, - typename OldParticleDescription::MethodsList, - typename OldParticleDescription::FrameExtensionList - > type; -}; + /** Get ParticleDescription with a new ValueTypeSeq + * + * @tparam T_OldParticleDescription base description + * @tparam T_NewValueTypeSeq new boost mpl sequence 
with value types + * @treturn ::type new ParticleDescription + */ + template + struct ReplaceValueTypeSeq + { + typedef T_OldParticleDescription OldParticleDescription; + typedef ParticleDescription< + typename OldParticleDescription::Name, + typename OldParticleDescription::SuperCellSize, + typename ToSeq::type, + typename OldParticleDescription::FlagsList, + typename OldParticleDescription::HandleGuardRegion, + typename OldParticleDescription::MethodsList, + typename OldParticleDescription::FrameExtensionList> + type; + }; -/** Get ParticleDescription with a new FrameExtensionSeq - * - * @tparam T_OldParticleDescription base description - * @tparam T_FrameExtensionSeq new boost mpl sequence with value types - * @treturn ::type new ParticleDescription - */ -template -struct ReplaceFrameExtensionSeq -{ - typedef T_OldParticleDescription OldParticleDescription; - typedef ParticleDescription< - typename OldParticleDescription::Name, - typename OldParticleDescription::SuperCellSize, - typename OldParticleDescription::ValueTypeSeq, - typename OldParticleDescription::FlagsList, - typename OldParticleDescription::HandleGuardRegion, - typename OldParticleDescription::MethodsList, - typename ToSeq::type - > type; -}; + /** Get ParticleDescription with a new FrameExtensionSeq + * + * @tparam T_OldParticleDescription base description + * @tparam T_FrameExtensionSeq new boost mpl sequence with value types + * @treturn ::type new ParticleDescription + */ + template + struct ReplaceFrameExtensionSeq + { + typedef T_OldParticleDescription OldParticleDescription; + typedef ParticleDescription< + typename OldParticleDescription::Name, + typename OldParticleDescription::SuperCellSize, + typename OldParticleDescription::ValueTypeSeq, + typename OldParticleDescription::FlagsList, + typename OldParticleDescription::HandleGuardRegion, + typename OldParticleDescription::MethodsList, + typename ToSeq::type> + type; + }; -} //namespace pmacc +} // namespace pmacc diff --git 
a/include/pmacc/particles/ParticlesBase.hpp b/include/pmacc/particles/ParticlesBase.hpp index 3275a88db8..45ab0cd59e 100644 --- a/include/pmacc/particles/ParticlesBase.hpp +++ b/include/pmacc/particles/ParticlesBase.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz, * Alexander Grund * * This file is part of PMacc. @@ -39,174 +39,164 @@ namespace pmacc { + /* Tag used for marking particle types */ + struct ParticlesTag; -/* Tag used for marking particle types */ -struct ParticlesTag; - -template -class ParticlesBase : public SimulationFieldHelper -{ - typedef T_ParticleDescription ParticleDescription; - typedef T_MappingDesc MappingDesc; - -public: - - /* Type of used particles buffer - */ - typedef ParticlesBuffer BufferType; - - /* Type of frame in particles buffer - */ - typedef typename BufferType::FrameType FrameType; - /* Type of border frame in a particle buffer - */ - typedef typename BufferType::FrameTypeBorder FrameTypeBorder; - - /* Type of the particle box which particle buffer create - */ - typedef typename BufferType::ParticlesBoxType ParticlesBoxType; - - /* Policies for handling particles in guard cells */ - typedef typename ParticleDescription::HandleGuardRegion HandleGuardRegion; - - enum + template + class ParticlesBase : public SimulationFieldHelper { - Dim = MappingDesc::Dim, - Exchanges = traits::NumberOfExchanges::value, - TileSize = math::CT::volume::type::value - }; - - /* Mark this simulation data as a particle type */ - typedef ParticlesTag SimulationDataTag; + typedef T_ParticleDescription ParticleDescription; + typedef T_MappingDesc MappingDesc; + + public: + /* Type of used particles buffer + */ + typedef ParticlesBuffer< + ParticleDescription, + typename MappingDesc::SuperCellSize, + T_DeviceHeap, + MappingDesc::Dim> + BufferType; + + /* Type of frame in particles buffer + */ + typedef typename BufferType::FrameType FrameType; 
+ /* Type of border frame in a particle buffer + */ + typedef typename BufferType::FrameTypeBorder FrameTypeBorder; + + /* Type of the particle box which particle buffer create + */ + typedef typename BufferType::ParticlesBoxType ParticlesBoxType; + + /* Policies for handling particles in guard cells */ + typedef typename ParticleDescription::HandleGuardRegion HandleGuardRegion; + + enum + { + Dim = MappingDesc::Dim, + Exchanges = traits::NumberOfExchanges::value, + TileSize = math::CT::volume::type::value + }; -protected: + /* Mark this simulation data as a particle type */ + typedef ParticlesTag SimulationDataTag; - BufferType *particlesBuffer; + protected: + BufferType* particlesBuffer; - ParticlesBase( - const std::shared_ptr& deviceHeap, - MappingDesc description - ) : - SimulationFieldHelper(description), - particlesBuffer(NULL) - { - particlesBuffer = new BufferType( - deviceHeap, - description.getGridLayout().getDataSpace(), - MappingDesc::SuperCellSize::toRT() - ); - } - - virtual ~ParticlesBase() - { - delete this->particlesBuffer; - } - - /* Shift all particle in a AREA - * @tparam AREA area which is used (CORE,BORDER,GUARD or a combination) - */ - template - void shiftParticles() - { - StrideMapping mapper(this->cellDescription); - ParticlesBoxType pBox = particlesBuffer->getDeviceParticleBox(); - - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume::type::value - >::value; - __startTransaction(__getTransactionEvent()); - do + ParticlesBase(const std::shared_ptr& deviceHeap, MappingDesc description) + : SimulationFieldHelper(description) + , particlesBuffer(NULL) { - PMACC_KERNEL(KernelShiftParticles< numWorkers >{}) - (mapper.getGridDim(), numWorkers) - (pBox, mapper); + particlesBuffer = new BufferType( + deviceHeap, + description.getGridLayout().getDataSpace(), + MappingDesc::SuperCellSize::toRT()); } - while (mapper.next()); - - __setTransactionEvent(__endTransaction()); - } + virtual ~ParticlesBase() + { + delete 
this->particlesBuffer; + } - /* fill gaps in a AREA - * @tparam AREA area which is used (CORE,BORDER,GUARD or a combination) - */ - template - void fillGaps() - { - AreaMapping mapper(this->cellDescription); + /* Shift all particle in a AREA + * @tparam AREA area which is used (CORE,BORDER,GUARD or a combination) + */ + template + void shiftParticles() + { + StrideMapping mapper(this->cellDescription); + ParticlesBoxType pBox = particlesBuffer->getDeviceParticleBox(); + + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; + __startTransaction(__getTransactionEvent()); + do + { + PMACC_KERNEL(KernelShiftParticles{}) + (mapper.getGridDim(), numWorkers)(pBox, mapper); + } while(mapper.next()); + + __setTransactionEvent(__endTransaction()); + } - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume::type::value - >::value; + /* fill gaps in a AREA + * @tparam AREA area which is used (CORE,BORDER,GUARD or a combination) + */ + template + void fillGaps() + { + AreaMapping mapper(this->cellDescription); - PMACC_KERNEL(KernelFillGaps< numWorkers >{}) - (mapper.getGridDim(), numWorkers) - (particlesBuffer->getDeviceParticleBox(), mapper); - } + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; + PMACC_KERNEL(KernelFillGaps{}) + (mapper.getGridDim(), numWorkers)(particlesBuffer->getDeviceParticleBox(), mapper); + } -public: - /* fill gaps in a the complete simulation area (include GUARD) - */ - void fillAllGaps() - { - this->fillGaps < CORE + BORDER + GUARD > (); - } + public: + /* fill gaps in a the complete simulation area (include GUARD) + */ + void fillAllGaps() + { + this->fillGaps(); + } - /* fill all gaps in the border of the simulation - */ - void fillBorderGaps() - { - this->fillGaps < BORDER > (); - } - - /* Delete all particles in GUARD for one direction. 
- */ - void deleteGuardParticles(uint32_t exchangeType); - - /* Delete all particle in an area*/ - template - void deleteParticlesInArea(); - - /** copy guard particles to intermediate exchange buffer - * - * Copy all particles from the guard of a direction to the device exchange buffer. - * @warning This method resets the number of particles in the processed supercells even - * if there are particles left in the supercell and does not guarantee that the last frame is - * contiguous filled. - * Call fillAllGaps afterwards if you need a valid number of particles - * and a contiguously filled last frame. - */ - void copyGuardToExchange(uint32_t exchangeType); - - /* Insert all particles which are in device exchange buffer - */ - void insertParticles(uint32_t exchangeType); - - ParticlesBoxType getDeviceParticlesBox() - { - return particlesBuffer->getDeviceParticleBox(); - } + /* fill all gaps in the border of the simulation + */ + void fillBorderGaps() + { + this->fillGaps(); + } - ParticlesBoxType getHostParticlesBox(const int64_t memoryOffset) - { - return particlesBuffer->getHostParticleBox(memoryOffset); - } + /* Delete all particles in GUARD for one direction. + */ + void deleteGuardParticles(uint32_t exchangeType); + + /* Delete all particle in an area*/ + template + void deleteParticlesInArea(); + + /** copy guard particles to intermediate exchange buffer + * + * Copy all particles from the guard of a direction to the device exchange buffer. + * @warning This method resets the number of particles in the processed supercells even + * if there are particles left in the supercell and does not guarantee that the last frame is + * contiguous filled. + * Call fillAllGaps afterwards if you need a valid number of particles + * and a contiguously filled last frame. 
+ */ + void copyGuardToExchange(uint32_t exchangeType); + + /* Insert all particles which are in device exchange buffer + */ + void insertParticles(uint32_t exchangeType); + + ParticlesBoxType getDeviceParticlesBox() + { + return particlesBuffer->getDeviceParticleBox(); + } - /* Get the particles buffer which is used for the particles. - */ - BufferType& getParticlesBuffer() - { - PMACC_ASSERT(particlesBuffer != nullptr); - return *particlesBuffer; - } + ParticlesBoxType getHostParticlesBox(const int64_t memoryOffset) + { + return particlesBuffer->getHostParticleBox(memoryOffset); + } - /* set all internal objects to initial state*/ - virtual void reset(uint32_t currentStep); + /* Get the particles buffer which is used for the particles. + */ + BufferType& getParticlesBuffer() + { + PMACC_ASSERT(particlesBuffer != nullptr); + return *particlesBuffer; + } -}; + /* set all internal objects to initial state*/ + virtual void reset(uint32_t currentStep); + }; -} //namespace pmacc +} // namespace pmacc #include "pmacc/particles/ParticlesBase.tpp" diff --git a/include/pmacc/particles/ParticlesBase.kernel b/include/pmacc/particles/ParticlesBase.kernel index a493e7926a..5304e61804 100644 --- a/include/pmacc/particles/ParticlesBase.kernel +++ b/include/pmacc/particles/ParticlesBase.kernel @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -40,682 +40,423 @@ namespace pmacc { + template + DINLINE typename T_ParticleBox::FramePtr getPreviousFrameAndRemoveLastFrame( + const T_Acc& acc, + const typename T_ParticleBox::FramePtr& frame, + T_ParticleBox& pb, + const T_SuperCellIdxType& superCellIdx) + { + typename T_ParticleBox::FramePtr result = pb.getPreviousFrame(frame); + pb.removeLastFrame(acc, superCellIdx); + return result; + } -template -DINLINE typename T_ParticleBox::FramePtr -getPreviousFrameAndRemoveLastFrame( const typename T_ParticleBox::FramePtr& frame, - T_ParticleBox& pb, - const T_SuperCellIdxType& superCellIdx ) -{ - typename T_ParticleBox::FramePtr result = pb.getPreviousFrame( frame ); - pb.removeLastFrame( superCellIdx ); - return result; -} - -/** fill particle gaps in the last frame - * - * Copy all particles in a frame to the storage places at the frame's beginning. - * This leaves the frame with a contiguous number of valid particles at - * the beginning and a subsequent, contiguous gap at the end. - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelFillGapsLastFrame -{ - /** fill particle gaps + /** fill particle gaps in the last frame * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_Mapping mapper functor type + * Copy all particles in a frame to the storage places at the frame's beginning. + * This leaves the frame with a contiguous number of valid particles at + * the beginning and a subsequent, contiguous gap at the end. 
* - * @param boxPar particle memory - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - T_Mapping mapper - ) const + template + struct KernelFillGapsLastFrame { - using namespace particles::operations; - using namespace mappings::threads; - - constexpr uint32_t frameSize = math::CT::volume::type::value; - constexpr uint32_t dim = T_Mapping::Dim; - constexpr uint32_t numWorkers = T_numWorkers; - - using FramePtr = typename T_ParBox::FramePtr; - - DataSpace< dim > const superCellIdx = mapper.getSuperCellIndex( DataSpace< dim > ( blockIdx ) ); - - PMACC_SMEM( - acc, - lastFrame, - FramePtr - ); - PMACC_SMEM( - acc, - gapIndices_sh, - memory::Array< - int, - frameSize - > - ); - PMACC_SMEM( - acc, - numGaps, - int - ); - PMACC_SMEM( - acc, - numParticles, - int - ); - PMACC_SMEM( - acc, - srcGap, - int - ); - - uint32_t const workerIdx = threadIdx.x; - - using MasterOnly = IdxConfig< - 1, - numWorkers - >; - - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - lastFrame = pb.getLastFrame( superCellIdx ); + /** fill particle gaps + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_Mapping mapper functor type + * + * @param boxPar particle memory + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()(T_Acc const& acc, T_ParBox pb, T_Mapping mapper) const + { + using namespace particles::operations; + using namespace mappings::threads; + + constexpr uint32_t frameSize = math::CT::volume::type::value; + constexpr uint32_t dim = T_Mapping::Dim; + constexpr uint32_t numWorkers = T_numWorkers; + + using FramePtr = typename T_ParBox::FramePtr; + + DataSpace const superCellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + + PMACC_SMEM(acc, lastFrame, FramePtr); + 
PMACC_SMEM(acc, gapIndices_sh, memory::Array); + PMACC_SMEM(acc, numGaps, int); + PMACC_SMEM(acc, numParticles, int); + PMACC_SMEM(acc, srcGap, int); + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + using MasterOnly = IdxConfig<1, numWorkers>; + + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { + lastFrame = pb.getLastFrame(superCellIdx); numGaps = 0; numParticles = 0; srcGap = 0; - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - if ( lastFrame.isValid( ) ) - { - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; - - /* context if an element within the frame is a particle */ - memory::CtxArray< - bool, - ParticleDomCfg - > - isParticleCtx( - workerIdx, - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - return lastFrame[ linearIdx ][ multiMask_ ]; - } - ); + if(lastFrame.isValid()) + { + using ParticleDomCfg = IdxConfig; - /* loop over all particles in the frame */ - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + /* context if an element within the frame is a particle */ + memory::CtxArray isParticleCtx( + workerIdx, + [&](uint32_t const linearIdx, uint32_t const) { return lastFrame[linearIdx][multiMask_]; }); - // count particles in last frame - forEachParticle( - [&]( - uint32_t const, - uint32_t const idx - ) - { - if( isParticleCtx[ idx ] ) - nvidia::atomicAllInc( acc, &numParticles, ::alpaka::hierarchy::Threads{} ); - } - ); + /* loop over all particles in the frame */ + ForEachIdx forEachParticle(workerIdx); - __syncthreads( ); + // count particles in last frame + forEachParticle([&](uint32_t const, uint32_t const idx) { + if(isParticleCtx[idx]) + nvidia::atomicAllInc(acc, &numParticles, ::alpaka::hierarchy::Threads{}); + }); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if ( linearIdx < numParticles && isParticleCtx[ idx ] == false ) + cupla::__syncthreads(acc); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + 
if(linearIdx < numParticles && isParticleCtx[idx] == false) { - int const localGapIdx = nvidia::atomicAllInc( acc, &numGaps, ::alpaka::hierarchy::Threads{} ); - gapIndices_sh[ localGapIdx ] = linearIdx; + int const localGapIdx = nvidia::atomicAllInc(acc, &numGaps, ::alpaka::hierarchy::Threads{}); + gapIndices_sh[localGapIdx] = linearIdx; } - } - ); - __syncthreads( ); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if ( linearIdx >= numParticles && isParticleCtx[ idx ] ) + }); + cupla::__syncthreads(acc); + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(linearIdx >= numParticles && isParticleCtx[idx]) { // any particle search a gap - int const srcGapIdx = nvidia::atomicAllInc( acc, &srcGap, ::alpaka::hierarchy::Threads{} ); - int const gapIdx = gapIndices_sh[ srcGapIdx ]; - auto parDestFull = lastFrame[ gapIdx ]; + int const srcGapIdx = nvidia::atomicAllInc(acc, &srcGap, ::alpaka::hierarchy::Threads{}); + int const gapIdx = gapIndices_sh[srcGapIdx]; + auto parDestFull = lastFrame[gapIdx]; /* enable particle */ - parDestFull[ multiMask_ ] = 1; + parDestFull[multiMask_] = 1; /* we do not update the multiMask because copying from mem to mem is too slow * we have to enabled particles explicitly */ - auto parDest = deselect< multiMask >( parDestFull ); - auto parSrc = ( lastFrame[ linearIdx ] ); - assign( parDest, parSrc ); + auto parDest = deselect(parDestFull); + auto parSrc = (lastFrame[linearIdx]); + assign(parDest, parSrc); parSrc[multiMask_] = 0; // delete old particle } - } - ); - } - ForEachIdx< MasterOnly >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { + }); + } + ForEachIdx{workerIdx}([&](uint32_t const, uint32_t const) { // there is no need to add a zero to the global memory - if( numParticles != 0 ) + if(numParticles != 0) { - auto & superCell = pb.getSuperCell( superCellIdx ); - superCell.setNumParticles( - superCell.getNumParticles() + numParticles - ); + auto& superCell = 
pb.getSuperCell(superCellIdx); + superCell.setNumParticles(superCell.getNumParticles() + numParticles); } else { /* The last frame is empty therefore it must be removed. * It is save to call this method even if there is no last frame. */ - pb.removeLastFrame( superCellIdx ); + pb.removeLastFrame(acc, superCellIdx); } - } - ); - } -}; + }); + } + }; -/** fill particle gaps in all frames - * - * Copy all particles from the end to the gaps at the beginning of the frame list. - * The functor fulfills the restriction that the last frame must be hold a contiguous - * number of valid particles at the beginning and a subsequent, contiguous gap at the end. - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelFillGaps -{ - /** fill particle gaps + /** fill particle gaps in all frames * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_Mapping mapper functor type + * Copy all particles from the end to the gaps at the beginning of the frame list. + * The functor fulfills the restriction that the last frame must be hold a contiguous + * number of valid particles at the beginning and a subsequent, contiguous gap at the end. 
* - * @param pb particle memory - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - T_Mapping const mapper - ) const + template + struct KernelFillGaps { - using namespace particles::operations; - using namespace mappings::threads; - - using FramePtr = typename T_ParBox::FramePtr; - - constexpr uint32_t frameSize = math::CT::volume< typename T_ParBox::FrameType::SuperCellSize >::type::value; - constexpr uint32_t dim = T_Mapping::Dim; - constexpr uint32_t numWorkers = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - DataSpace< dim > const superCellIdx( mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ) ); - - // data copied from right (last) to left (first) - PMACC_SMEM( - acc, - firstFrame, - FramePtr - ); - PMACC_SMEM( - acc, - lastFrame, - FramePtr - ); - - PMACC_SMEM( - acc, - particleIndices_sh, - memory::Array< - int, - frameSize - > - ); - // number of gaps in firstFrame frame - PMACC_SMEM( - acc, - numGaps, - int - ); - // number of particles in the lastFrame - PMACC_SMEM( - acc, - numParticles, - int - ); - - uint32_t numParticlesPerSuperCell = 0u; - - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - firstFrame = pb.getFirstFrame( superCellIdx ); - lastFrame = pb.getLastFrame( superCellIdx ); - } - ); + /** fill particle gaps + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_Mapping mapper functor type + * + * @param pb particle memory + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()(T_Acc const& acc, T_ParBox pb, T_Mapping const mapper) const + { + using namespace particles::operations; + using namespace mappings::threads; - __syncthreads( ); + using FramePtr = typename 
T_ParBox::FramePtr; - while ( firstFrame.isValid( ) && firstFrame != lastFrame ) - { - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { + constexpr uint32_t frameSize = math::CT::volume::type::value; + constexpr uint32_t dim = T_Mapping::Dim; + constexpr uint32_t numWorkers = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + DataSpace const superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + + // data copied from right (last) to left (first) + PMACC_SMEM(acc, firstFrame, FramePtr); + PMACC_SMEM(acc, lastFrame, FramePtr); + + PMACC_SMEM(acc, particleIndices_sh, memory::Array); + // number of gaps in firstFrame frame + PMACC_SMEM(acc, numGaps, int); + // number of particles in the lastFrame + PMACC_SMEM(acc, numParticles, int); + + uint32_t numParticlesPerSuperCell = 0u; + + ForEachIdx> onlyMaster{workerIdx}; + + onlyMaster([&](uint32_t const, uint32_t const) { + firstFrame = pb.getFirstFrame(superCellIdx); + lastFrame = pb.getLastFrame(superCellIdx); + }); + + cupla::__syncthreads(acc); + + while(firstFrame.isValid() && firstFrame != lastFrame) + { + onlyMaster([&](uint32_t const, uint32_t const) { numGaps = 0; numParticles = 0; - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); - - memory::CtxArray< - int, - ParticleDomCfg - > localGapIdxCtx( INV_LOC_IDX ); - - // find gaps in firstFrame - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( firstFrame[ linearIdx ][ multiMask_ ] == 0 ) + using ParticleDomCfg = IdxConfig; + // loop over all particles in the frame + ForEachIdx forEachParticle(workerIdx); + + memory::CtxArray localGapIdxCtx(INV_LOC_IDX); + + // find gaps in firstFrame + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(firstFrame[linearIdx][multiMask_] == 
0) { - localGapIdxCtx[ idx ] = nvidia::atomicAllInc( acc, &numGaps, ::alpaka::hierarchy::Threads{} ); + localGapIdxCtx[idx] = nvidia::atomicAllInc(acc, &numGaps, ::alpaka::hierarchy::Threads{}); } - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - if( numGaps != 0 ) - { - // count particles in lastFrame - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + if(numGaps != 0) + { + // count particles in lastFrame + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { // search particles for gaps - if( lastFrame[ linearIdx ][ multiMask_ ] == 1 ) + if(lastFrame[linearIdx][multiMask_] == 1) { - int const localParticleIdx = nvidia::atomicAllInc( acc, &numParticles, ::alpaka::hierarchy::Threads{} ); - particleIndices_sh[ localParticleIdx ] = linearIdx; + int const localParticleIdx + = nvidia::atomicAllInc(acc, &numParticles, ::alpaka::hierarchy::Threads{}); + particleIndices_sh[localParticleIdx] = linearIdx; } - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - // copy particles from lastFrame to the gaps in firstFrame - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( localGapIdxCtx[ idx ] < numParticles ) + // copy particles from lastFrame to the gaps in firstFrame + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(localGapIdxCtx[idx] < numParticles) { - int const parIdx = particleIndices_sh[ localGapIdxCtx[ idx ] ]; - auto parDestFull = firstFrame[ linearIdx ]; + int const parIdx = particleIndices_sh[localGapIdxCtx[idx]]; + auto parDestFull = firstFrame[linearIdx]; // enable particle - parDestFull[ multiMask_ ] = 1; + parDestFull[multiMask_] = 1; /* we not update multiMask because copy from mem to mem is to slow * we have enabled particle explicit */ - auto parDest = deselect< multiMask >( parDestFull ); - auto parSrc = lastFrame[ parIdx ]; - assign( parDest, parSrc ); - parSrc[ multiMask_ ] = 0; + auto parDest = deselect(parDestFull); 
+ auto parSrc = lastFrame[parIdx]; + assign(parDest, parSrc); + parSrc[multiMask_] = 0; } - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - if( numGaps < numParticles ) + onlyMaster([&](uint32_t const, uint32_t const) { + if(numGaps < numParticles) { numParticlesPerSuperCell += frameSize; // any gap in the first frame is filled - firstFrame = pb.getNextFrame( firstFrame ); + firstFrame = pb.getNextFrame(firstFrame); } - else if( numGaps > numParticles ) + else if(numGaps > numParticles) { // we need more particles - lastFrame = getPreviousFrameAndRemoveLastFrame( - lastFrame, - pb, - superCellIdx - ); + lastFrame = getPreviousFrameAndRemoveLastFrame(acc, lastFrame, pb, superCellIdx); } - else if( numGaps == numParticles ) + else if(numGaps == numParticles) { // update lastFrame and firstFrame - lastFrame = getPreviousFrameAndRemoveLastFrame( - lastFrame, - pb, - superCellIdx - ); - if( lastFrame.isValid( ) && lastFrame != firstFrame ) + lastFrame = getPreviousFrameAndRemoveLastFrame(acc, lastFrame, pb, superCellIdx); + if(lastFrame.isValid() && lastFrame != firstFrame) { numParticlesPerSuperCell += frameSize; - firstFrame = pb.getNextFrame( firstFrame ); + firstFrame = pb.getNextFrame(firstFrame); } } - } - ); - } - else - { - // there are no gaps in firstFrame, goto to next frame - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { + }); + } + else + { + // there are no gaps in firstFrame, goto to next frame + onlyMaster([&](uint32_t const, uint32_t const) { numParticlesPerSuperCell += frameSize; - firstFrame = pb.getNextFrame( firstFrame ); - } - ); - } - - __syncthreads( ); + firstFrame = pb.getNextFrame(firstFrame); + }); + } - } + cupla::__syncthreads(acc); + } - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { + onlyMaster([&](uint32_t const, uint32_t const) { /* numParticlesPerSuperCell is the number of particles in the * supercell except the particles in 
the last frame */ - auto & superCell = pb.getSuperCell( superCellIdx ); - superCell.setNumParticles( numParticlesPerSuperCell ); - } - ); - - // fill all gaps in the last frame of the supercell - KernelFillGapsLastFrame< numWorkers >{ }( - acc, - pb, - mapper - ); - } -}; + auto& superCell = pb.getSuperCell(superCellIdx); + superCell.setNumParticles(numParticlesPerSuperCell); + }); -/** shift particles leaving the supercell - * - * The functor fulfills the restriction that all frames except the last - * must be fully filled with particles as can be stored in a frame. - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelShiftParticles -{ - /** This kernel moves particles to the next supercell + // fill all gaps in the last frame of the supercell + KernelFillGapsLastFrame{}(acc, pb, mapper); + } + }; + + /** shift particles leaving the supercell * - * @warning this kernel can only run with a double checker board + * The functor fulfills the restriction that all frames except the last + * must be fully filled with particles as can be stored in a frame. 
+ * + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - Mapping mapper - ) const + template + struct KernelShiftParticles { - using ParBox = T_ParBox; - using FrameType = typename ParBox::FrameType; - using FramePtr = typename ParBox::FramePtr; - - PMACC_CONSTEXPR_CAPTURE uint32_t dim = Mapping::Dim; - constexpr uint32_t frameSize = math::CT::volume< typename FrameType::SuperCellSize >::type::value; - /* number exchanges in 2D=9 and in 3D=27 */ - constexpr uint32_t numExchanges = traits::NumberOfExchanges< dim >::value; - constexpr uint32_t numWorkers = T_numWorkers; - - /* define memory for two times Exchanges - * index range [0,numExchanges-1] are being referred to as `low frames` - * index range [numExchanges,2*numExchanges-1] are being referred to as `high frames` + /** This kernel moves particles to the next supercell + * + * @warning this kernel can only run with a double checker board */ - PMACC_SMEM( - acc, - destFrames, - memory::Array< - FramePtr, - numExchanges * 2 - > - ); - //count particles per frame - PMACC_SMEM( - acc, - destFramesCounter, - memory::Array< - int, - numExchanges - > - ); - - PMACC_SMEM( - acc, - frame, - FramePtr - ); - PMACC_SMEM( - acc, - mustShift, - bool - ); - - DataSpace< dim > superCellIdx = mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ); - uint32_t const workerIdx = threadIdx.x; - - using namespace mappings::threads; - - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const, - uint32_t const - ) - { - mustShift = pb.getSuperCell( superCellIdx ).mustShift( ); - if ( mustShift ) + template + DINLINE void operator()(T_Acc const& acc, T_ParBox pb, Mapping mapper) const + { + using ParBox = T_ParBox; + using FrameType = typename ParBox::FrameType; + using FramePtr = typename ParBox::FramePtr; + + PMACC_CONSTEXPR_CAPTURE uint32_t dim = Mapping::Dim; + 
constexpr uint32_t frameSize = math::CT::volume::type::value; + /* number exchanges in 2D=9 and in 3D=27 */ + constexpr uint32_t numExchanges = traits::NumberOfExchanges::value; + constexpr uint32_t numWorkers = T_numWorkers; + + /* define memory for two times Exchanges + * index range [0,numExchanges-1] are being referred to as `low frames` + * index range [numExchanges,2*numExchanges-1] are being referred to as `high frames` + */ + PMACC_SMEM(acc, destFrames, memory::Array); + // count particles per frame + PMACC_SMEM(acc, destFramesCounter, memory::Array); + + PMACC_SMEM(acc, frame, FramePtr); + PMACC_SMEM(acc, mustShift, bool); + + DataSpace superCellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + using namespace mappings::threads; + + ForEachIdx>{workerIdx}([&](uint32_t const, uint32_t const) { + mustShift = pb.getSuperCell(superCellIdx).mustShift(); + if(mustShift) { - pb.getSuperCell( superCellIdx ).setMustShift( false ); - frame = pb.getFirstFrame( superCellIdx ); + pb.getSuperCell(superCellIdx).setMustShift(false); + frame = pb.getFirstFrame(superCellIdx); } - } - ); - - __syncthreads( ); - if ( !mustShift || !frame.isValid( ) ) return; - - using ExchangeDomCfg = IdxConfig< - numExchanges, - numWorkers - >; - - memory::CtxArray< - int32_t, - ExchangeDomCfg - > newParticleInFrame( 0 ); - - memory::CtxArray< - DataSpace< dim >, - ExchangeDomCfg - > relativeCtx( - workerIdx, - [&]( - uint32_t const linearIdx, - uint32_t const - ) - -> DataSpace< dim > - { - return superCellIdx + Mask::getRelativeDirections< dim > ( linearIdx + 1); - } - ); + }); - ForEachIdx< ExchangeDomCfg > forEachExchange( workerIdx ); + cupla::__syncthreads(acc); + if(!mustShift || !frame.isValid()) + return; - /* if a partially filled last frame exists for the neighboring supercell, - * each master thread (one master per direction) will load it - */ - forEachExchange( - [&]( - uint32_t const linearIdx, - uint32_t 
const idx - ) - { - destFramesCounter[ linearIdx ] = 0; - destFrames[ linearIdx ] = FramePtr(); - destFrames[ linearIdx + numExchanges ] = FramePtr(); + using ExchangeDomCfg = IdxConfig; + + memory::CtxArray newParticleInFrame(0); + + memory::CtxArray, ExchangeDomCfg> relativeCtx( + workerIdx, + [&](uint32_t const linearIdx, uint32_t const) -> DataSpace { + return superCellIdx + Mask::getRelativeDirections(linearIdx + 1); + }); + + ForEachIdx forEachExchange(workerIdx); + + /* if a partially filled last frame exists for the neighboring supercell, + * each master thread (one master per direction) will load it + */ + forEachExchange([&](uint32_t const linearIdx, uint32_t const idx) { + destFramesCounter[linearIdx] = 0; + destFrames[linearIdx] = FramePtr(); + destFrames[linearIdx + numExchanges] = FramePtr(); /* load last frame of neighboring supercell */ - FramePtr tmpFrame( pb.getLastFrame( relativeCtx[ idx ] ) ); + FramePtr tmpFrame(pb.getLastFrame(relativeCtx[idx])); - if ( tmpFrame.isValid() ) + if(tmpFrame.isValid()) { - int32_t const particlesInFrame = pb.getSuperCell( relativeCtx[ idx ] ).getSizeLastFrame( ); + int32_t const particlesInFrame = pb.getSuperCell(relativeCtx[idx]).getSizeLastFrame(); // do not use the neighbor's last frame if it is full - if ( particlesInFrame < frameSize ) + if(particlesInFrame < frameSize) { - newParticleInFrame[ idx ] = -particlesInFrame; - destFrames[ linearIdx ] = tmpFrame; - destFramesCounter[ linearIdx ] = particlesInFrame; + newParticleInFrame[idx] = -particlesInFrame; + destFrames[linearIdx] = tmpFrame; + destFramesCounter[linearIdx] = particlesInFrame; } } - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - /* iterate over the frame list of the current supercell */ - while ( frame.isValid( ) ) - { - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; - - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); - - memory::CtxArray< - lcellId_t, - ParticleDomCfg - > destParticleIdxCtx( 
INV_LOC_IDX ); - memory::CtxArray< - int, - ParticleDomCfg - > directionCtx; - - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + /* iterate over the frame list of the current supercell */ + while(frame.isValid()) + { + using ParticleDomCfg = IdxConfig; + + ForEachIdx forEachParticle(workerIdx); + + memory::CtxArray destParticleIdxCtx(INV_LOC_IDX); + memory::CtxArray directionCtx; + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { /* set to value to of multiMask to a value in range [-2, EXCHANGES - 1] * -2 is no particle * -1 is particle but it is not shifted (stays in supercell) * >=0 particle moves in a certain direction * (@see ExchangeType in types.h) */ - directionCtx[ idx ] = frame[ linearIdx ][ multiMask_ ] - 2; - if ( directionCtx[ idx ] >= 0 ) + directionCtx[idx] = frame[linearIdx][multiMask_] - 2; + if(directionCtx[idx] >= 0) { - destParticleIdxCtx[ idx ] = atomicAdd( &(destFramesCounter[ directionCtx[ idx ] ]), 1, ::alpaka::hierarchy::Threads{} ); + destParticleIdxCtx[idx] = cupla::atomicAdd( + acc, + &(destFramesCounter[directionCtx[idx]]), + 1, + ::alpaka::hierarchy::Threads{}); } - } - ); - __syncthreads( ); - - forEachExchange( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + }); + cupla::__syncthreads(acc); + + forEachExchange([&](uint32_t const linearIdx, uint32_t const idx) { /* If the master thread (responsible for a certain direction) did not * obtain a `low frame` from the neighboring super cell before the loop, * it will create one now. @@ -724,41 +465,27 @@ struct KernelShiftParticles * supercell fit into the `low frame`, a second frame is created to * contain further particles, the `high frame` (default: invalid). 
*/ - if ( destFramesCounter[ linearIdx ] > 0 ) + if(destFramesCounter[linearIdx] > 0) { /* if we had no `low frame` we load a new empty one */ - if ( !destFrames[ linearIdx ].isValid( ) ) + if(!destFrames[linearIdx].isValid()) { - FramePtr tmpFrame( pb.getEmptyFrame( ) ); - destFrames[ linearIdx ] = tmpFrame; - pb.setAsLastFrame( - acc, - tmpFrame, - relativeCtx[ idx ] - ); + FramePtr tmpFrame(pb.getEmptyFrame(acc)); + destFrames[linearIdx] = tmpFrame; + pb.setAsLastFrame(acc, tmpFrame, relativeCtx[idx]); } /* check if a `high frame` is needed */ - if ( destFramesCounter[ linearIdx ] > frameSize ) + if(destFramesCounter[linearIdx] > frameSize) { - FramePtr tmpFrame( pb.getEmptyFrame( ) ); - destFrames[ linearIdx + numExchanges ] = tmpFrame; - pb.setAsLastFrame( - acc, - tmpFrame, - relativeCtx[ idx ] - ); + FramePtr tmpFrame(pb.getEmptyFrame(acc)); + destFrames[linearIdx + numExchanges] = tmpFrame; + pb.setAsLastFrame(acc, tmpFrame, relativeCtx[idx]); } } - } - ); - __syncthreads( ); - - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + }); + cupla::__syncthreads(acc); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { /* All threads with a valid index in the neighbor's frame, valid index * range is [0, frameSize * 2-1], will copy their particle to the new * frame. @@ -766,588 +493,366 @@ struct KernelShiftParticles * The default value for indexes (in the destination frame) is * above this range (INV_LOC_IDX) for all particles that are not shifted. 
*/ - if ( destParticleIdxCtx[ idx ] < frameSize * 2 ) + if(destParticleIdxCtx[idx] < frameSize * 2) { - if ( destParticleIdxCtx[ idx ] >= frameSize ) + if(destParticleIdxCtx[idx] >= frameSize) { /* use `high frame` */ - directionCtx[ idx ] += numExchanges; - destParticleIdxCtx[ idx ] -= frameSize; + directionCtx[idx] += numExchanges; + destParticleIdxCtx[idx] -= frameSize; } - auto dstParticle = destFrames[ directionCtx[ idx ] ][ destParticleIdxCtx[ idx ] ]; - auto srcParticle = frame[ linearIdx ]; - dstParticle[ multiMask_ ] = 1; - srcParticle[ multiMask_ ] = 0; - auto dstFilteredParticle = - particles::operations::deselect< multiMask >( dstParticle ); - particles::operations::assign( - dstFilteredParticle, - srcParticle - ); + auto dstParticle = destFrames[directionCtx[idx]][destParticleIdxCtx[idx]]; + auto srcParticle = frame[linearIdx]; + dstParticle[multiMask_] = 1; + srcParticle[multiMask_] = 0; + auto dstFilteredParticle = particles::operations::deselect(dstParticle); + particles::operations::assign(dstFilteredParticle, srcParticle); } - } - ); - __syncthreads( ); - - forEachExchange( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { + }); + cupla::__syncthreads(acc); + + forEachExchange([&](uint32_t const linearIdx, uint32_t const idx) { /* if the `low frame` is full, each master thread * uses the `high frame` (is invalid, if still empty) as the next * `low frame` for the following iteration of the loop */ - if ( destFramesCounter[ linearIdx ] >= frameSize ) + if(destFramesCounter[linearIdx] >= frameSize) { - newParticleInFrame[ idx ] += frameSize; - destFramesCounter[ linearIdx ] -= frameSize; - destFrames[ linearIdx ] = destFrames[ linearIdx + numExchanges ]; - destFrames[ linearIdx + numExchanges ] = FramePtr( ); + newParticleInFrame[idx] += frameSize; + destFramesCounter[linearIdx] -= frameSize; + destFrames[linearIdx] = destFrames[linearIdx + numExchanges]; + destFrames[linearIdx + numExchanges] = FramePtr(); } - if ( linearIdx == 0 ) + 
if(linearIdx == 0) { - frame = pb.getNextFrame( frame ); + frame = pb.getNextFrame(frame); } - } - ); - __syncthreads( ); - } + }); + cupla::__syncthreads(acc); + } - forEachExchange( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - newParticleInFrame[ idx ] += destFramesCounter[ linearIdx ]; - if( newParticleInFrame[ idx ] > 0 ) + forEachExchange([&](uint32_t const linearIdx, uint32_t const idx) { + newParticleInFrame[idx] += destFramesCounter[linearIdx]; + if(newParticleInFrame[idx] > 0) { /* Each master thread updates the number of particles * for the neighbor frame. The number of particles in the neighbor * frame must be correct because fill gaps is only called on the * current used supercell. */ - auto & superCell = pb.getSuperCell( relativeCtx[ idx ] ); - superCell.setNumParticles( - superCell.getNumParticles() + newParticleInFrame[ idx ] - ); + auto& superCell = pb.getSuperCell(relativeCtx[idx]); + superCell.setNumParticles(superCell.getNumParticles() + newParticleInFrame[idx]); } - } - ); - - // fill all gaps in the frame list of the supercell - KernelFillGaps< numWorkers >{ }( - acc, - pb, - mapper - ); - } -}; + }); -/** deletes all particles within an AREA - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelDeleteParticles -{ - /** deletes all particles - * - * @warning the particle memory of the particle is not byte-wise zeroed - * - * @tparam T_ParticleBox pmacc::ParticlesBox, particle box type - * @tparam T_Mapping mapper functor type + // fill all gaps in the frame list of the supercell + KernelFillGaps{}(acc, pb, mapper); + } + }; + + /** deletes all particles within an AREA * - * @param pb particle memory - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParticleBox, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParticleBox pb, - T_Mapping const mapper - ) 
const + template + struct KernelDeleteParticles { - using namespace particles::operations; - using namespace mappings::threads; - - using ParticleBox = T_ParticleBox; - using FrameType = typename ParticleBox::FrameType; - using FramePtr = typename ParticleBox::FramePtr; - - constexpr uint32_t dim = T_Mapping::Dim; - constexpr uint32_t frameSize = math::CT::volume< typename FrameType::SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - - DataSpace< dim > const superCellIdx = mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ); - uint32_t const workerIdx = threadIdx.x; - - PMACC_SMEM( - acc, - frame, - FramePtr - ); - - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getLastFrame( superCellIdx ); - } - ); + /** deletes all particles + * + * @warning the particle memory of the particle is not byte-wise zeroed + * + * @tparam T_ParticleBox pmacc::ParticlesBox, particle box type + * @tparam T_Mapping mapper functor type + * + * @param pb particle memory + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()(T_Acc const& acc, T_ParticleBox pb, T_Mapping const mapper) const + { + using namespace particles::operations; + using namespace mappings::threads; - __syncthreads( ); + using ParticleBox = T_ParticleBox; + using FrameType = typename ParticleBox::FrameType; + using FramePtr = typename ParticleBox::FramePtr; - while( frame.isValid( ) ) - { - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + constexpr uint32_t dim = T_Mapping::Dim; + constexpr uint32_t frameSize = math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - auto particle = ( frame[ linearIdx ] ); - particle[ 
multiMask_ ] = 0; // delete particle - } - ); + DataSpace const superCellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + uint32_t const workerIdx = cupla::threadIdx(acc).x; - __syncthreads( ); + PMACC_SMEM(acc, frame, FramePtr); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - // always remove the last frame - frame = getPreviousFrameAndRemoveLastFrame( - frame, - pb, - superCellIdx - ); - } - ); - __syncthreads( ); - } + ForEachIdx> onlyMaster{workerIdx}; + + onlyMaster([&](uint32_t const, uint32_t const) { frame = pb.getLastFrame(superCellIdx); }); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) + cupla::__syncthreads(acc); + + while(frame.isValid()) { - // all frames and particles are removed - pb.getSuperCell( superCellIdx ).setNumParticles( 0 ); + using ParticleDomCfg = IdxConfig; + // loop over all particles in the frame + ForEachIdx forEachParticle(workerIdx); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const) { + auto particle = (frame[linearIdx]); + particle[multiMask_] = 0; // delete particle + }); + + cupla::__syncthreads(acc); + + onlyMaster([&](uint32_t const, uint32_t const) { + // always remove the last frame + frame = getPreviousFrameAndRemoveLastFrame(acc, frame, pb, superCellIdx); + }); + cupla::__syncthreads(acc); } - ); - } -}; -/** copy particles from the guard to an exchange buffer - * - * @warning This kernel resets the number of particles in the processed supercells even - * if there are particles left in the supercell and does not guarantee that the last frame is - * contiguous filled. - * Call KernelFillGaps afterwards if you need a valid number of particles - * and a contiguously filled last frame. 
- * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelCopyGuardToExchange -{ - /** copy guard particles to an exchange buffer + onlyMaster([&](uint32_t const, uint32_t const) { + // all frames and particles are removed + pb.getSuperCell(superCellIdx).setNumParticles(0); + }); + } + }; + + /** copy particles from the guard to an exchange buffer * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_ExchangeValueType frame type of the exchange buffer - * @tparam T_Mapping mapper functor type + * @warning This kernel resets the number of particles in the processed supercells even + * if there are particles left in the supercell and does not guarantee that the last frame is + * contiguous filled. + * Call KernelFillGaps afterwards if you need a valid number of particles + * and a contiguously filled last frame. * - * @param pb particle memory - * @param exchangeBox exchange buffer for particles - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename T_ExchangeValueType, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - ExchangePushDataBox< - vint_t, - T_ExchangeValueType, - T_Mapping::Dim - 1 - > exchangeBox, - T_Mapping const mapper - ) const + template + struct KernelCopyGuardToExchange { - using namespace particles::operations; - using namespace mappings::threads; - - PMACC_CONSTEXPR_CAPTURE uint32_t dim = T_Mapping::Dim; - constexpr uint32_t frameSize = math::CT::volume< typename T_ParBox::FrameType::SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - - using FramePtr = typename T_ParBox::FramePtr; - - DataSpace< dim > const superCellIdx = mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ); - uint32_t const workerIdx = threadIdx.x; - - // number of particles in the current handled frame - PMACC_SMEM( - acc, - 
numParticles, - int - ); - PMACC_SMEM( - acc, - frame, - FramePtr - ); - - /* `exchangeChunk` is a view to a chunk of the memory in the exchange- - * The chunk contains between 0 and `numParticles` particles - * and is updated for each frame. - */ - PMACC_SMEM( - acc, - exchangeChunk, - TileDataBox< T_ExchangeValueType > - ); - - /* flag: define if all particles from the current frame are copied to the - * exchange buffer + /** copy guard particles to an exchange buffer + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_ExchangeValueType frame type of the exchange buffer + * @tparam T_Mapping mapper functor type * - * `true` if all particles are copied, else `false` + * @param pb particle memory + * @param exchangeBox exchange buffer for particles + * @param mapper functor to map a block to a supercell */ - PMACC_SMEM( - acc, - allParticlesCopied, - bool - ); - - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - allParticlesCopied = true; - frame = pb.getLastFrame( superCellIdx ); - } - ); + template + DINLINE void operator()( + T_Acc const& acc, + T_ParBox pb, + ExchangePushDataBox exchangeBox, + T_Mapping const mapper) const + { + using namespace particles::operations; + using namespace mappings::threads; - __syncthreads( ); + PMACC_CONSTEXPR_CAPTURE uint32_t dim = T_Mapping::Dim; + constexpr uint32_t frameSize = math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; - while ( frame.isValid( ) && allParticlesCopied ) - { - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; + using FramePtr = typename T_ParBox::FramePtr; + + DataSpace const superCellIdx = mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc))); + uint32_t const workerIdx = cupla::threadIdx(acc).x; - /* the index of the gap in the exchange box where the particle - * is copied to + // number of particles in the current handled frame + 
PMACC_SMEM(acc, numParticles, int); + PMACC_SMEM(acc, frame, FramePtr); + + /* `exchangeChunk` is a view to a chunk of the memory in the exchange- + * The chunk contains between 0 and `numParticles` particles + * and is updated for each frame. */ - memory::CtxArray< - lcellId_t, - ParticleDomCfg - > - exchangeGapIdxCtx( INV_LOC_IDX ); - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - numParticles = 0; - } - ); + PMACC_SMEM(acc, exchangeChunk, TileDataBox); - __syncthreads( ); + /* flag: define if all particles from the current frame are copied to the + * exchange buffer + * + * `true` if all particles are copied, else `false` + */ + PMACC_SMEM(acc, allParticlesCopied, bool); - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); + ForEachIdx> onlyMaster{workerIdx}; - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if ( frame[ linearIdx ][ multiMask_ ] == 1 ) - { - exchangeGapIdxCtx[ idx ] = nvidia::atomicAllInc( acc, &numParticles, ::alpaka::hierarchy::Threads{} ); - } - } - ); - __syncthreads( ); + onlyMaster([&](uint32_t const, uint32_t const) { + allParticlesCopied = true; + frame = pb.getLastFrame(superCellIdx); + }); + + cupla::__syncthreads(acc); - if( numParticles > 0 ) + while(frame.isValid() && allParticlesCopied) { + using ParticleDomCfg = IdxConfig; - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) + /* the index of the gap in the exchange box where the particle + * is copied to + */ + memory::CtxArray exchangeGapIdxCtx(INV_LOC_IDX); + + onlyMaster([&](uint32_t const, uint32_t const) { numParticles = 0; }); + + cupla::__syncthreads(acc); + + // loop over all particles in the frame + ForEachIdx forEachParticle(workerIdx); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(frame[linearIdx][multiMask_] == 1) { + exchangeGapIdxCtx[idx] + = nvidia::atomicAllInc(acc, &numParticles, ::alpaka::hierarchy::Threads{}); + } + }); + 
cupla::__syncthreads(acc); + + if(numParticles > 0) + { + onlyMaster([&](uint32_t const, uint32_t const) { // try to get as many memory as particles in the current frame exchangeChunk = exchangeBox.pushN( acc, numParticles, // Compute the target supercell depending on the exchangeType - DataSpaceOperations< dim >::reduce( - superCellIdx, - mapper.getExchangeType( ) - ), - ::alpaka::hierarchy::Blocks{} - ); - if( exchangeChunk.getSize( ) < numParticles ) + DataSpaceOperations::reduce(superCellIdx, mapper.getExchangeType()), + ::alpaka::hierarchy::Blocks{}); + if(exchangeChunk.getSize() < numParticles) allParticlesCopied = false; - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( exchangeGapIdxCtx[ idx ] != INV_LOC_IDX && exchangeGapIdxCtx[ idx ] < exchangeChunk.getSize( ) ) + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(exchangeGapIdxCtx[idx] != INV_LOC_IDX && exchangeGapIdxCtx[idx] < exchangeChunk.getSize()) { - auto parDest = exchangeChunk[ exchangeGapIdxCtx[ idx ] ][ 0 ]; - auto parSrc = frame[ linearIdx ]; - assign( parDest, parSrc ); - parSrc[ multiMask_ ] = 0; + auto parDest = exchangeChunk[exchangeGapIdxCtx[idx]][0]; + auto parSrc = frame[linearIdx]; + assign(parDest, parSrc); + parSrc[multiMask_] = 0; } - } - ); - __syncthreads( ); - } + }); + cupla::__syncthreads(acc); + } - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { + onlyMaster([&](uint32_t const, uint32_t const) { /* do not remove the frame if we had not copied * all particles from the current frame to the exchange buffer */ - if ( allParticlesCopied ) - frame = getPreviousFrameAndRemoveLastFrame( frame, pb, superCellIdx ); - } - ); + if(allParticlesCopied) + frame = getPreviousFrameAndRemoveLastFrame(acc, frame, pb, superCellIdx); + }); - __syncthreads( ); - } - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { + cupla::__syncthreads(acc); + } + 
onlyMaster([&](uint32_t const, uint32_t const) { /* Mark supercell as empty even if there are particles left. * This kernel not depends on the correct number particles in the supercell. */ - pb.getSuperCell( superCellIdx ).setNumParticles( 0 ); - } - ); - - } -}; + pb.getSuperCell(superCellIdx).setNumParticles(0); + }); + } + }; -/** copy particles from exchange buffer into the border of the simulation - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelInsertParticles -{ /** copy particles from exchange buffer into the border of the simulation * - * @tparam T_ParBox pmacc::ParticlesBox, particle box type - * @tparam T_ExchangeValueType frame type of the exchange buffer - * @tparam T_Mapping mapper functor type - * - * @param pb particle memory - * @param exchangeBox exchange box for particles - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_ParBox, - typename T_ExchangeValueType, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_ParBox pb, - ExchangePopDataBox< - vint_t, - T_ExchangeValueType, - T_Mapping::Dim - 1 - > exchangeBox, - T_Mapping const mapper - ) const + template + struct KernelInsertParticles { - using namespace particles::operations; - using namespace mappings::threads; - - PMACC_CONSTEXPR_CAPTURE uint32_t dim = T_Mapping::Dim; - constexpr uint32_t frameSize = math::CT::volume< typename T_ParBox::FrameType::SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - using FramePtr = typename T_ParBox::FramePtr; - - PMACC_SMEM( - acc, - frame, - FramePtr - ); - PMACC_SMEM( - acc, - elementCount, - int - ); - PMACC_SMEM( - acc, - exchangeChunk, - TileDataBox< T_ExchangeValueType > - ); - - using MasterOnly = IdxConfig< - 1, - numWorkers - >; - - /* compressed index of the the supercell - * can be uncompressed with 
`DataSpaceOperations< >::extend()` + /** copy particles from exchange buffer into the border of the simulation + * + * @tparam T_ParBox pmacc::ParticlesBox, particle box type + * @tparam T_ExchangeValueType frame type of the exchange buffer + * @tparam T_Mapping mapper functor type + * + * @param pb particle memory + * @param exchangeBox exchange box for particles + * @param mapper functor to map a block to a supercell */ - memory::CtxArray< - DataSpace< dim - 1 >, - MasterOnly - > compressedSuperCellIdxCtx{ }; - - ForEachIdx< - MasterOnly - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const idx - ) - { - exchangeChunk = exchangeBox.get( - blockIdx.x, - compressedSuperCellIdxCtx[ idx ] - ); - elementCount = exchangeChunk.getSize( ); - if ( elementCount > 0 ) + template + DINLINE void operator()( + T_Acc const& acc, + T_ParBox pb, + ExchangePopDataBox exchangeBox, + T_Mapping const mapper) const + { + using namespace particles::operations; + using namespace mappings::threads; + + PMACC_CONSTEXPR_CAPTURE uint32_t dim = T_Mapping::Dim; + constexpr uint32_t frameSize = math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + using FramePtr = typename T_ParBox::FramePtr; + + PMACC_SMEM(acc, frame, FramePtr); + PMACC_SMEM(acc, elementCount, int); + PMACC_SMEM(acc, exchangeChunk, TileDataBox); + + using MasterOnly = IdxConfig<1, numWorkers>; + + /* compressed index of the the supercell + * can be uncompressed with `DataSpaceOperations< >::extend()` + */ + memory::CtxArray, MasterOnly> compressedSuperCellIdxCtx{}; + + ForEachIdx onlyMaster{workerIdx}; + + onlyMaster([&](uint32_t const, uint32_t const idx) { + exchangeChunk = exchangeBox.get(cupla::blockIdx(acc).x, compressedSuperCellIdxCtx[idx]); + elementCount = exchangeChunk.getSize(); + if(elementCount > 0) { - frame = pb.getEmptyFrame( ); + frame = pb.getEmptyFrame(acc); } - } - ); - - __syncthreads( ); - 
- // loop over all particles in the frame - ForEachIdx< - IdxConfig< - frameSize, - numWorkers - > - > forEachParticle{ workerIdx }; - - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - if( linearIdx < elementCount ) + }); + + cupla::__syncthreads(acc); + + // loop over all particles in the frame + ForEachIdx> forEachParticle{workerIdx}; + + forEachParticle([&](uint32_t const linearIdx, uint32_t const) { + if(linearIdx < elementCount) { - auto parDestFull = frame[ linearIdx ]; - parDestFull[ multiMask_ ] = 1; - auto parSrc = exchangeChunk[ linearIdx ][ 0 ]; + auto parDestFull = frame[linearIdx]; + parDestFull[multiMask_] = 1; + auto parSrc = exchangeChunk[linearIdx][0]; /*we know that source has no multiMask*/ - auto parDest = deselect( parDestFull ); - assign( parDest, parSrc ); + auto parDest = deselect(parDestFull); + assign(parDest, parSrc); } - } - ); + }); - /** @bug This synchronize fixes a kernel crash in special cases, - * psychocoderHPC: I can't tell why. - */ - __syncthreads( ); + /** @bug This synchronize fixes a kernel crash in special cases, + * psychocoderHPC: I can't tell why. + */ + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const idx - ) - { - if( elementCount > 0 ) + onlyMaster([&](uint32_t const, uint32_t const idx) { + if(elementCount > 0) { // compute the super cell position in target frame to insert into //! 
@todo: offset == simulation border should be passed to this func instead of being created here - DataSpace< dim > dstSuperCell = DataSpaceOperations < dim - 1 > ::extend( - compressedSuperCellIdxCtx[ idx ], - mapper.getExchangeType( ), - mapper.getGridSuperCells( ), - mapper.getGuardingSuperCells( ) - ); - - pb.setAsLastFrame( - acc, - frame, - dstSuperCell - ); - } - } - ); + DataSpace dstSuperCell = DataSpaceOperations::extend( + compressedSuperCellIdxCtx[idx], + mapper.getExchangeType(), + mapper.getGridSuperCells(), + mapper.getGuardingSuperCells()); - } -}; + pb.setAsLastFrame(acc, frame, dstSuperCell); + } + }); + } + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/ParticlesBase.tpp b/include/pmacc/particles/ParticlesBase.tpp index bf7c3fa7ba..fc32b20fcb 100644 --- a/include/pmacc/particles/ParticlesBase.tpp +++ b/include/pmacc/particles/ParticlesBase.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -37,111 +37,82 @@ namespace pmacc template void ParticlesBase::deleteGuardParticles(uint32_t exchangeType) { - ExchangeMapping mapper(this->cellDescription, exchangeType); - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume< typename FrameType::SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelDeleteParticles< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - particlesBuffer->getDeviceParticleBox( ), - mapper - ); + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelDeleteParticles{}) + (mapper.getGridDim(), numWorkers)(particlesBuffer->getDeviceParticleBox(), mapper); } template template void ParticlesBase::deleteParticlesInArea() { - AreaMapping mapper(this->cellDescription); - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume< typename FrameType::SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelDeleteParticles< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - particlesBuffer->getDeviceParticleBox( ), - mapper - ); + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelDeleteParticles{}) + (mapper.getGridDim(), numWorkers)(particlesBuffer->getDeviceParticleBox(), mapper); } template - void ParticlesBase::reset(uint32_t ) + void ParticlesBase::reset(uint32_t) { - deleteParticlesInArea(); - particlesBuffer->reset( ); + deleteParticlesInArea(); + particlesBuffer->reset(); } template - void ParticlesBase::copyGuardToExchange( uint32_t exchangeType ) + void ParticlesBase::copyGuardToExchange(uint32_t exchangeType) { - if( particlesBuffer->hasSendExchange( exchangeType ) ) + if(particlesBuffer->hasSendExchange(exchangeType)) { - ExchangeMapping< - GUARD, - MappingDesc - > mapper( - this->cellDescription, - exchangeType - ); - - particlesBuffer->getSendExchangeStack( exchangeType ).setCurrentSize( 0 ); - - constexpr uint32_t numWorkers = traits::GetNumWorkers< - 
math::CT::volume< typename FrameType::SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelCopyGuardToExchange< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - particlesBuffer->getDeviceParticleBox( ), - particlesBuffer->getSendExchangeStack( exchangeType ).getDeviceExchangePushDataBox( ), - mapper - ); + ExchangeMapping mapper(this->cellDescription, exchangeType); + + particlesBuffer->getSendExchangeStack(exchangeType).setCurrentSize(0); + + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelCopyGuardToExchange{}) + (mapper.getGridDim(), numWorkers)( + particlesBuffer->getDeviceParticleBox(), + particlesBuffer->getSendExchangeStack(exchangeType).getDeviceExchangePushDataBox(), + mapper); } } template void ParticlesBase::insertParticles(uint32_t exchangeType) { - if( particlesBuffer->hasReceiveExchange( exchangeType ) ) + if(particlesBuffer->hasReceiveExchange(exchangeType)) { - size_t grid( particlesBuffer->getReceiveExchangeStack( exchangeType ).getHostCurrentSize( ) ); - if( grid != 0u ) + size_t numParticles = 0u; + if(Environment<>::get().isMpiDirectEnabled()) + numParticles = particlesBuffer->getReceiveExchangeStack(exchangeType).getDeviceCurrentSize(); + else + numParticles = particlesBuffer->getReceiveExchangeStack(exchangeType).getHostCurrentSize(); + + if(numParticles != 0u) { - ExchangeMapping< - GUARD, - MappingDesc - > mapper( - this->cellDescription, - exchangeType - ); - - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume< typename FrameType::SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelInsertParticles< numWorkers >{ } )( - grid, - numWorkers - )( - particlesBuffer->getDeviceParticleBox( ), - particlesBuffer->getReceiveExchangeStack( exchangeType ).getDeviceExchangePopDataBox( ), - mapper - ); + ExchangeMapping mapper(this->cellDescription, exchangeType); + + constexpr uint32_t numWorkers + = 
traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelInsertParticles{}) + (numParticles, numWorkers)( + particlesBuffer->getDeviceParticleBox(), + particlesBuffer->getReceiveExchangeStack(exchangeType).getDeviceExchangePopDataBox(), + mapper); } } } -} //namespace pmacc +} // namespace pmacc #include "pmacc/particles/AsyncCommunicationImpl.hpp" diff --git a/include/pmacc/particles/algorithm/CallForEach.hpp b/include/pmacc/particles/algorithm/CallForEach.hpp index c2fbb9f497..4ea1155cc3 100644 --- a/include/pmacc/particles/algorithm/CallForEach.hpp +++ b/include/pmacc/particles/algorithm/CallForEach.hpp @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 Rene Widera +/* Copyright 2019-2021 Rene Widera * * This file is part of PMacc. * @@ -30,56 +30,41 @@ namespace pmacc { -namespace particles -{ -namespace algorithm -{ - - /** Functor to execute an operation on all particles - * - * @tparam T_SpeciesOperator an operator to create the used species - * with the species type as ::type - * @tparam T_FunctorOperator an operator to create a particle functor - * with the functor type as ::type - */ - template< - typename T_SpeciesOperator, - typename T_FunctorOperator - > - struct CallForEach + namespace particles { - /** Operate on the domain CORE and BORDER - * - * @param currentStep current simulation time step - */ - HINLINE void - operator()( uint32_t const currentStep ) + namespace algorithm { - using Species = typename T_SpeciesOperator::type; - using FrameType = typename Species::FrameType; + /** Functor to execute an operation on all particles + * + * @tparam T_SpeciesOperator an operator to create the used species + * with the species type as ::type + * @tparam T_FunctorOperator an operator to create a particle functor + * with the functor type as ::type + */ + template + struct CallForEach + { + /** Operate on the domain CORE and BORDER + * + * @param currentStep current simulation time step + */ + HINLINE void operator()(uint32_t const currentStep) + { + 
using Species = typename T_SpeciesOperator::type; + using FrameType = typename Species::FrameType; - // be sure the species functor follows the pmacc functor interface - using UnaryFunctor = pmacc::functor::Interface< - typename T_FunctorOperator::type, - 1u, - void - >; + // be sure the species functor follows the pmacc functor interface + using UnaryFunctor = pmacc::functor::Interface; - DataConnector &dc = Environment<>::get().DataConnector(); - auto species = dc.get< Species >( - FrameType::getName(), - true - ); + DataConnector& dc = Environment<>::get().DataConnector(); + auto species = dc.get(FrameType::getName(), true); - forEach( - *species, - UnaryFunctor( currentStep ) - ); + forEach(*species, UnaryFunctor(currentStep)); - dc.releaseData( FrameType::getName() ); - } - }; + dc.releaseData(FrameType::getName()); + } + }; -} // namespace algorithm -} // namespace particles + } // namespace algorithm + } // namespace particles } // namespace pmacc diff --git a/include/pmacc/particles/algorithm/ForEach.hpp b/include/pmacc/particles/algorithm/ForEach.hpp index 68a0c1d284..764936252d 100644 --- a/include/pmacc/particles/algorithm/ForEach.hpp +++ b/include/pmacc/particles/algorithm/ForEach.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl, Rene Widera +/* Copyright 2017-2021 Axel Huebl, Rene Widera * * This file is part of PMacc. 
* @@ -33,164 +33,123 @@ namespace pmacc { -namespace particles -{ -namespace algorithm -{ -namespace acc -{ -namespace detail -{ - - /** operate on particles of a species - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct ForEachParticle + namespace particles { - /** operate on particles - * - * @tparam T_Acc alpaka accelerator type - * @tparam T_Functor type of the functor to operate on a particle - * @tparam T_Mapping mapping functor type - * @tparam T_ParBox pmacc::ParticlesBox, type of the species box - * - * @param acc alpaka accelerator - * @param functor functor to operate on a particle - * must fulfill the interface pmacc::functor::Interface - * @param mapper functor to map a block to a supercell - * @param pb particles species box - */ - template< - typename T_Acc, - typename T_Functor, - typename T_Mapping, - typename T_ParBox - > - DINLINE void operator()( - T_Acc const & acc, - T_Functor functor, - T_Mapping const mapper, - T_ParBox pb - ) const + namespace algorithm { - using namespace mappings::threads; - - using SuperCellSize = typename T_ParBox::FrameType::SuperCellSize; - constexpr uint32_t dim = SuperCellSize::dim; - constexpr uint32_t frameSize = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - DataSpace< dim > const superCellIdx( - mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ) - ); - - auto const & superCell = pb.getSuperCell( superCellIdx ); - uint32_t const numPartcilesInSupercell = superCell.getNumParticles(); - - - // end kernel if we have no particles - if( numPartcilesInSupercell == 0 ) - return; - - using FramePtr = typename T_ParBox::FramePtr; - FramePtr frame = pb.getFirstFrame( superCellIdx ); + namespace acc + { + namespace detail + { + /** operate on particles of a species + * + * @tparam T_numWorkers number of workers + */ + template + struct ForEachParticle + { + /** operate 
on particles + * + * @tparam T_Acc alpaka accelerator type + * @tparam T_Functor type of the functor to operate on a particle + * @tparam T_Mapping mapping functor type + * @tparam T_ParBox pmacc::ParticlesBox, type of the species box + * + * @param acc alpaka accelerator + * @param functor functor to operate on a particle + * must fulfill the interface pmacc::functor::Interface + * @param mapper functor to map a block to a supercell + * @param pb particles species box + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_Functor functor, + T_Mapping const mapper, + T_ParBox pb) const + { + using namespace mappings::threads; + + using SuperCellSize = typename T_ParBox::FrameType::SuperCellSize; + constexpr uint32_t dim = SuperCellSize::dim; + constexpr uint32_t frameSize = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + DataSpace const superCellIdx( + mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); + + auto const& superCell = pb.getSuperCell(superCellIdx); + uint32_t const numPartcilesInSupercell = superCell.getNumParticles(); + + + // end kernel if we have no particles + if(numPartcilesInSupercell == 0) + return; + + using FramePtr = typename T_ParBox::FramePtr; + FramePtr frame = pb.getFirstFrame(superCellIdx); + + // offset of the superCell (in cells, without any guards) to the origin of the local domain + DataSpace const localSuperCellOffset = superCellIdx - mapper.getGuardingSuperCells(); + + auto accFunctor = functor(acc, localSuperCellOffset, WorkerCfg{workerIdx}); + + for(uint32_t parOffset = 0; parOffset < numPartcilesInSupercell; parOffset += frameSize) + { + using ParticleDomCfg = IdxConfig; + + // loop over all particles in the frame + ForEachIdx{workerIdx}([&](uint32_t const linearIdx, uint32_t const) { + // particle index within the supercell + uint32_t parIdx = parOffset + linearIdx; + auto particle = frame[linearIdx]; + + bool 
const isPar = parIdx < numPartcilesInSupercell; + if(isPar) + accFunctor(acc, particle); + }); + + frame = pb.getNextFrame(frame); + } + } + }; + + } // namespace detail + } // namespace acc + + /** Run a unary functor for each particle of a species + * + * @warning Does NOT fill gaps automatically! If the + * operation deactivates particles or creates "gaps" in any + * other way, CallFillAllGaps needs to be called for the + * species manually afterwards! + * + * Operates on the domain CORE and BORDER + * + * @tparam T_Species type of the species + * @tparam T_Functor unary particle functor type which follows the interface of + * pmacc::functor::Interface + * + * @param species species to operate on + * @param functor operation which is applied to each particle of the species + */ + template + void forEach(T_Species&& species, T_Functor functor) + { + using MappingDesc = decltype(species.getCellDescription()); + AreaMapping mapper(species.getCellDescription()); - // offset of the superCell (in cells, without any guards) to the origin of the local domain - DataSpace< dim > const localSuperCellOffset = - superCellIdx - mapper.getGuardingSuperCells( ); + using SuperCellSize = typename MappingDesc::SuperCellSize; - auto accFunctor = functor( - acc, - localSuperCellOffset, - WorkerCfg< T_numWorkers >{ workerIdx } - ); + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; - for( uint32_t parOffset = 0; parOffset < numPartcilesInSupercell; parOffset += frameSize) - { - using ParticleDomCfg = IdxConfig< - frameSize, - numWorkers - >; - - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - // particle index within the supercell - uint32_t parIdx = parOffset + linearIdx; - auto particle = frame[ linearIdx ]; - - bool const isPar = parIdx < numPartcilesInSupercell; - if( isPar ) - accFunctor( - acc, - particle - ); - } - ); - - frame = 
pb.getNextFrame( frame ); + PMACC_KERNEL(acc::detail::ForEachParticle{}) + (mapper.getGridDim(), numWorkers)(std::move(functor), mapper, species.getDeviceParticlesBox()); } - } - }; - -} //namespace detail -} //namespace acc - - /** Run a unary functor for each particle of a species - * - * @warning Does NOT fill gaps automatically! If the - * operation deactivates particles or creates "gaps" in any - * other way, CallFillAllGaps needs to be called for the - * species manually afterwards! - * - * Operates on the domain CORE and BORDER - * - * @tparam T_Species type of the species - * @tparam T_Functor unary particle functor type which follows the interface of - * pmacc::functor::Interface - * - * @param species species to operate on - * @param functor operation which is applied to each particle of the species - */ - template< - typename T_Species, - typename T_Functor - > - void forEach( - T_Species && species, - T_Functor functor - ) - { - using MappingDesc = decltype(species.getCellDescription()); - AreaMapping< - CORE + BORDER, - MappingDesc - > mapper( species.getCellDescription() ); - - using SuperCellSize = typename MappingDesc::SuperCellSize; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( acc::detail::ForEachParticle< numWorkers >{ } )( - mapper.getGridDim(), - numWorkers - )( - std::move(functor), - mapper, - species.getDeviceParticlesBox( ) - ); - } - -} // namespace algorithm -} // namespace particles + + } // namespace algorithm + } // namespace particles } // namespace pmacc diff --git a/include/pmacc/particles/boostExtension/InheritGenerators.hpp b/include/pmacc/particles/boostExtension/InheritGenerators.hpp index c2f171ce92..0fe3e627a0 100644 --- a/include/pmacc/particles/boostExtension/InheritGenerators.hpp +++ b/include/pmacc/particles/boostExtension/InheritGenerators.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* 
Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -37,68 +37,65 @@ namespace pmacc { - -template -struct LinearInherit; - -template -class LinearInheritFork : public Base1, public Base2 -{ -}; - - -/** Rule if head is a class without Base template parameter - * - * Create a fork and inherit from head and combined classes from Vec - */ -template ::value> -struct TypelistLinearInherit; - -template -struct TypelistLinearInherit -{ - typedef LinearInheritFork::type > type; -}; - - - -/** Rule if head is a class which can inherit from other class - */ -template < template class Head, class Vec> -struct TypelistLinearInherit, Vec ,false> -{ - typedef Head::type > type; -}; - - -/** Rule if Vec is empty but Head is valid - * - * This is the recursive end rule - */ -template -struct TypelistLinearInherit -{ - typedef Head type; -}; - - - -/** Create a data structure which inherit linearly - * \tparam vec_ boost mpl vector with classes - * - * class A; - * LinearInherit,B> >::type return - * - * typedef A type; - */ -template -struct LinearInherit -{ - typedef typename TypelistLinearInherit < - typename bmpl::front::type, - typename bmpl::pop_front::type >::type type; -}; - -} - - + template + struct LinearInherit; + + template + class LinearInheritFork + : public Base1 + , public Base2 + { + }; + + + /** Rule if head is a class without Base template parameter + * + * Create a fork and inherit from head and combined classes from Vec + */ + template::value> + struct TypelistLinearInherit; + + template + struct TypelistLinearInherit + { + typedef LinearInheritFork::type> type; + }; + + + /** Rule if head is a class which can inherit from other class + */ + template class Head, class Vec> + struct TypelistLinearInherit, Vec, false> + { + typedef Head::type> type; + }; + + + /** Rule if Vec is empty but Head is valid + * + * This is the recursive end rule + */ + template + struct TypelistLinearInherit + { + typedef Head type; + }; + + + /** 
Create a data structure which inherit linearly + * \tparam vec_ boost mpl vector with classes + * + * class A; + * LinearInherit,B> >::type return + * + * typedef A type; + */ + template + struct LinearInherit + { + typedef + typename TypelistLinearInherit::type, typename bmpl::pop_front::type>:: + type type; + }; + +} // namespace pmacc diff --git a/include/pmacc/particles/boostExtension/InheritLinearly.hpp b/include/pmacc/particles/boostExtension/InheritLinearly.hpp index feeab93ab6..bb7485ddda 100644 --- a/include/pmacc/particles/boostExtension/InheritLinearly.hpp +++ b/include/pmacc/particles/boostExtension/InheritLinearly.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -31,43 +31,27 @@ namespace pmacc { -namespace detail -{ - - /** get combined type which inherit from a boost mpl sequence - * - * @tparam T_Sequence boost mpl sequence with classes - * @tparam T_Accessor unary operator to transform each element of the sequence - */ - template< - typename T_Sequence, - template< typename > class T_Accessor = meta::accessors::Identity - > - using InheritLinearly = - typename bmpl::inherit_linearly< - T_Sequence, - bmpl::inherit< - bmpl::_1, - T_Accessor< bmpl::_2 > - > - >::type; + namespace detail + { + /** get combined type which inherit from a boost mpl sequence + * + * @tparam T_Sequence boost mpl sequence with classes + * @tparam T_Accessor unary operator to transform each element of the sequence + */ + template class T_Accessor = meta::accessors::Identity> + using InheritLinearly = + typename bmpl::inherit_linearly>>::type; -} //namespace detail + } // namespace detail /** type which inherits from multiple classes * * @tparam T_Sequence boost mpl sequence with classes * @tparam T_Accessor unary operator to transform each element of the sequence */ - template< - typename T_Sequence, - template< typename > class T_Accessor = meta::accessors::Identity - > - struct InheritLinearly : 
detail::InheritLinearly< - T_Sequence, - T_Accessor - > + template class T_Accessor = meta::accessors::Identity> + struct InheritLinearly : detail::InheritLinearly { }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/frame_types.hpp b/include/pmacc/particles/frame_types.hpp index c0254388bd..c2d994a420 100644 --- a/include/pmacc/particles/frame_types.hpp +++ b/include/pmacc/particles/frame_types.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -23,10 +23,10 @@ #include "pmacc/types.hpp" -//define which index means that the index is invalid +// define which index means that the index is invalid #define INV_IDX 0xFFFFFFFF -//define which index means that a local cell index is invalid +// define which index means that a local cell index is invalid #define INV_LOC_IDX 0xFFFF namespace pmacc @@ -45,5 +45,10 @@ namespace pmacc /** * Describes type of a frame (core, border) */ - enum FrameType { CORE_FRAME = 0u, BORDER_FRAME =1u , BIG_FRAME=2u}; -} + enum FrameType + { + CORE_FRAME = 0u, + BORDER_FRAME = 1u, + BIG_FRAME = 2u + }; +} // namespace pmacc diff --git a/include/pmacc/particles/memory/boxes/ExchangePopDataBox.hpp b/include/pmacc/particles/memory/boxes/ExchangePopDataBox.hpp index c8dbf9e2c1..869bd02cb0 100644 --- a/include/pmacc/particles/memory/boxes/ExchangePopDataBox.hpp +++ b/include/pmacc/particles/memory/boxes/ExchangePopDataBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -29,36 +29,31 @@ namespace pmacc { - - -template -class ExchangePopDataBox : public DataBox > -{ -public: - typedef ExchangeMemoryIndex PopType; - - HDINLINE ExchangePopDataBox(DataBox > data, - DataBox > virtualMemory - ) : - DataBox >(data), - virtualMemory(virtualMemory) - { - - } - - HDINLINE - TileDataBox get(TYPE idx, DataSpace &superCell) + template + class ExchangePopDataBox : public DataBox> { - PopType tmp = virtualMemory[idx]; - - superCell = tmp.getSuperCell(); - return TileDataBox (this->fixedPointer, - DataSpace (tmp.getStartIndex()), - tmp.getCount()); - } - -protected: - PMACC_ALIGN8(virtualMemory, DataBox >); -}; - -} + public: + typedef ExchangeMemoryIndex PopType; + + HDINLINE ExchangePopDataBox( + DataBox> data, + DataBox> virtualMemory) + : DataBox>(data) + , virtualMemory(virtualMemory) + { + } + + HDINLINE + TileDataBox get(TYPE idx, DataSpace& superCell) + { + PopType tmp = virtualMemory[idx]; + + superCell = tmp.getSuperCell(); + return TileDataBox(this->fixedPointer, DataSpace(tmp.getStartIndex()), tmp.getCount()); + } + + protected: + PMACC_ALIGN8(virtualMemory, DataBox>); + }; + +} // namespace pmacc diff --git a/include/pmacc/particles/memory/boxes/ExchangePushDataBox.hpp b/include/pmacc/particles/memory/boxes/ExchangePushDataBox.hpp index 9160d271ab..6091d5bd7a 100644 --- a/include/pmacc/particles/memory/boxes/ExchangePushDataBox.hpp +++ b/include/pmacc/particles/memory/boxes/ExchangePushDataBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -31,82 +31,76 @@ namespace pmacc { - - -/** - * @tparam TYPE type for addresses - * @tparam VALUE type for actual data - * @tparam DIM dimension - */ -template -class ExchangePushDataBox : public DataBox > -{ -public: - - typedef ExchangeMemoryIndex PushType; - - HDINLINE ExchangePushDataBox(VALUE *data, TYPE *currentSizePointer, TYPE maxSize, - PushDataBox virtualMemory) : - DataBox >(PitchedBox(data, DataSpace())), - currentSizePointer(currentSizePointer), - maxSize(maxSize), - virtualMemory(virtualMemory) - { - } - - /** give access to push N elements into the memory - * - * The method is threadsave within the given alpaka hierarchy. - * - * @tparam T_Acc type of the alpaka accelerator - * @tparam T_Hierarchy alpaka::hierarchy type of the hierarchy - * - * @param acc alpaka accelerator - * @param count number of elements to increase stack with - * @param superCell offset of the supercell relative to the local domain - * @param hierarchy alpaka parallelism hierarchy levels guarantee valid - * concurrency access to the memory - * - * @return a TileDataBox of size count pointing to the new stack elements + /** + * @tparam TYPE type for addresses + * @tparam VALUE type for actual data + * @tparam DIM dimension */ - template< typename T_Acc, typename T_Hierarchy > - HDINLINE TileDataBox pushN( - T_Acc const & acc, - TYPE count, - DataSpace const &superCell, - T_Hierarchy const & hierarchy - ) + template + class ExchangePushDataBox : public DataBox> { - TYPE oldSize = atomicAdd(currentSizePointer, count, hierarchy); //get count VALUEs + public: + typedef ExchangeMemoryIndex PushType; - if (oldSize + count > maxSize) + HDINLINE ExchangePushDataBox( + VALUE* data, + TYPE* currentSizePointer, + TYPE maxSize, + PushDataBox virtualMemory) + : DataBox>(PitchedBox(data, DataSpace())) + , currentSizePointer(currentSizePointer) + , maxSize(maxSize) + , virtualMemory(virtualMemory) { - atomicExch(currentSizePointer, maxSize, hierarchy); //reset size to maxsize - if 
(oldSize >= maxSize) - { - return TileDataBox (nullptr, - DataSpace (0), - 0); - } - else - count = maxSize - oldSize; } - TileDataBox tmp = virtualMemory.pushN(acc, 1, hierarchy); - tmp[0].setSuperCell(superCell); - tmp[0].setCount(count); - tmp[0].setStartIndex(oldSize); - return TileDataBox (this->fixedPointer, - DataSpace (oldSize), - count); - } + /** give access to push N elements into the memory + * + * The method is threadsave within the given alpaka hierarchy. + * + * @tparam T_Acc type of the alpaka accelerator + * @tparam T_Hierarchy alpaka::hierarchy type of the hierarchy + * + * @param acc alpaka accelerator + * @param count number of elements to increase stack with + * @param superCell offset of the supercell relative to the local domain + * @param hierarchy alpaka parallelism hierarchy levels guarantee valid + * concurrency access to the memory + * + * @return a TileDataBox of size count pointing to the new stack elements + */ + template + HDINLINE TileDataBox pushN( + T_Acc const& acc, + TYPE count, + DataSpace const& superCell, + T_Hierarchy const& hierarchy) + { + TYPE oldSize = cupla::atomicAdd(acc, currentSizePointer, count, hierarchy); // get count VALUEs + if(oldSize + count > maxSize) + { + cupla::atomicExch(acc, currentSizePointer, maxSize, hierarchy); // reset size to maxsize + if(oldSize >= maxSize) + { + return TileDataBox(nullptr, DataSpace(0), 0); + } + else + count = maxSize - oldSize; + } + + TileDataBox tmp = virtualMemory.pushN(acc, 1, hierarchy); + tmp[0].setSuperCell(superCell); + tmp[0].setCount(count); + tmp[0].setStartIndex(oldSize); + return TileDataBox(this->fixedPointer, DataSpace(oldSize), count); + } -protected: - PMACC_ALIGN8(virtualMemory, PushDataBox); - PMACC_ALIGN(maxSize, TYPE); - PMACC_ALIGN(currentSizePointer, TYPE*); -}; + protected: + PMACC_ALIGN8(virtualMemory, PushDataBox); + PMACC_ALIGN(maxSize, TYPE); + PMACC_ALIGN(currentSizePointer, TYPE*); + }; -} +} // namespace pmacc diff --git 
a/include/pmacc/particles/memory/boxes/ParticlesBox.hpp b/include/pmacc/particles/memory/boxes/ParticlesBox.hpp index 24532ffa6f..d6630e606e 100644 --- a/include/pmacc/particles/memory/boxes/ParticlesBox.hpp +++ b/include/pmacc/particles/memory/boxes/ParticlesBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Alexander Grund * * This file is part of PMacc. @@ -22,8 +22,8 @@ #pragma once -#if( PMACC_CUDA_ENABLED == 1 ) -# include +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) +# include #endif #include "pmacc/particles/frame_types.hpp" #include "pmacc/dimensions/DataSpace.hpp" @@ -33,300 +33,283 @@ namespace pmacc { - -/** - * A DIM-dimensional Box holding frames with particle data. - * - * @tparam FRAME datatype for frames - * @tparam DIM dimension of data (1-3) - */ -template -class ParticlesBox : protected DataBox, DIM> > -{ -private: - PMACC_ALIGN( m_deviceHeapHandle, T_DeviceHeapHandle ); - PMACC_ALIGN( hostMemoryOffset, int64_t ); -public: - - typedef T_Frame FrameType; - typedef FramePointer FramePtr; - typedef SuperCell SuperCellType; - typedef DataBox, DIM> > BaseType; - typedef T_DeviceHeapHandle DeviceHeapHandle; - - static constexpr uint32_t Dim = DIM; - - /** default constructor + /** + * A DIM-dimensional Box holding frames with particle data. 
* - * \warning after this call the object is in a invalid state and must be - * initialized with an assignment of a valid ParticleBox + * @tparam FRAME datatype for frames + * @tparam DIM dimension of data (1-3) */ - HDINLINE ParticlesBox( ) : hostMemoryOffset( 0 ) - { - - } - - HDINLINE ParticlesBox( - const DataBox >& superCells, - const DeviceHeapHandle& deviceHeapHandle - ) : - BaseType( superCells ), m_deviceHeapHandle(deviceHeapHandle), hostMemoryOffset( 0 ) + template + class ParticlesBox : protected DataBox, DIM>> { + private: + PMACC_ALIGN(m_deviceHeapHandle, T_DeviceHeapHandle); + PMACC_ALIGN(hostMemoryOffset, int64_t); + + public: + typedef T_Frame FrameType; + typedef FramePointer FramePtr; + typedef SuperCell SuperCellType; + typedef DataBox, DIM>> BaseType; + typedef T_DeviceHeapHandle DeviceHeapHandle; + + static constexpr uint32_t Dim = DIM; + + /** default constructor + * + * \warning after this call the object is in a invalid state and must be + * initialized with an assignment of a valid ParticleBox + */ + HDINLINE ParticlesBox() : hostMemoryOffset(0) + { + } - } - - HDINLINE ParticlesBox( - const DataBox > &superCells, - const DeviceHeapHandle& deviceHeapHandle, - int64_t memoryOffset - ) : - BaseType( superCells ), m_deviceHeapHandle(deviceHeapHandle), hostMemoryOffset( memoryOffset ) - { + HDINLINE ParticlesBox( + const DataBox>& superCells, + const DeviceHeapHandle& deviceHeapHandle) + : BaseType(superCells) + , m_deviceHeapHandle(deviceHeapHandle) + , hostMemoryOffset(0) + { + } - } + HDINLINE ParticlesBox( + const DataBox>& superCells, + const DeviceHeapHandle& deviceHeapHandle, + int64_t memoryOffset) + : BaseType(superCells) + , m_deviceHeapHandle(deviceHeapHandle) + , hostMemoryOffset(memoryOffset) + { + } - /** - * Returns an empty frame from data heap. 
- * - * @return an empty frame - */ - DINLINE FramePtr getEmptyFrame( ) - { - FrameType* tmp = nullptr; - const int maxTries = 13; //magic number is not performance critical - for ( int numTries = 0; numTries < maxTries; ++numTries ) + /** + * Returns an empty frame from data heap. + * + * @return an empty frame + */ + template + DINLINE FramePtr getEmptyFrame(const T_Acc& acc) { -#if( PMACC_CUDA_ENABLED == 1 ) - tmp = (FrameType*) m_deviceHeapHandle.malloc( sizeof (FrameType) ); + FrameType* tmp = nullptr; + const int maxTries = 13; // magic number is not performance critical + for(int numTries = 0; numTries < maxTries; ++numTries) + { +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + tmp = (FrameType*) m_deviceHeapHandle.malloc(acc, sizeof(FrameType)); #else - tmp = new FrameType; + tmp = new FrameType; #endif - if ( tmp != nullptr ) - { - /* disable all particles since we can not assume that newly allocated memory contains zeros */ - for ( int i = 0; i < (int) math::CT::volume::type::value; ++i ) - ( *tmp )[i][multiMask_] = 0; -#if( PMACC_CUDA_ENABLED == 1 ) - /* takes care that changed values are visible to all threads inside this block*/ - __threadfence_block( ); + if(tmp != nullptr) + { + /* disable all particles since we can not assume that newly allocated memory contains zeros */ + for(int i = 0; i < (int) math::CT::volume::type::value; ++i) + (*tmp)[i][multiMask_] = 0; +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + /* takes care that changed values are visible to all threads inside this block*/ + __threadfence_block(); #endif - break; - } - else - { - printf( "%s: mallocMC out of memory (try %i of %i)\n", + break; + } + else + { +#ifndef BOOST_COMP_HIP + printf( + "%s: mallocMC out of memory (try %i of %i)\n", (numTries + 1) == maxTries ? "ERROR" : "WARNING", numTries + 1, - maxTries ); + maxTries); +#endif + } } - } - return FramePtr( tmp ); - } + return FramePtr(tmp); + } - /** - * Removes frame from heap data heap. 
- * - * @param frame frame to remove - */ - DINLINE void removeFrame( FramePtr& frame ) - { -#if( PMACC_CUDA_ENABLED == 1 ) - m_deviceHeapHandle.free( (void*) frame.ptr ); + /** + * Removes frame from heap data heap. + * + * @param frame frame to remove + */ + template + DINLINE void removeFrame(const T_Acc& acc, FramePtr& frame) + { +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + m_deviceHeapHandle.free(acc, (void*) frame.ptr); #else - delete(frame.ptr); + delete(frame.ptr); #endif - frame.ptr = nullptr; - } + frame.ptr = nullptr; + } - HDINLINE - FramePtr mapPtr( const FramePtr& devPtr ) const - { -#ifndef __CUDA_ARCH__ - int64_t useOffset = hostMemoryOffset * static_cast (devPtr.ptr != 0); - return FramePtr( reinterpret_cast ( - reinterpret_cast (devPtr.ptr) - useOffset - ) - ); + HDINLINE + FramePtr mapPtr(const FramePtr& devPtr) const + { +#if(CUPLA_DEVICE_COMPILE == 1) + return devPtr; #else - return devPtr; + int64_t useOffset = hostMemoryOffset * static_cast(devPtr.ptr != 0); + return FramePtr(reinterpret_cast(reinterpret_cast(devPtr.ptr) - useOffset)); #endif - } - - /** - * Returns the next frame in the linked list. - * - * @param frame the active frame - * @return the next frame in the list - */ - HDINLINE FramePtr getNextFrame( const FramePtr& frame ) const - { - return mapPtr( frame->nextFrame.ptr ); - } + } - /** - * Returns the previous frame in the linked list. - * - * @param frame the active frame - * @return the previous frame in the list - */ - HDINLINE FramePtr getPreviousFrame( const FramePtr& frame ) const - { - return mapPtr( frame->previousFrame.ptr ); - } + /** + * Returns the next frame in the linked list. + * + * @param frame the active frame + * @return the next frame in the list + */ + HDINLINE FramePtr getNextFrame(const FramePtr& frame) const + { + return mapPtr(frame->nextFrame.ptr); + } - /** - * Returns the last frame of a supercell. 
- * - * @param idx position of supercell - * @return the last frame of the linked list from supercell - */ - HDINLINE FramePtr getLastFrame( const DataSpace &idx ) const - { - return mapPtr( getSuperCell( idx ).LastFramePtr( ) ); - } + /** + * Returns the previous frame in the linked list. + * + * @param frame the active frame + * @return the previous frame in the list + */ + HDINLINE FramePtr getPreviousFrame(const FramePtr& frame) const + { + return mapPtr(frame->previousFrame.ptr); + } - /** - * Returns the first frame of a supercell. - * - * @param idx position of supercell - * @return the first frame of the linked list from supercell - */ - HDINLINE FramePtr getFirstFrame( const DataSpace &idx ) const - { - return mapPtr( getSuperCell( idx ).FirstFramePtr( ) ); - } + /** + * Returns the last frame of a supercell. + * + * @param idx position of supercell + * @return the last frame of the linked list from supercell + */ + HDINLINE FramePtr getLastFrame(const DataSpace& idx) const + { + return mapPtr(getSuperCell(idx).LastFramePtr()); + } - /** - * Sets frame as the first frame of a supercell. - * - * @param frame frame to set as first frame - * @param idx position of supercell - */ - template< - typename T_Acc - > - DINLINE void setAsFirstFrame( - T_Acc const & acc, - FramePtr & frame, - DataSpace< DIM > const &idx - ) - { - FrameType** firstFrameNativPtr = &(getSuperCell( idx ).firstFramePtr); + /** + * Returns the first frame of a supercell. 
+ * + * @param idx position of supercell + * @return the first frame of the linked list from supercell + */ + HDINLINE FramePtr getFirstFrame(const DataSpace& idx) const + { + return mapPtr(getSuperCell(idx).FirstFramePtr()); + } - frame->previousFrame = FramePtr( ); - frame->nextFrame = FramePtr( *firstFrameNativPtr ); -#if( PMACC_CUDA_ENABLED == 1 ) - /* - takes care that `next[index]` is visible to all threads on the gpu - * - this is needed because later on in this method we change `previous` - * of an other frame, this must be done in order! + /** + * Sets frame as the first frame of a supercell. + * + * @param frame frame to set as first frame + * @param idx position of supercell */ - __threadfence( ); + template + DINLINE void setAsFirstFrame(T_Acc const& acc, FramePtr& frame, DataSpace const& idx) + { + FrameType** firstFrameNativPtr = &(getSuperCell(idx).firstFramePtr); + + frame->previousFrame = FramePtr(); + frame->nextFrame = FramePtr(*firstFrameNativPtr); +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + /* - takes care that `next[index]` is visible to all threads on the gpu + * - this is needed because later on in this method we change `previous` + * of an other frame, this must be done in order! 
+ */ + __threadfence(); #endif - FramePtr oldFirstFramePtr( - (FrameType*) atomicExch( + FramePtr oldFirstFramePtr((FrameType*) cupla::atomicExch( + acc, (unsigned long long int*) firstFrameNativPtr, (unsigned long long int) frame.ptr, - ::alpaka::hierarchy::Grids{} - ) - ); + ::alpaka::hierarchy::Grids{})); - frame->nextFrame = oldFirstFramePtr; - if ( oldFirstFramePtr.isValid( ) ) - { - oldFirstFramePtr->previousFrame = frame; - } - else - { - //we add the first frame in supercell - getSuperCell( idx ).lastFramePtr = frame.ptr; + frame->nextFrame = oldFirstFramePtr; + if(oldFirstFramePtr.isValid()) + { + oldFirstFramePtr->previousFrame = frame; + } + else + { + // we add the first frame in supercell + getSuperCell(idx).lastFramePtr = frame.ptr; + } } - } - - /** - * Sets frame as the last frame of a supercell. - * - * @param frame frame to set as last frame - * @param idx position of supercell - */ - template< - typename T_Acc - > - DINLINE void setAsLastFrame( - T_Acc const & acc, - FramePointer< - FrameType - >& frame, - DataSpace< DIM > const &idx - ) - { - FrameType** lastFrameNativPtr = &(getSuperCell( idx ).lastFramePtr); - frame->nextFrame = FramePtr( ); - frame->previousFrame = FramePtr( *lastFrameNativPtr ); -#if( PMACC_CUDA_ENABLED == 1 ) - /* - takes care that `next[index]` is visible to all threads on the gpu - * - this is needed because later on in this method we change `next` - * of an other frame, this must be done in order! + /** + * Sets frame as the last frame of a supercell. 
+ * + * @param frame frame to set as last frame + * @param idx position of supercell */ - __threadfence( ); + template + DINLINE void setAsLastFrame(T_Acc const& acc, FramePointer& frame, DataSpace const& idx) + { + FrameType** lastFrameNativPtr = &(getSuperCell(idx).lastFramePtr); + + frame->nextFrame = FramePtr(); + frame->previousFrame = FramePtr(*lastFrameNativPtr); +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) + /* - takes care that `next[index]` is visible to all threads on the gpu + * - this is needed because later on in this method we change `next` + * of an other frame, this must be done in order! + */ + __threadfence(); #endif - FramePtr oldLastFramePtr( - (FrameType*) atomicExch( + FramePtr oldLastFramePtr((FrameType*) cupla::atomicExch( + acc, (unsigned long long int*) lastFrameNativPtr, (unsigned long long int) frame.ptr, - ::alpaka::hierarchy::Grids{} - ) - ); + ::alpaka::hierarchy::Grids{})); - frame->previousFrame = oldLastFramePtr; - if ( oldLastFramePtr.isValid( ) ) - { - oldLastFramePtr->nextFrame = frame; - } - else - { - //we add the first frame in supercell - getSuperCell( idx ).firstFramePtr = frame.ptr; + frame->previousFrame = oldLastFramePtr; + if(oldLastFramePtr.isValid()) + { + oldLastFramePtr->nextFrame = frame; + } + else + { + // we add the first frame in supercell + getSuperCell(idx).firstFramePtr = frame.ptr; + } } - } - /** - * Removes the last frame of a supercell. - * This call is not threadsave, only one thread from a supercell may call this function. - * @param idx position of supercell - * @return true if more frames in list, else false - */ - DINLINE bool removeLastFrame( const DataSpace &idx ) - { - //!\todo this is not thread save - FrameType** lastFrameNativPtr = &(getSuperCell( idx ).lastFramePtr); - - FramePtr last( *lastFrameNativPtr ); - if ( last.isValid( ) ) + /** + * Removes the last frame of a supercell. + * This call is not threadsave, only one thread from a supercell may call this function. 
+ * @param idx position of supercell + * @return true if more frames in list, else false + */ + template + DINLINE bool removeLastFrame(const T_Acc& acc, const DataSpace& idx) { - FramePtr prev( last->previousFrame ); + //!\todo this is not thread save + FrameType** lastFrameNativPtr = &(getSuperCell(idx).lastFramePtr); - if ( prev.isValid( ) ) + FramePtr last(*lastFrameNativPtr); + if(last.isValid()) { - prev->nextFrame = FramePtr( ); //set to invalid frame - *lastFrameNativPtr = prev.ptr; //set new last frame - removeFrame( last ); - return true; + FramePtr prev(last->previousFrame); + + if(prev.isValid()) + { + prev->nextFrame = FramePtr(); // set to invalid frame + *lastFrameNativPtr = prev.ptr; // set new last frame + removeFrame(acc, last); + return true; + } + // remove last frame of supercell + getSuperCell(idx).firstFramePtr = nullptr; + getSuperCell(idx).lastFramePtr = nullptr; + + removeFrame(acc, last); } - //remove last frame of supercell - getSuperCell( idx ).firstFramePtr = nullptr; - getSuperCell( idx ).lastFramePtr = nullptr; - - removeFrame( last ); + return false; } - return false; - } - HDINLINE SuperCellType& getSuperCell( DataSpace idx ) const - { - return BaseType::operator()(idx); - } -}; + HDINLINE SuperCellType& getSuperCell(DataSpace idx) const + { + return BaseType::operator()(idx); + } + }; -} +} // namespace pmacc diff --git a/include/pmacc/particles/memory/boxes/PushDataBox.hpp b/include/pmacc/particles/memory/boxes/PushDataBox.hpp index c5041727a4..e503268610 100644 --- a/include/pmacc/particles/memory/boxes/PushDataBox.hpp +++ b/include/pmacc/particles/memory/boxes/PushDataBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Benjamin Worpitz * * This file is part of PMacc. @@ -21,7 +21,6 @@ */ - #pragma once @@ -32,8 +31,6 @@ namespace pmacc { - - /** * Implements a Box to which elements can only be added, using atomic operations. 
* @@ -41,10 +38,9 @@ namespace pmacc * @tparam VALUE datatype for values addresses point to */ template - class PushDataBox : public DataBox > + class PushDataBox : public DataBox> { public: - /** * Constructor. * @@ -52,11 +48,11 @@ namespace pmacc * @param offset relative offset to pointer start address * @param currentSize size of the buffer data points to */ - HDINLINE PushDataBox(VALUE *data, TYPE *currentSize, DataSpace offset=DataSpace(0)) : - DataBox >(PitchedBox ( data, offset)), - currentSize(currentSize),maxSize(0) /*\todo implement max size*/ + HDINLINE PushDataBox(VALUE* data, TYPE* currentSize, DataSpace offset = DataSpace(0)) + : DataBox>(PitchedBox(data, offset)) + , currentSize(currentSize) + , maxSize(0) /*\todo implement max size*/ { - } /** Increases the size of the stack with count elements in an atomic operation @@ -73,11 +69,11 @@ namespace pmacc * * @return a TileDataBox of size count pointing to the new stack elements */ - template< typename T_Acc, typename T_Hierarchy > - HDINLINE TileDataBox pushN(T_Acc const & acc, TYPE count, T_Hierarchy const & hierarchy) + template + HDINLINE TileDataBox pushN(T_Acc const& acc, TYPE count, T_Hierarchy const& hierarchy) { - TYPE old_addr = atomicAdd(currentSize, count, hierarchy); - return TileDataBox (this->fixedPointer, DataSpace(old_addr)); + TYPE old_addr = cupla::atomicAdd(acc, currentSize, count, hierarchy); + return TileDataBox(this->fixedPointer, DataSpace(old_addr)); } /** Adds a value to the stack in an atomic operation. 
@@ -94,15 +90,15 @@ namespace pmacc * * @return a TileDataBox of size count pointing to the new stack elements */ - template< typename T_Acc, typename T_Hierarchy > - HDINLINE void push(T_Acc const & acc, VALUE val, T_Hierarchy const & hierarchy) + template + HDINLINE void push(T_Acc const& acc, VALUE val, T_Hierarchy const& hierarchy) { - TYPE old_addr = atomicAdd(currentSize, 1, hierarchy); + TYPE old_addr = cupla::atomicAdd(acc, currentSize, 1, hierarchy); (*this)[old_addr] = val; } protected: - PMACC_ALIGN(maxSize,TYPE); - PMACC_ALIGN(currentSize,TYPE*); + PMACC_ALIGN(maxSize, TYPE); + PMACC_ALIGN(currentSize, TYPE*); }; -} +} // namespace pmacc diff --git a/include/pmacc/particles/memory/boxes/TileDataBox.hpp b/include/pmacc/particles/memory/boxes/TileDataBox.hpp index 049ba03b01..bff337df03 100644 --- a/include/pmacc/particles/memory/boxes/TileDataBox.hpp +++ b/include/pmacc/particles/memory/boxes/TileDataBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -28,81 +28,74 @@ namespace pmacc { - -template -class VectorDataBox : public DataBox > -{ -public: - typedef DataBox > BaseType; - typedef TYPE type; - - template struct result; - - template - struct result < F(T)> - { - typedef TYPE& type; - }; - - template - struct result < const F(T)> + template + class VectorDataBox : public DataBox> { - typedef const TYPE& type; + public: + typedef DataBox> BaseType; + typedef TYPE type; + + template + struct result; + + template + struct result + { + typedef TYPE& type; + }; + + template + struct result + { + typedef const TYPE& type; + }; + + HDINLINE VectorDataBox(TYPE* pointer, const DataSpace& offset = DataSpace(0)) + : BaseType(PitchedBox(pointer, offset)) + { + } + + HDINLINE VectorDataBox() + { + } }; - HDINLINE VectorDataBox(TYPE* pointer, - const DataSpace &offset = DataSpace(0)) : - BaseType(PitchedBox(pointer, offset)) - { - } - - HDINLINE VectorDataBox() - { - } - - -}; - -/** - * Specifies a one-dimensional DataBox for more convenient usage. - * - * @tparam TYPE type of data represented by the DataBox - */ -template -class TileDataBox : public VectorDataBox -{ -public: - typedef VectorDataBox BaseType; - - HDINLINE TileDataBox(TYPE* pointer, - const DataSpace &offset = DataSpace(0), - uint32_t size = 0) : - BaseType(pointer, offset), size(size) - { - } - /** - * Returns size of the Box. + * Specifies a one-dimensional DataBox for more convenient usage. 
* - * @return size of this TileDataBox + * @tparam TYPE type of data represented by the DataBox */ - HDINLINE int getSize() - { - return size; - } - - /*object is not initialized valid, copy a valid instance to this object to get a valid instance*/ - HDINLINE TileDataBox() + template + class TileDataBox : public VectorDataBox { - } - - -protected: - - PMACC_ALIGN(size, size_t); - -}; - + public: + typedef VectorDataBox BaseType; + + HDINLINE TileDataBox(TYPE* pointer, const DataSpace& offset = DataSpace(0), uint32_t size = 0) + : BaseType(pointer, offset) + , size(size) + { + } + + /** + * Returns size of the Box. + * + * @return size of this TileDataBox + */ + HDINLINE int getSize() + { + return size; + } + + /*object is not initialized valid, copy a valid instance to this object to get a valid instance*/ + HDINLINE TileDataBox() + { + } + + + protected: + PMACC_ALIGN(size, size_t); + }; -} +} // namespace pmacc diff --git a/include/pmacc/particles/memory/buffers/MallocMCBuffer.hpp b/include/pmacc/particles/memory/buffers/MallocMCBuffer.hpp index 896c69e13a..3af7745493 100644 --- a/include/pmacc/particles/memory/buffers/MallocMCBuffer.hpp +++ b/include/pmacc/particles/memory/buffers/MallocMCBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. 
* @@ -21,24 +21,25 @@ #pragma once - #include "pmacc/dataManagement/ISimulationData.hpp" -#include - #include -#include +#include + +#if(PMACC_CUDA_ENABLED == 1 || ALPAKA_ACC_GPU_HIP_ENABLED == 1) + +# include +# include namespace pmacc { - - template< typename T_DeviceHeap > + template class MallocMCBuffer : public ISimulationData { public: using DeviceHeap = T_DeviceHeap; - MallocMCBuffer( const std::shared_ptr& deviceHeap ); + MallocMCBuffer(const std::shared_ptr& deviceHeap); virtual ~MallocMCBuffer(); @@ -60,7 +61,6 @@ namespace pmacc void synchronize() override; private: - char* hostPtr; int64_t hostBufferOffset; mallocMC::HeapInfo deviceHeapInfo; @@ -69,4 +69,39 @@ namespace pmacc } // namespace pmacc -#include "pmacc/particles/memory/buffers/MallocMCBuffer.tpp" +# include "pmacc/particles/memory/buffers/MallocMCBuffer.tpp" + +#else + +namespace pmacc +{ + template + class MallocMCBuffer : public ISimulationData + { + public: + MallocMCBuffer(const std::shared_ptr&); + + virtual ~MallocMCBuffer() = default; + + SimulationDataId getUniqueId() override + { + return getName(); + } + + static std::string getName() + { + return std::string("MallocMCBuffer"); + } + + int64_t getOffset() + { + return 0u; + } + + void synchronize() override + { + } + }; + +} // namespace pmacc +#endif diff --git a/include/pmacc/particles/memory/buffers/MallocMCBuffer.tpp b/include/pmacc/particles/memory/buffers/MallocMCBuffer.tpp index 7c6c916f45..7003ac35c8 100644 --- a/include/pmacc/particles/memory/buffers/MallocMCBuffer.tpp +++ b/include/pmacc/particles/memory/buffers/MallocMCBuffer.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Alexander Grund +/* Copyright 2015-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. 
* @@ -21,58 +21,71 @@ #pragma once -#include "pmacc/particles/memory/buffers/MallocMCBuffer.hpp" -#include "pmacc/types.hpp" -#include "pmacc/eventSystem/EventSystem.hpp" +#if(PMACC_CUDA_ENABLED == 1 || ALPAKA_ACC_GPU_HIP_ENABLED == 1) -#include +# include "pmacc/particles/memory/buffers/MallocMCBuffer.hpp" +# include "pmacc/types.hpp" +# include "pmacc/eventSystem/EventSystem.hpp" +# include -namespace pmacc -{ -template< typename T_DeviceHeap > -MallocMCBuffer< T_DeviceHeap >::MallocMCBuffer( const std::shared_ptr& deviceHeap ) : - hostPtr( nullptr ), - /* currently mallocMC has only one heap */ - deviceHeapInfo( deviceHeap->getHeapLocations( )[ 0 ] ), - hostBufferOffset( 0 ) -{ -} -template< typename T_DeviceHeap > -MallocMCBuffer< T_DeviceHeap >::~MallocMCBuffer( ) +namespace pmacc { - if ( hostPtr != nullptr ) - cudaHostUnregister(hostPtr); - - __deleteArray(hostPtr); + template + MallocMCBuffer::MallocMCBuffer(const std::shared_ptr& deviceHeap) + : hostPtr(nullptr) + , + /* currently mallocMC has only one heap */ + deviceHeapInfo(deviceHeap->getHeapLocations()[0]) + , hostBufferOffset(0) + { + } -} + template + MallocMCBuffer::~MallocMCBuffer() + { + if(hostPtr != nullptr) + { +# if(PMACC_CUDA_ENABLED == 1) + cudaHostUnregister(hostPtr); + __deleteArray(hostPtr); +# else + CUDA_CHECK_NO_EXCEPT((cuplaError_t) hipFree(hostPtr)); +# endif + } + } -template< typename T_DeviceHeap > -void MallocMCBuffer< T_DeviceHeap >::synchronize( ) -{ - /** \todo: we had no abstraction to create a host buffer and a pseudo - * device buffer (out of the mallocMC ptr) and copy both with our event - * system. 
- * WORKAROUND: use native cuda calls :-( - */ - if ( hostPtr == nullptr ) + template + void MallocMCBuffer::synchronize() { - /* use `new` and than `cudaHostRegister` is faster than `cudaMallocHost` - * but with the some result (create page-locked memory) + /** \todo: we had no abstraction to create a host buffer and a pseudo + * device buffer (out of the mallocMC ptr) and copy both with our event + * system. + * WORKAROUND: use native CUDA/HIP calls :-( */ - hostPtr = new char[deviceHeapInfo.size]; - CUDA_CHECK((cuplaError_t)cudaHostRegister(hostPtr, deviceHeapInfo.size, cudaHostRegisterDefault)); - + if(hostPtr == nullptr) + { +# if(PMACC_CUDA_ENABLED == 1) + /* use `new` and than `cudaHostRegister` is faster than `cudaMallocHost` + * but with the some result (create page-locked memory) + */ + hostPtr = new char[deviceHeapInfo.size]; + CUDA_CHECK((cuplaError_t) cudaHostRegister(hostPtr, deviceHeapInfo.size, cudaHostRegisterDefault)); +# else + // we do not use hipHostRegister because this would require a strict alignment + // https://github.com/alpaka-group/alpaka/pull/896 + CUDA_CHECK((cuplaError_t) hipHostMalloc((void**) &hostPtr, deviceHeapInfo.size, hipHostMallocDefault)); +# endif - this->hostBufferOffset = static_cast(reinterpret_cast(deviceHeapInfo.p) - hostPtr); + this->hostBufferOffset = static_cast(reinterpret_cast(deviceHeapInfo.p) - hostPtr); + } + /* add event system hints */ + __startOperation(ITask::TASK_DEVICE); + __startOperation(ITask::TASK_HOST); + CUDA_CHECK(cuplaMemcpy(hostPtr, deviceHeapInfo.p, deviceHeapInfo.size, cuplaMemcpyDeviceToHost)); } - /* add event system hints */ - __startOperation(ITask::TASK_CUDA); - __startOperation(ITask::TASK_HOST); - CUDA_CHECK(cudaMemcpy(hostPtr, deviceHeapInfo.p, deviceHeapInfo.size, cudaMemcpyDeviceToHost)); -} +} // namespace pmacc -} //namespace pmacc +#endif diff --git a/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp b/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp index 
f5721b26a2..ef702e9259 100644 --- a/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp +++ b/include/pmacc/particles/memory/buffers/ParticlesBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -52,331 +52,293 @@ namespace pmacc { - -/** - * Describes DIM-dimensional buffer for particles data on the host. - * - * @tParam T_ParticleDescription Object which describe a frame @see ParticleDescription.hpp - * @tparam SuperCellSize_ TVec which descripe size of a superce - * @tparam DIM dimension of the buffer (1-3) - */ -template -class ParticlesBuffer -{ -public: - - /** create static array - */ - template< uint32_t T_size > - struct OperatorCreatePairStaticArray - { - - template - struct apply - { - typedef bmpl::pair< - X, - StaticArray< - typename traits::Resolve::type::type, - bmpl::integral_c - > - > type; - }; - }; - - /** type of the border frame management object - * - * contains: - * - superCell position of the border frames inside a given range - * - start position inside the exchange stack for frames - * - number of frames corresponding to the superCell position - */ - typedef ExchangeMemoryIndex< - vint_t, - DIM - 1 - > BorderFrameIndex; - - typedef SuperCellSize_ SuperCellSize; - - typedef typename MakeSeq< - typename T_ParticleDescription::ValueTypeSeq, - localCellIdx, - multiMask - >::type ParticleAttributeList; - - typedef typename MakeSeq< - typename T_ParticleDescription::ValueTypeSeq, - localCellIdx - >::type ParticleAttributeListBorder; - - typedef - typename ReplaceValueTypeSeq< - T_ParticleDescription, - ParticleAttributeList - >::type FrameDescriptionWithManagementAttributes; - - /** double linked list pointer */ - typedef - typename MakeSeq< - PreviousFramePtr<>, - NextFramePtr<> - >::type LinkedListPointer; - - /* extent particle description with pointer to a frame*/ 
- typedef typename ReplaceFrameExtensionSeq< - FrameDescriptionWithManagementAttributes, - LinkedListPointer - >::type FrameDescription; - - /** frame definition - * - * a group of particles is stored as frame - */ - typedef Frame< - OperatorCreatePairStaticArray< - pmacc::math::CT::volume< SuperCellSize >::type::value - >, - FrameDescription - > FrameType; - - typedef typename ReplaceValueTypeSeq< - T_ParticleDescription, - ParticleAttributeListBorder - >::type FrameDescriptionBorder; - - /** frame which is used to communicate particles to neighbors - * - * - each frame contains only one particle - * - local administration attributes of a particle are removed - */ - typedef Frame< - OperatorCreatePairStaticArray< 1u >, - FrameDescriptionBorder - > FrameTypeBorder; - - typedef SuperCell SuperCellType; - - typedef T_DeviceHeap DeviceHeap; - /* Type of the particle box which particle buffer create */ - typedef ParticlesBox< FrameType, typename DeviceHeap::AllocatorHandle, DIM> ParticlesBoxType; - -private: - - /* this enum is used only for internal calculations */ - enum - { - SizeOfOneBorderElement = (sizeof (FrameTypeBorder) + sizeof (BorderFrameIndex)) - }; - -public: - /** - * Constructor. + * Describes DIM-dimensional buffer for particles data on the host. 
* - * @param deviceHeap device heap memory allocator - * @param layout number of cell per dimension - * @param superCellSize size of one super cell - * @param gpuMemory how many memory on device is used for this instance (in byte) + * @tParam T_ParticleDescription Object which describe a frame @see ParticleDescription.hpp + * @tparam SuperCellSize_ TVec which descripe size of a superce + * @tparam DIM dimension of the buffer (1-3) */ - ParticlesBuffer(const std::shared_ptr& deviceHeap, DataSpace layout, DataSpace superCellSize) : - m_deviceHeap(deviceHeap), superCellSize(superCellSize), gridSize(layout), framesExchanges(nullptr) + template + class ParticlesBuffer { + public: + /** create static array + */ + template + struct OperatorCreatePairStaticArray + { + template + struct apply + { + typedef bmpl:: + pair::type::type, bmpl::integral_c>> + type; + }; + }; - exchangeMemoryIndexer = new GridBuffer (DataSpace (0)); - framesExchanges = new GridBuffer< FrameType, DIM1, FrameTypeBorder > (DataSpace (0)); - - DataSpace superCellsCount = gridSize / superCellSize; - - superCells = new GridBuffer (superCellsCount); - - reset(); - } - - /** - * Destructor. - */ - virtual ~ParticlesBuffer() - { - __delete(superCells); - __delete(framesExchanges); - __delete(exchangeMemoryIndexer); - } - - /** - * Resets all internal buffers. - */ - void reset() - { - - superCells->getDeviceBuffer().setValue(SuperCellType ()); - superCells->getHostBuffer().setValue(SuperCellType ()); - } - - /** - * Adds an exchange buffer to frames. 
- * - * @param receive Mask describing receive directions - * @param usedMemory memory to be used for this exchange - */ - void addExchange(Mask receive, size_t usedMemory, uint32_t communicationTag) - { - - size_t numFrameTypeBorders = usedMemory / SizeOfOneBorderElement; - - framesExchanges->addExchangeBuffer(receive, DataSpace (numFrameTypeBorders), communicationTag, true, false); - - exchangeMemoryIndexer->addExchangeBuffer(receive, DataSpace (numFrameTypeBorders), communicationTag | (1u << (20 - 5)), true, false); - } - - /** - * Returns a ParticlesBox for device frame data. - * - * @return device frames ParticlesBox - */ - ParticlesBoxType getDeviceParticleBox() - { - - return ParticlesBoxType( - superCells->getDeviceBuffer().getDataBox(), - m_deviceHeap->getAllocatorHandle() - ); - } - - /** - * Returns a ParticlesBox for host frame data. - * - * @return host frames ParticlesBox - */ - ParticlesBoxType getHostParticleBox(int64_t memoryOffset) - { - - return ParticlesBoxType ( - superCells->getHostBuffer().getDataBox(), - m_deviceHeap->getAllocatorHandle(), - memoryOffset - ); - } - - /** - * Returns if the buffer has a send exchange in ex direction. - * - * @param ex direction to query - * @return true if buffer has send exchange for ex - */ - bool hasSendExchange(uint32_t ex) - { - - return framesExchanges->hasSendExchange(ex); - } - - /** - * Returns if the buffer has a receive exchange in ex direction. 
- * - * @param ex direction to query - * @return true if buffer has receive exchange for ex - */ - bool hasReceiveExchange(uint32_t ex) - { - - return framesExchanges->hasReceiveExchange(ex); - } - - StackExchangeBuffer getSendExchangeStack(uint32_t ex) - { - - return StackExchangeBuffer - (framesExchanges->getSendExchange(ex), exchangeMemoryIndexer->getSendExchange(ex)); - } - - StackExchangeBuffer getReceiveExchangeStack(uint32_t ex) - { - - return StackExchangeBuffer - (framesExchanges->getReceiveExchange(ex), exchangeMemoryIndexer->getReceiveExchange(ex)); - } - - /** - * Starts sync data from own device buffer to neighbor device buffer. - * - * GridBuffer - * - */ - EventTask asyncCommunication(EventTask serialEvent) - { - - return framesExchanges->asyncCommunication(serialEvent) + - exchangeMemoryIndexer->asyncCommunication(serialEvent); - } - - EventTask asyncSendParticles(EventTask serialEvent, uint32_t ex) - { - /* store each gpu-free event separately to avoid race conditions */ - EventTask framesExchangesGPUEvent; - EventTask exchangeMemoryIndexerGPUEvent; - EventTask returnEvent = framesExchanges->asyncSend(serialEvent, ex) + - exchangeMemoryIndexer->asyncSend(serialEvent, ex); + /** type of the border frame management object + * + * contains: + * - superCell position of the border frames inside a given range + * - start position inside the exchange stack for frames + * - number of frames corresponding to the superCell position + */ + typedef ExchangeMemoryIndex BorderFrameIndex; + + typedef SuperCellSize_ SuperCellSize; + + typedef typename MakeSeq::type + ParticleAttributeList; + + typedef typename MakeSeq::type + ParticleAttributeListBorder; + + typedef typename ReplaceValueTypeSeq::type + FrameDescriptionWithManagementAttributes; + + /** double linked list pointer */ + typedef typename MakeSeq, NextFramePtr<>>::type LinkedListPointer; + + /* extent particle description with pointer to a frame*/ + typedef typename ReplaceFrameExtensionSeq::type + 
FrameDescription; + + /** frame definition + * + * a group of particles is stored as frame + */ + typedef Frame< + OperatorCreatePairStaticArray::type::value>, + FrameDescription> + FrameType; + + typedef typename ReplaceValueTypeSeq::type + FrameDescriptionBorder; + + /** frame which is used to communicate particles to neighbors + * + * - each frame contains only one particle + * - local administration attributes of a particle are removed + */ + typedef Frame, FrameDescriptionBorder> FrameTypeBorder; + + typedef SuperCell SuperCellType; + + typedef T_DeviceHeap DeviceHeap; + /* Type of the particle box which particle buffer create */ + typedef ParticlesBox ParticlesBoxType; + + private: + /* this enum is used only for internal calculations */ + enum + { + SizeOfOneBorderElement = (sizeof(FrameTypeBorder) + sizeof(BorderFrameIndex)) + }; - return returnEvent; - } + public: + /** + * Constructor. + * + * @param deviceHeap device heap memory allocator + * @param layout number of cell per dimension + * @param superCellSize size of one super cell + * @param gpuMemory how many memory on device is used for this instance (in byte) + */ + ParticlesBuffer( + const std::shared_ptr& deviceHeap, + DataSpace layout, + DataSpace superCellSize) + : m_deviceHeap(deviceHeap) + , superCellSize(superCellSize) + , gridSize(layout) + , framesExchanges(nullptr) + { + exchangeMemoryIndexer = new GridBuffer(DataSpace(0)); + framesExchanges = new GridBuffer(DataSpace(0)); - EventTask asyncReceiveParticles(EventTask serialEvent, uint32_t ex) - { + DataSpace superCellsCount = gridSize / superCellSize; - return framesExchanges->asyncReceive(serialEvent, ex) + - exchangeMemoryIndexer->asyncReceive(serialEvent, ex); - } + superCells = new GridBuffer(superCellsCount); - /** - * Returns number of supercells in each dimension. 
- * - * @return number of supercells - */ - DataSpace getSuperCellsCount() - { + reset(); + } - PMACC_ASSERT(superCells != nullptr); - return superCells->getGridLayout().getDataSpace(); - } + /** + * Destructor. + */ + virtual ~ParticlesBuffer() + { + __delete(superCells); + __delete(framesExchanges); + __delete(exchangeMemoryIndexer); + } + + /** + * Resets all internal buffers. + */ + void reset() + { + superCells->getDeviceBuffer().setValue(SuperCellType()); + superCells->getHostBuffer().setValue(SuperCellType()); + } + + /** + * Adds an exchange buffer to frames. + * + * @param receive Mask describing receive directions + * @param usedMemory memory to be used for this exchange + */ + void addExchange(Mask receive, size_t usedMemory, uint32_t communicationTag) + { + size_t numFrameTypeBorders = usedMemory / SizeOfOneBorderElement; + + framesExchanges + ->addExchangeBuffer(receive, DataSpace(numFrameTypeBorders), communicationTag, true, false); + + exchangeMemoryIndexer->addExchangeBuffer( + receive, + DataSpace(numFrameTypeBorders), + communicationTag | (1u << (20 - 5)), + true, + false); + } + + /** + * Returns a ParticlesBox for device frame data. + * + * @return device frames ParticlesBox + */ + ParticlesBoxType getDeviceParticleBox() + { + return ParticlesBoxType(superCells->getDeviceBuffer().getDataBox(), m_deviceHeap->getAllocatorHandle()); + } + + /** + * Returns a ParticlesBox for host frame data. + * + * @return host frames ParticlesBox + */ + ParticlesBoxType getHostParticleBox(int64_t memoryOffset) + { + return ParticlesBoxType( + superCells->getHostBuffer().getDataBox(), + m_deviceHeap->getAllocatorHandle(), + memoryOffset); + } + + /** + * Returns if the buffer has a send exchange in ex direction. 
+ * + * @param ex direction to query + * @return true if buffer has send exchange for ex + */ + bool hasSendExchange(uint32_t ex) + { + return framesExchanges->hasSendExchange(ex); + } + + /** + * Returns if the buffer has a receive exchange in ex direction. + * + * @param ex direction to query + * @return true if buffer has receive exchange for ex + */ + bool hasReceiveExchange(uint32_t ex) + { + return framesExchanges->hasReceiveExchange(ex); + } - /** - * Returns number of supercells in each dimension. - * - * @return number of supercells - */ - GridLayout getSuperCellsLayout() - { + StackExchangeBuffer getSendExchangeStack(uint32_t ex) + { + return StackExchangeBuffer( + framesExchanges->getSendExchange(ex), + exchangeMemoryIndexer->getSendExchange(ex)); + } - PMACC_ASSERT(superCells != nullptr); - return superCells->getGridLayout(); - } + StackExchangeBuffer getReceiveExchangeStack(uint32_t ex) + { + return StackExchangeBuffer( + framesExchanges->getReceiveExchange(ex), + exchangeMemoryIndexer->getReceiveExchange(ex)); + } + + /** + * Starts sync data from own device buffer to neighbor device buffer. + * + * GridBuffer + * + */ + EventTask asyncCommunication(EventTask serialEvent) + { + return framesExchanges->asyncCommunication(serialEvent) + + exchangeMemoryIndexer->asyncCommunication(serialEvent); + } - /** - * Returns size of supercells in each dimension. 
- * - * @return size of supercells - */ - DataSpace getSuperCellSize() - { + EventTask asyncSendParticles(EventTask serialEvent, uint32_t ex) + { + /* store each gpu-free event separately to avoid race conditions */ + EventTask framesExchangesGPUEvent; + EventTask exchangeMemoryIndexerGPUEvent; + EventTask returnEvent + = framesExchanges->asyncSend(serialEvent, ex) + exchangeMemoryIndexer->asyncSend(serialEvent, ex); - return superCellSize; - } + return returnEvent; + } - void deviceToHost() - { - superCells->deviceToHost(); - } + EventTask asyncReceiveParticles(EventTask serialEvent, uint32_t ex) + { + return framesExchanges->asyncReceive(serialEvent, ex) + + exchangeMemoryIndexer->asyncReceive(serialEvent, ex); + } + + /** + * Returns number of supercells in each dimension. + * + * @return number of supercells + */ + DataSpace getSuperCellsCount() + { + PMACC_ASSERT(superCells != nullptr); + return superCells->getGridLayout().getDataSpace(); + } + + /** + * Returns number of supercells in each dimension. + * + * @return number of supercells + */ + GridLayout getSuperCellsLayout() + { + PMACC_ASSERT(superCells != nullptr); + return superCells->getGridLayout(); + } + + /** + * Returns size of supercells in each dimension. 
+ * + * @return size of supercells + */ + DataSpace getSuperCellSize() + { + return superCellSize; + } + void deviceToHost() + { + superCells->deviceToHost(); + } -private: - GridBuffer *exchangeMemoryIndexer; - GridBuffer *superCells; - /*GridBuffer for hold borderFrames, we need a own buffer to create first exchanges without core memory*/ - GridBuffer< FrameType, DIM1, FrameTypeBorder> *framesExchanges; + private: + GridBuffer* exchangeMemoryIndexer; - DataSpace superCellSize; - DataSpace gridSize; - std::shared_ptr m_deviceHeap; + GridBuffer* superCells; + /*GridBuffer for hold borderFrames, we need a own buffer to create first exchanges without core memory*/ + GridBuffer* framesExchanges; -}; -} + DataSpace superCellSize; + DataSpace gridSize; + std::shared_ptr m_deviceHeap; + }; +} // namespace pmacc diff --git a/include/pmacc/particles/memory/buffers/StackExchangeBuffer.hpp b/include/pmacc/particles/memory/buffers/StackExchangeBuffer.hpp index f1f2db8903..30e7639743 100644 --- a/include/pmacc/particles/memory/buffers/StackExchangeBuffer.hpp +++ b/include/pmacc/particles/memory/buffers/StackExchangeBuffer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Felix Schmitt, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -28,9 +28,6 @@ namespace pmacc { - - - /** * Can be used for creating several DataBox types from an Exchange. * @@ -40,7 +37,6 @@ namespace pmacc class StackExchangeBuffer { public: - /** * Create a stack from any ExchangeBuffer. 
* @@ -48,10 +44,10 @@ namespace pmacc * * @param stack Exchange */ - StackExchangeBuffer(Exchange &stack, Exchange &stackIndexer) : - stack(stack), stackIndexer(stackIndexer) + StackExchangeBuffer(Exchange& stack, Exchange& stackIndexer) + : stack(stack) + , stackIndexer(stackIndexer) { - } /** @@ -61,13 +57,13 @@ namespace pmacc */ ExchangePushDataBox getHostExchangePushDataBox() { - return ExchangePushDataBox ( - stack.getHostBuffer().getBasePointer(), - stack.getHostBuffer().getCurrentSizePointer(), - stack.getHostBuffer().getDataSpace().productOfComponents(), - PushDataBox ( - stackIndexer.getHostBuffer().getBasePointer(), - stackIndexer.getHostBuffer().getCurrentSizePointer())); + return ExchangePushDataBox( + stack.getHostBuffer().getBasePointer(), + stack.getHostBuffer().getCurrentSizePointer(), + stack.getHostBuffer().getDataSpace().productOfComponents(), + PushDataBox( + stackIndexer.getHostBuffer().getBasePointer(), + stackIndexer.getHostBuffer().getCurrentSizePointer())); } /** @@ -77,10 +73,9 @@ namespace pmacc */ ExchangePopDataBox getHostExchangePopDataBox() { - return ExchangePopDataBox ( - stack.getHostBuffer().getDataBox(), - stackIndexer.getHostBuffer().getDataBox() - ); + return ExchangePopDataBox( + stack.getHostBuffer().getDataBox(), + stackIndexer.getHostBuffer().getDataBox()); } /** @@ -92,13 +87,13 @@ namespace pmacc { PMACC_ASSERT(stack.getDeviceBuffer().hasCurrentSizeOnDevice() == true); PMACC_ASSERT(stackIndexer.getDeviceBuffer().hasCurrentSizeOnDevice() == true); - return ExchangePushDataBox ( - stack.getDeviceBuffer().getBasePointer(), - (vint_t*) stack.getDeviceBuffer().getCurrentSizeOnDevicePointer(), - stack.getDeviceBuffer().getDataSpace().productOfComponents(), - PushDataBox ( - stackIndexer.getDeviceBuffer().getBasePointer(), - (vint_t*) stackIndexer.getDeviceBuffer().getCurrentSizeOnDevicePointer())); + return ExchangePushDataBox( + stack.getDeviceBuffer().getBasePointer(), + (vint_t*) 
stack.getDeviceBuffer().getCurrentSizeOnDevicePointer(), + stack.getDeviceBuffer().getDataSpace().productOfComponents(), + PushDataBox( + stackIndexer.getDeviceBuffer().getBasePointer(), + (vint_t*) stackIndexer.getDeviceBuffer().getCurrentSizeOnDevicePointer())); } /** @@ -108,20 +103,24 @@ namespace pmacc */ ExchangePopDataBox getDeviceExchangePopDataBox() { - return ExchangePopDataBox ( - stack.getDeviceBuffer().getDataBox(), - stackIndexer.getDeviceBuffer().getDataBox() - ); + return ExchangePopDataBox( + stack.getDeviceBuffer().getDataBox(), + stackIndexer.getDeviceBuffer().getDataBox()); } void setCurrentSize(const size_t size) { // do host and device setCurrentSize parallel EventTask split = __getTransactionEvent(); - __startTransaction(split); - stackIndexer.getHostBuffer().setCurrentSize(size); - stack.getHostBuffer().setCurrentSize(size); - EventTask e1 = __endTransaction(); + EventTask e1; + + if(!Environment<>::get().isMpiDirectEnabled()) + { + __startTransaction(split); + stackIndexer.getHostBuffer().setCurrentSize(size); + stack.getHostBuffer().setCurrentSize(size); + e1 = __endTransaction(); + } __startTransaction(split); stackIndexer.getDeviceBuffer().setCurrentSize(size); @@ -135,7 +134,13 @@ namespace pmacc size_t getHostCurrentSize() { - return stackIndexer.getHostBuffer().getCurrentSize(); + size_t result = 0u; + if(Environment<>::get().isMpiDirectEnabled()) + result = stackIndexer.getDeviceBuffer().getCurrentSize(); + else + result = stackIndexer.getHostBuffer().getCurrentSize(); + + return result; } size_t getDeviceCurrentSize() @@ -150,17 +155,25 @@ namespace pmacc size_t getHostParticlesCurrentSize() { + if(Environment<>::get().isMpiDirectEnabled()) + return stack.getDeviceBuffer().getCurrentSize(); + return stack.getHostBuffer().getCurrentSize(); } size_t getMaxParticlesCount() { - return stack.getHostBuffer().getDataSpace().productOfComponents(); + size_t result = 0u; + if(Environment<>::get().isMpiDirectEnabled()) + result = 
stack.getDeviceBuffer().getDataSpace().productOfComponents(); + else + result = stack.getHostBuffer().getDataSpace().productOfComponents(); + + return result; } private: - - Exchange &getExchangeBuffer() + Exchange& getExchangeBuffer() { return stack; } @@ -168,4 +181,4 @@ namespace pmacc Exchange& stack; Exchange& stackIndexer; }; -} +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/ExchangeMemoryIndex.hpp b/include/pmacc/particles/memory/dataTypes/ExchangeMemoryIndex.hpp index 259c1c7800..bb64465e1b 100644 --- a/include/pmacc/particles/memory/dataTypes/ExchangeMemoryIndex.hpp +++ b/include/pmacc/particles/memory/dataTypes/ExchangeMemoryIndex.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -26,49 +26,47 @@ namespace pmacc { - -template -class ExchangeMemoryIndex -{ -public: - - HDINLINE ExchangeMemoryIndex() : startIdx(0), count(0) + template + class ExchangeMemoryIndex { - } + public: + HDINLINE ExchangeMemoryIndex() : startIdx(0), count(0) + { + } - HDINLINE void setStartIndex(TYPE startIdx) - { - this->startIdx = startIdx; - } + HDINLINE void setStartIndex(TYPE startIdx) + { + this->startIdx = startIdx; + } - HDINLINE void setCount(TYPE count) - { - this->count = count; - } + HDINLINE void setCount(TYPE count) + { + this->count = count; + } - HDINLINE void setSuperCell(DataSpace superCell) - { - this->superCell = superCell; - } + HDINLINE void setSuperCell(DataSpace superCell) + { + this->superCell = superCell; + } - HDINLINE TYPE getStartIndex() - { - return startIdx; - } + HDINLINE TYPE getStartIndex() + { + return startIdx; + } - HDINLINE TYPE getCount() - { - return count; - } + HDINLINE TYPE getCount() + { + return count; + } - HDINLINE DataSpace getSuperCell() - { - return superCell; - } -private: + HDINLINE DataSpace getSuperCell() + { + return superCell; + } - PMACC_ALIGN(superCell, 
DataSpace); - PMACC_ALIGN(startIdx, TYPE); - PMACC_ALIGN(count, TYPE); -}; -} + private: + PMACC_ALIGN(superCell, DataSpace); + PMACC_ALIGN(startIdx, TYPE); + PMACC_ALIGN(count, TYPE); + }; +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/FramePointer.hpp b/include/pmacc/particles/memory/dataTypes/FramePointer.hpp index 3c5a88ed19..dc9fc9cfeb 100644 --- a/include/pmacc/particles/memory/dataTypes/FramePointer.hpp +++ b/include/pmacc/particles/memory/dataTypes/FramePointer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. * @@ -26,68 +26,67 @@ namespace pmacc { - -/** Wrapper for a raw pointer a PMacc frame - * - * @tparam T_Type type of the pointed object - */ -template< typename T_Type > -class FramePointer : public Pointer< T_Type > -{ -private: - using Base = Pointer< T_Type >; -public: - using type = typename Base::type; - using PtrType = typename Base::PtrType; - - /** default constructor + /** Wrapper for a raw pointer a PMacc frame * - * the default pointer points to invalid memory + * @tparam T_Type type of the pointed object */ - HDINLINE FramePointer( ) : Base( ) + template + class FramePointer : public Pointer { - } + private: + using Base = Pointer; - HDINLINE FramePointer( PtrType const ptrIn ) : Base( ptrIn ) - { - } + public: + using type = typename Base::type; + using PtrType = typename Base::PtrType; - HDINLINE FramePointer( const Base& other ) : Base( other ) - { - } + /** default constructor + * + * the default pointer points to invalid memory + */ + HDINLINE FramePointer() : Base() + { + } - HDINLINE FramePointer( const FramePointer& other ) : Base( other ) - { - } + HDINLINE FramePointer(PtrType const ptrIn) : Base(ptrIn) + { + } - HDINLINE FramePointer& operator=(const FramePointer& other) - { - Base::operator=(other); - return *this; - } + HDINLINE FramePointer(const Base& other) : Base(other) + { + } - /** access the Nth particle - * - * it is 
not checked whether `FramePointer` points to valid memory - * - * @param idx particle index in the frame - */ - HDINLINE typename type::ParticleType operator[](const uint32_t idx) - { - return (*Base::ptr)[idx]; - } + HDINLINE FramePointer(const FramePointer& other) : Base(other) + { + } - /** access the Nth particle - * - * it is not checked whether `FramePointer` points to valid memory - * - * @param idx particle index in the frame - */ - HDINLINE const typename type::ParticleType operator[](const uint32_t idx) const - { - return (*Base::ptr)[idx]; - } + HDINLINE FramePointer& operator=(const FramePointer& other) + { + Base::operator=(other); + return *this; + } + + /** access the Nth particle + * + * it is not checked whether `FramePointer` points to valid memory + * + * @param idx particle index in the frame + */ + HDINLINE typename type::ParticleType operator[](const uint32_t idx) + { + return (*Base::ptr)[idx]; + } -}; + /** access the Nth particle + * + * it is not checked whether `FramePointer` points to valid memory + * + * @param idx particle index in the frame + */ + HDINLINE const typename type::ParticleType operator[](const uint32_t idx) const + { + return (*Base::ptr)[idx]; + } + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/ListPointer.hpp b/include/pmacc/particles/memory/dataTypes/ListPointer.hpp index b2d3d29325..d24172052f 100644 --- a/include/pmacc/particles/memory/dataTypes/ListPointer.hpp +++ b/include/pmacc/particles/memory/dataTypes/ListPointer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. 
* @@ -27,17 +27,16 @@ namespace pmacc { + template + struct PreviousFramePtr + { + PMACC_ALIGN(previousFrame, Pointer); + }; -template -struct PreviousFramePtr -{ - PMACC_ALIGN(previousFrame, Pointer); -}; - -template -struct NextFramePtr -{ - PMACC_ALIGN(nextFrame, Pointer); -}; + template + struct NextFramePtr + { + PMACC_ALIGN(nextFrame, Pointer); + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/Particle.hpp b/include/pmacc/particles/memory/dataTypes/Particle.hpp index b19354cd28..26333a84a0 100644 --- a/include/pmacc/particles/memory/dataTypes/Particle.hpp +++ b/include/pmacc/particles/memory/dataTypes/Particle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -49,295 +49,246 @@ namespace pmacc { + namespace pmath = pmacc::math; -namespace pmath = pmacc::math; - -/** A single particle of a @see Frame - * - * A instance of this Particle is a representation ("pointer") to the memory - * where the frame is stored. 
- * - * @tparam T_FrameType type of the parent frame - * @tparam T_ValueTypeSeq sequence with all attribute identifiers - * (can be a subset of T_FrameType::ValueTypeSeq) - */ -template -struct Particle : public InheritLinearly -{ - typedef T_FrameType FrameType; - typedef T_ValueTypeSeq ValueTypeSeq; - typedef typename FrameType::Name Name; - typedef typename FrameType::SuperCellSize SuperCellSize; - typedef Particle ThisType; - typedef typename FrameType::MethodsList MethodsList; - - /** index of particle inside the Frame*/ - PMACC_ALIGN(idx, uint32_t); - - /** pointer to parent frame where this particle is from + /** A single particle of a @see Frame * - * ATTENTION: The pointer must be the last member to avoid local memory usage - * https://github.com/ComputationalRadiationPhysics/picongpu/pull/762 - */ - PMACC_ALIGN(frame, FrameType*); - - /** set particle handle to invalid + * A instance of this Particle is a representation ("pointer") to the memory + * where the frame is stored. * - * This method sets the particle handle to invalid. It is possible to test with - * the method isHandleValid if the particle is valid. - * If the particle is set to invalid it is not allowed to call any method other - * than isHandleValid or setHandleInvalid, but it does not mean the particle is - * deactivated outside of this instance. + * @tparam T_FrameType type of the parent frame + * @tparam T_ValueTypeSeq sequence with all attribute identifiers + * (can be a subset of T_FrameType::ValueTypeSeq) */ - HDINLINE void setHandleInvalid() + template + struct Particle : public InheritLinearly { - frame = nullptr; - } - - /** check if particle handle is valid - * - * A valid particle handle means that the memory behind the handle can be used - * savely. A valid handle does not mean that the particle's multiMask is valid (>=1). 
- * - * @return true if the particle handle is valid, else false - */ - HDINLINE bool isHandleValid() const - { - return frame != nullptr; - } - - /** create particle - * - * @param frame reference to parent frame - * @param idx index of particle inside the frame - */ - HDINLINE Particle(FrameType& frame, uint32_t idx) : frame(&frame), idx(idx) - { - } - - template - HDINLINE Particle(const T_OtherParticle& other) : frame(other.frame), idx(other.idx) - { - } - - /** access attribute with a identifier - * - * @param T_Key instance of identifier type - * (can be an alias, value_identifier or any other class) - * @return result of operator[] of the Frame - */ - template - HDINLINE - typename boost::result_of< - typename boost::remove_reference< - typename boost::result_of < FrameType(T_Key)>::type - >::type(uint32_t) - >::type - operator[](const T_Key key) - { - PMACC_CASSERT_MSG_TYPE( - key_not_available, - T_Key, - traits::HasIdentifier< Particle, T_Key >::type::value - ); - - return frame->getIdentifier(key)[idx]; - } - - /** const version of method operator(const T_Key) */ - template - HDINLINE - typename boost::result_of< - typename boost::remove_reference< - typename boost::result_of ::type - >::type(uint32_t) - >::type - operator[](const T_Key key) const - { - PMACC_CASSERT_MSG_TYPE( - key_not_available, - T_Key, - traits::HasIdentifier< Particle, T_Key >::type::value - ); - - return frame->getIdentifier(key)[idx]; - } - - HDINLINE - ThisType& operator=(const ThisType& other) = default; - -private: - /* we disallow to assign this class*/ - template - HDINLINE - ThisType& operator=(const T_OtherParticle& other); - -}; - -namespace traits -{ - -template< - typename T_Key, - typename T_FrameType, - typename T_ValueTypeSeq -> -struct HasIdentifier< - pmacc::Particle< T_FrameType, T_ValueTypeSeq >, - T_Key -> -{ -private: - typedef pmacc::Particle ParticleType; - typedef typename ParticleType::ValueTypeSeq ValueTypeSeq; -public: - /* If T_Key can not be found in the 
T_ValueTypeSeq of this Particle class, - * SolvedAliasName will be void_. - * Look-up is also valid if T_Key is an alias. - */ - typedef typename GetKeyFromAlias< - ValueTypeSeq, - T_Key - >::type SolvedAliasName; - - typedef bmpl::contains type; -}; - -template< - typename T_Key, - typename T_FrameType, - typename T_ValueTypeSeq -> -struct HasFlag< - pmacc::Particle, - T_Key ->: public HasFlag -{}; - -template< - typename T_Key, - typename T_FrameType, - typename T_ValueTypeSeq -> -struct GetFlagType< - pmacc::Particle, - T_Key ->: public GetFlagType -{}; - -} //namespace traits - -namespace particles -{ -namespace operations -{ -namespace detail -{ - -/** Assign common attributes of two particle species - * - * Assigns all attributes in ValueTypeSeq1 that also exist in T_ValueTypeSeq2 - * from T_FrameType1 to T_FrameType2. - */ -template< -typename T_FrameType1, typename T_ValueTypeSeq1, -typename T_FrameType2, typename T_ValueTypeSeq2 -> -struct Assign -< -pmacc::Particle, -pmacc::Particle -> -{ - typedef pmacc::Particle Dest; - typedef pmacc::Particle Src; - - typedef typename Dest::ValueTypeSeq DestTypeSeq; - typedef typename Src::ValueTypeSeq SrcTypeSeq; - - /* create attribute list with a subset of common attributes in two sequences - * bmpl::contains has lower complexity than traits::HasIdentifier - * and was used for this reason - */ - typedef typename bmpl::copy_if< - DestTypeSeq, - bmpl::contains, - bmpl::back_inserter< bmpl::vector0<> > - >::type CommonTypeSeq; - - /* create sequences with disjunct attributes from `DestTypeSeq` */ - typedef typename bmpl::copy_if< - DestTypeSeq, - bmpl::not_ >, - bmpl::back_inserter< bmpl::vector0<> > - >::type UniqueInDestTypeSeq; - - /** Assign particle attributes - * - * The common subset of the attribute lists from both particles is - * used to set the attributes in dest with the corresponding ones from src. 
- * The remaining attributes that only exist in dest (UniqueInDestTypeSeq) - * are simply set to their default values. - * - * @param dest destination particle that shall be initialized/assigned with values from src - * @param src source particle were attributes are loaded from - */ - HDINLINE - void operator()(Dest& dest, const Src& src) - { - using pmacc::meta::ForEach; - /* assign attributes from src to dest*/ - ForEach > copy; - copy(dest, src); - - /* set all attributes which are not in src to their default value*/ - ForEach > setAttributeToDefault; - setAttributeToDefault(dest); - - }; -}; - -template< -typename T_MPLSeqWithObjectsToRemove, -typename T_FrameType, typename T_ValueTypeSeq -> -struct Deselect -< -T_MPLSeqWithObjectsToRemove, -pmacc::Particle -> -{ - typedef T_FrameType FrameType; - typedef T_ValueTypeSeq ValueTypeSeq; - typedef pmacc::Particle ParticleType; - typedef T_MPLSeqWithObjectsToRemove MPLSeqWithObjectsToRemove; - - /* translate aliases to full specialized identifier*/ - typedef typename ResolveAliases::type ResolvedSeqWithObjectsToRemove; - /* remove types from original particle attribute list*/ - typedef typename RemoveFromSeq::type NewValueTypeSeq; - /* new particle type*/ - typedef pmacc::Particle ResultType; - - template struct result; - - template - struct result< F(T_Obj)> - { - typedef ResultType type; + typedef T_FrameType FrameType; + typedef T_ValueTypeSeq ValueTypeSeq; + typedef typename FrameType::Name Name; + typedef typename FrameType::SuperCellSize SuperCellSize; + typedef Particle ThisType; + typedef typename FrameType::MethodsList MethodsList; + + /** index of particle inside the Frame*/ + PMACC_ALIGN(idx, uint32_t); + + /** pointer to parent frame where this particle is from + * + * ATTENTION: The pointer must be the last member to avoid local memory usage + * https://github.com/ComputationalRadiationPhysics/picongpu/pull/762 + */ + PMACC_ALIGN(frame, FrameType*); + + /** set particle handle to invalid + * + * This 
method sets the particle handle to invalid. It is possible to test with + * the method isHandleValid if the particle is valid. + * If the particle is set to invalid it is not allowed to call any method other + * than isHandleValid or setHandleInvalid, but it does not mean the particle is + * deactivated outside of this instance. + */ + HDINLINE void setHandleInvalid() + { + frame = nullptr; + } + + /** check if particle handle is valid + * + * A valid particle handle means that the memory behind the handle can be used + * savely. A valid handle does not mean that the particle's multiMask is valid (>=1). + * + * @return true if the particle handle is valid, else false + */ + HDINLINE bool isHandleValid() const + { + return frame != nullptr; + } + + /** create particle + * + * @param frame reference to parent frame + * @param idx index of particle inside the frame + */ + HDINLINE Particle(FrameType& frame, uint32_t idx) : frame(&frame), idx(idx) + { + } + + template + HDINLINE Particle(const T_OtherParticle& other) : frame(other.frame) + , idx(other.idx) + { + } + + /** access attribute with a identifier + * + * @param T_Key instance of identifier type + * (can be an alias, value_identifier or any other class) + * @return result of operator[] of the Frame + */ + template + HDINLINE typename boost::result_of< + typename boost::remove_reference::type>::type(uint32_t)>::type + operator[](const T_Key key) + { + PMACC_CASSERT_MSG_TYPE(key_not_available, T_Key, traits::HasIdentifier::type::value); + + return frame->getIdentifier(key)[idx]; + } + + /** const version of method operator(const T_Key) */ + template + HDINLINE typename boost::result_of::type>::type(uint32_t)>::type + operator[](const T_Key key) const + { + PMACC_CASSERT_MSG_TYPE(key_not_available, T_Key, traits::HasIdentifier::type::value); + + return frame->getIdentifier(key)[idx]; + } + + HDINLINE + ThisType& operator=(const ThisType& other) = default; + + private: + /* we disallow to assign this class*/ + 
template + HDINLINE ThisType& operator=(const T_OtherParticle& other); }; - HDINLINE - ResultType operator()(const ParticleType& particle) + namespace traits { - return ResultType(particle); - }; -}; - -} //namespace detail -} //namespace operations -} //namespace particles - -} //namespace pmacc + template + struct HasIdentifier, T_Key> + { + private: + typedef pmacc::Particle ParticleType; + typedef typename ParticleType::ValueTypeSeq ValueTypeSeq; + + public: + /* If T_Key can not be found in the T_ValueTypeSeq of this Particle class, + * SolvedAliasName will be void_. + * Look-up is also valid if T_Key is an alias. + */ + typedef typename GetKeyFromAlias::type SolvedAliasName; + + typedef bmpl::contains type; + }; + + template + struct HasFlag, T_Key> : public HasFlag + { + }; + + template + struct GetFlagType, T_Key> + : public GetFlagType + { + }; + + } // namespace traits + + namespace particles + { + namespace operations + { + namespace detail + { + /** Assign common attributes of two particle species + * + * Assigns all attributes in ValueTypeSeq1 that also exist in T_ValueTypeSeq2 + * from T_FrameType1 to T_FrameType2. 
+ */ + template< + typename T_FrameType1, + typename T_ValueTypeSeq1, + typename T_FrameType2, + typename T_ValueTypeSeq2> + struct Assign< + pmacc::Particle, + pmacc::Particle> + { + typedef pmacc::Particle Dest; + typedef pmacc::Particle Src; + + typedef typename Dest::ValueTypeSeq DestTypeSeq; + typedef typename Src::ValueTypeSeq SrcTypeSeq; + + /* create attribute list with a subset of common attributes in two sequences + * bmpl::contains has lower complexity than traits::HasIdentifier + * and was used for this reason + */ + typedef typename bmpl::copy_if< + DestTypeSeq, + bmpl::contains, + bmpl::back_inserter>>::type CommonTypeSeq; + + /* create sequences with disjunct attributes from `DestTypeSeq` */ + typedef typename bmpl::copy_if< + DestTypeSeq, + bmpl::not_>, + bmpl::back_inserter>>::type UniqueInDestTypeSeq; + + /** Assign particle attributes + * + * The common subset of the attribute lists from both particles is + * used to set the attributes in dest with the corresponding ones from src. + * The remaining attributes that only exist in dest (UniqueInDestTypeSeq) + * are simply set to their default values. 
+ * + * @param dest destination particle that shall be initialized/assigned with values from src + * @param src source particle were attributes are loaded from + */ + HDINLINE + void operator()(Dest& dest, const Src& src) + { + using pmacc::meta::ForEach; + /* assign attributes from src to dest*/ + ForEach> copy; + copy(dest, src); + + /* set all attributes which are not in src to their default value*/ + ForEach> setAttributeToDefault; + setAttributeToDefault(dest); + }; + }; + + template + struct Deselect> + { + typedef T_FrameType FrameType; + typedef T_ValueTypeSeq ValueTypeSeq; + typedef pmacc::Particle ParticleType; + typedef T_MPLSeqWithObjectsToRemove MPLSeqWithObjectsToRemove; + + /* translate aliases to full specialized identifier*/ + typedef typename ResolveAliases< + MPLSeqWithObjectsToRemove, + ValueTypeSeq, + errorHandlerPolicies::ReturnValue>::type ResolvedSeqWithObjectsToRemove; + /* remove types from original particle attribute list*/ + typedef typename RemoveFromSeq::type NewValueTypeSeq; + /* new particle type*/ + typedef pmacc::Particle ResultType; + + template + struct result; + + template + struct result + { + typedef ResultType type; + }; + + HDINLINE + ResultType operator()(const ParticleType& particle) + { + return ResultType(particle); + }; + }; + + } // namespace detail + } // namespace operations + } // namespace particles + +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/Pointer.hpp b/include/pmacc/particles/memory/dataTypes/Pointer.hpp index 27cfbf2a98..c4eb169c6d 100644 --- a/include/pmacc/particles/memory/dataTypes/Pointer.hpp +++ b/include/pmacc/particles/memory/dataTypes/Pointer.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. 
* @@ -25,84 +25,81 @@ namespace pmacc { - -/** Wrapper for a raw pointer - * - * @tparam T_Type type of the pointed object - */ -template< typename T_Type > -class Pointer -{ -public: - - using type = T_Type; - using PtrType = type*; - using ConstPtrType = const type*; - - HDINLINE Pointer( ): - ptr{ nullptr } - { - } - - HDINLINE Pointer( PtrType const ptrIn ) : ptr( ptrIn ) - { - } - - HDINLINE Pointer( const Pointer& other ) : ptr( other.ptr ) - { - } - - HDINLINE Pointer& operator=(const Pointer& other) - { - ptr = other.ptr; - return *this; - } - - /** dereference the pointer*/ - HDINLINE type& operator*() - { - return *ptr; - } - - /** dereference the pointer*/ - HDINLINE const type& operator*() const - { - return *ptr; - } - - /** access member*/ - HDINLINE PtrType operator->() - { - return ptr; - } - - /** access member*/ - HDINLINE ConstPtrType operator->() const - { - return ptr; - } - - /** compare if two pointers point to the same memory address*/ - HDINLINE bool operator==(const Pointer& other) const - { - return ptr == other.ptr; - } - - /** check if the memory address of two pointers are different*/ - HDINLINE bool operator!=(const Pointer& other) const - { - return ptr != other.ptr; - } - - /** check if the memory pointed to has a valid address - * @return false if memory adress is nullptr else true + /** Wrapper for a raw pointer + * + * @tparam T_Type type of the pointed object */ - HDINLINE bool isValid( ) const + template + class Pointer { - return ptr != nullptr; - } - - PMACC_ALIGN( ptr, PtrType ); -}; - -} //namespace pmacc + public: + using type = T_Type; + using PtrType = type*; + using ConstPtrType = const type*; + + HDINLINE Pointer() : ptr{nullptr} + { + } + + HDINLINE Pointer(PtrType const ptrIn) : ptr(ptrIn) + { + } + + HDINLINE Pointer(const Pointer& other) : ptr(other.ptr) + { + } + + HDINLINE Pointer& operator=(const Pointer& other) + { + ptr = other.ptr; + return *this; + } + + /** dereference the pointer*/ + HDINLINE type& 
operator*() + { + return *ptr; + } + + /** dereference the pointer*/ + HDINLINE const type& operator*() const + { + return *ptr; + } + + /** access member*/ + HDINLINE PtrType operator->() + { + return ptr; + } + + /** access member*/ + HDINLINE ConstPtrType operator->() const + { + return ptr; + } + + /** compare if two pointers point to the same memory address*/ + HDINLINE bool operator==(const Pointer& other) const + { + return ptr == other.ptr; + } + + /** check if the memory address of two pointers are different*/ + HDINLINE bool operator!=(const Pointer& other) const + { + return ptr != other.ptr; + } + + /** check if the memory pointed to has a valid address + * @return false if memory adress is nullptr else true + */ + HDINLINE bool isValid() const + { + return ptr != nullptr; + } + + PMACC_ALIGN(ptr, PtrType); + }; + +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/StaticArray.hpp b/include/pmacc/particles/memory/dataTypes/StaticArray.hpp index 27e46794d0..9b68e7c6f2 100644 --- a/include/pmacc/particles/memory/dataTypes/StaticArray.hpp +++ b/include/pmacc/particles/memory/dataTypes/StaticArray.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,44 +26,45 @@ namespace pmacc { + namespace pmath = pmacc::math; -namespace pmath = pmacc::math; + template + class StaticArray + { + public: + static constexpr uint32_t size = T_size::value; + typedef T_Type Type; -template -class StaticArray -{ -public: - static constexpr uint32_t size = T_size::value; - typedef T_Type Type; -private: - Type data[size]; -public: + private: + Type data[size]; - template struct result; + public: + template + struct result; - template - struct result - { - typedef Type& type; - }; + template + struct result + { + typedef Type& type; + }; - template - struct result - { - typedef const Type& type; - }; + template + struct result + { + typedef const Type& type; + }; - HDINLINE - Type& operator[](const int idx) - { - return data[idx]; - } + HDINLINE + Type& operator[](const int idx) + { + return data[idx]; + } - HDINLINE - const Type& operator[](const int idx) const - { - return data[idx]; - } -}; + HDINLINE + const Type& operator[](const int idx) const + { + return data[idx]; + } + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/memory/dataTypes/SuperCell.hpp b/include/pmacc/particles/memory/dataTypes/SuperCell.hpp index cf0a9aabe4..ac79628368 100644 --- a/include/pmacc/particles/memory/dataTypes/SuperCell.hpp +++ b/include/pmacc/particles/memory/dataTypes/SuperCell.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -27,36 +27,30 @@ namespace pmacc { - - template< class T_FrameType > + template class SuperCell { public: - - HDINLINE SuperCell() : - firstFramePtr( nullptr ), - lastFramePtr( nullptr ), - numParticles( 0 ), - mustShiftVal( false ) + HDINLINE SuperCell() : firstFramePtr(nullptr), lastFramePtr(nullptr), numParticles(0), mustShiftVal(false) { } - HDINLINE T_FrameType * FirstFramePtr() + HDINLINE T_FrameType* FirstFramePtr() { return firstFramePtr; } - HDINLINE T_FrameType * LastFramePtr() + HDINLINE T_FrameType* LastFramePtr() { return lastFramePtr; } - HDINLINE T_FrameType const * FirstFramePtr() const + HDINLINE T_FrameType const* FirstFramePtr() const { return firstFramePtr; } - HDINLINE T_FrameType const * LastFramePtr() const + HDINLINE T_FrameType const* LastFramePtr() const { return lastFramePtr; } @@ -66,17 +60,15 @@ namespace pmacc return mustShiftVal; } - HDINLINE void setMustShift( bool const value ) + HDINLINE void setMustShift(bool const value) { mustShiftVal = value; } HDINLINE uint32_t getSizeLastFrame() const { - constexpr uint32_t frameSize = math::CT::volume< - typename T_FrameType::SuperCellSize - >::type::value; - return numParticles ? ( ( numParticles - 1u ) % frameSize + 1u ) : 0u; + constexpr uint32_t frameSize = math::CT::volume::type::value; + return numParticles ? 
((numParticles - 1u) % frameSize + 1u) : 0u; } HDINLINE uint32_t getNumParticles() const @@ -84,29 +76,18 @@ namespace pmacc return numParticles; } - HDINLINE void setNumParticles( uint32_t const size ) + HDINLINE void setNumParticles(uint32_t const size) { numParticles = size; } public: - PMACC_ALIGN( - firstFramePtr, - T_FrameType* - ); - PMACC_ALIGN( - lastFramePtr, - T_FrameType* - ); + PMACC_ALIGN(firstFramePtr, T_FrameType*); + PMACC_ALIGN(lastFramePtr, T_FrameType*); + private: - PMACC_ALIGN( - numParticles, - uint32_t - ); - PMACC_ALIGN( - mustShiftVal, - bool - ); + PMACC_ALIGN(numParticles, uint32_t); + PMACC_ALIGN(mustShiftVal, bool); }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/memory/frames/Frame.hpp b/include/pmacc/particles/memory/frames/Frame.hpp index e45959a475..ca59b7b43c 100644 --- a/include/pmacc/particles/memory/frames/Frame.hpp +++ b/include/pmacc/particles/memory/frames/Frame.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -50,171 +50,150 @@ namespace pmacc { + namespace pmath = pmacc::math; -namespace pmath = pmacc::math; - -/** Frame is a storage for arbitrary number >0 of Particles with attributes - * - * @tparam T_CreatePairOperator unary template operator to create a boost pair - * from single type ( pair ) - * @see MapTupel - * @tparam T_ValueTypeSeq sequence with value_identifier - * @tparam T_MethodsList sequence of classes with particle methods - * (e.g. calculate mass, gamma, ...) - * @tparam T_Flags sequence with identifiers to add flags on a frame - * (e.g. useSolverXY, calcRadiation, ...) 
- */ -template -struct Frame; - -template -struct Frame : -public InheritLinearly, -protected pmath::MapTuple::type, pmath::AlignedData>, -public InheritLinearly< - typename OperateOnSeq< - typename T_ParticleDescription::FrameExtensionList, - bmpl::apply1 > - >::type -> -{ - typedef T_ParticleDescription ParticleDescription; - typedef typename ParticleDescription::Name Name; - typedef typename ParticleDescription::SuperCellSize SuperCellSize; - typedef typename ParticleDescription::ValueTypeSeq ValueTypeSeq; - typedef typename ParticleDescription::MethodsList MethodsList; - typedef typename ParticleDescription::FlagsList FlagList; - typedef typename ParticleDescription::FrameExtensionList FrameExtensionList; - typedef Frame ThisType; - /* definition of the MapTupel where we inherit from*/ - typedef pmath::MapTuple::type, pmath::AlignedData> BaseType; - - /* type of a single particle*/ - typedef pmacc::Particle ParticleType; - - /* define boost result_of results - * normaly result_of defines operator() result, in this case we define the result for - * operator[] - */ - template struct result; - - /* const operator[]*/ - template - struct result - { - typedef typename GetKeyFromAlias::type Key; - typedef typename boost::result_of::type type; - }; - - /* non const operator[]*/ - template - struct result< F(TKey)> - { - typedef typename GetKeyFromAlias::type Key; - typedef typename boost::result_of< BaseType(Key)>::type type; - }; - - /** access the Nth particle*/ - HDINLINE ParticleType operator[](const uint32_t idx) - { - return ParticleType(*this, idx); - } - - /** access the Nth particle*/ - HDINLINE const ParticleType operator[](const uint32_t idx) const - { - return ParticleType(*this, idx); - } - - /** access attribute with a identifier + /** Frame is a storage for arbitrary number >0 of Particles with attributes * - * @param T_Key instance of identifier type - * (can be an alias, value_identifier or any other class) - * @return result of operator[] of MapTupel 
+ * @tparam T_CreatePairOperator unary template operator to create a boost pair + * from single type ( pair ) + * @see MapTupel + * @tparam T_ValueTypeSeq sequence with value_identifier + * @tparam T_MethodsList sequence of classes with particle methods + * (e.g. calculate mass, gamma, ...) + * @tparam T_Flags sequence with identifiers to add flags on a frame + * (e.g. useSolverXY, calcRadiation, ...) */ - template - HDINLINE - typename boost::result_of < ThisType(T_Key)>::type - getIdentifier(const T_Key) + template + struct Frame; + + template + struct Frame + : public InheritLinearly + , protected pmath::MapTuple< + typename SeqToMap::type, + pmath::AlignedData> + , public InheritLinearly>>::type> { - typedef typename GetKeyFromAlias::type Key; - return BaseType::operator[](Key()); - } - - /** const version of method getIdentifier(const T_Key) */ - template - HDINLINE - typename boost::result_of < const ThisType(T_Key)>::type - getIdentifier(const T_Key) const - { - typedef typename GetKeyFromAlias::type Key; - return BaseType::operator[](Key()); - } + typedef T_ParticleDescription ParticleDescription; + typedef typename ParticleDescription::Name Name; + typedef typename ParticleDescription::SuperCellSize SuperCellSize; + typedef typename ParticleDescription::ValueTypeSeq ValueTypeSeq; + typedef typename ParticleDescription::MethodsList MethodsList; + typedef typename ParticleDescription::FlagsList FlagList; + typedef typename ParticleDescription::FrameExtensionList FrameExtensionList; + typedef Frame ThisType; + /* definition of the MapTupel where we inherit from*/ + typedef pmath::MapTuple::type, pmath::AlignedData> + BaseType; + + /* type of a single particle*/ + typedef pmacc::Particle ParticleType; + + /* define boost result_of results + * normaly result_of defines operator() result, in this case we define the result for + * operator[] + */ + template + struct result; + + /* const operator[]*/ + template + struct result + { + typedef typename 
GetKeyFromAlias::type Key; + typedef typename boost::result_of::type type; + }; + + /* non const operator[]*/ + template + struct result + { + typedef typename GetKeyFromAlias::type Key; + typedef typename boost::result_of::type type; + }; + + /** access the Nth particle*/ + HDINLINE ParticleType operator[](const uint32_t idx) + { + return ParticleType(*this, idx); + } + + /** access the Nth particle*/ + HDINLINE const ParticleType operator[](const uint32_t idx) const + { + return ParticleType(*this, idx); + } + + /** access attribute with a identifier + * + * @param T_Key instance of identifier type + * (can be an alias, value_identifier or any other class) + * @return result of operator[] of MapTupel + */ + template + HDINLINE typename boost::result_of::type getIdentifier(const T_Key) + { + typedef typename GetKeyFromAlias::type Key; + return BaseType::operator[](Key()); + } + + /** const version of method getIdentifier(const T_Key) */ + template + HDINLINE typename boost::result_of::type getIdentifier(const T_Key) const + { + typedef typename GetKeyFromAlias::type Key; + return BaseType::operator[](Key()); + } + + HINLINE static std::string getName() + { + return Name::str(); + } + }; - HINLINE static std::string getName() + namespace traits { - return Name::str(); - } - -}; - -namespace traits -{ - -template -struct HasIdentifier< -pmacc::Frame, -T_IdentifierName -> -{ -private: - typedef pmacc::Frame FrameType; -public: - typedef typename FrameType::ValueTypeSeq ValueTypeSeq; - /* if T_IdentifierName is void_ than we have no T_IdentifierName in our Sequence. 
- * check is also valid if T_Key is a alias - */ - typedef typename GetKeyFromAlias::type SolvedAliasName; - - typedef bmpl::contains type; -}; - -template -struct HasFlag< -pmacc::Frame, T_IdentifierName> -{ -private: - typedef pmacc::Frame FrameType; - typedef typename GetFlagType::type SolvedAliasName; - typedef typename FrameType::FlagList FlagList; -public: - - typedef bmpl::contains type; -}; - -template -struct GetFlagType< -pmacc::Frame, T_IdentifierName> -{ -private: - typedef pmacc::Frame FrameType; - typedef typename FrameType::FlagList FlagList; -public: - - typedef typename GetKeyFromAlias::type type; -}; - -} //namespace traits - -}//namespace pmacc + template + struct HasIdentifier, T_IdentifierName> + { + private: + typedef pmacc::Frame FrameType; + + public: + typedef typename FrameType::ValueTypeSeq ValueTypeSeq; + /* if T_IdentifierName is void_ than we have no T_IdentifierName in our Sequence. + * check is also valid if T_Key is a alias + */ + typedef typename GetKeyFromAlias::type SolvedAliasName; + + typedef bmpl::contains type; + }; + + template + struct HasFlag, T_IdentifierName> + { + private: + typedef pmacc::Frame FrameType; + typedef typename GetFlagType::type SolvedAliasName; + typedef typename FrameType::FlagList FlagList; + + public: + typedef bmpl::contains type; + }; + + template + struct GetFlagType, T_IdentifierName> + { + private: + typedef pmacc::Frame FrameType; + typedef typename FrameType::FlagList FlagList; + + public: + typedef typename GetKeyFromAlias::type type; + }; + + } // namespace traits + +} // namespace pmacc diff --git a/include/pmacc/particles/memory/frames/NullFrame.hpp b/include/pmacc/particles/memory/frames/NullFrame.hpp index 30cd3070b7..508ec3a46d 100644 --- a/include/pmacc/particles/memory/frames/NullFrame.hpp +++ b/include/pmacc/particles/memory/frames/NullFrame.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is 
part of PMacc. * @@ -26,16 +26,14 @@ namespace pmacc { - - class NullFrame { public: - enum { - tileSize = 0, dim = DIM3 + tileSize = 0, + dim = DIM3 }; }; -}//namespace +} // namespace pmacc diff --git a/include/pmacc/particles/meta/FindByNameOrType.hpp b/include/pmacc/particles/meta/FindByNameOrType.hpp index 628bde237b..5bb51d55c5 100644 --- a/include/pmacc/particles/meta/FindByNameOrType.hpp +++ b/include/pmacc/particles/meta/FindByNameOrType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. * @@ -35,70 +35,48 @@ namespace pmacc { -namespace particles -{ -namespace meta -{ - - /* find a type within a sequence by name or the type itself - * - * pmacc::traits::GetCTName is used to translate each element of - * T_MPLSeq into a name. - * - * @tparam T_MPLSeq source sequence where we search T_Identifier - * @tparam T_Identifier name or type to search - */ - template< - typename T_MPLSeq, - typename T_Identifier, - typename T_KeyNotFoundPolicy = pmacc::errorHandlerPolicies::ThrowValueNotFound - > - struct FindByNameOrType + namespace particles { - using KeyNotFoundPolicy = T_KeyNotFoundPolicy; - - template< typename T_Value > - struct HasTypeOrName + namespace meta { - using type = bmpl::or_< - boost::is_same< - T_Identifier, - T_Value - >, - boost::is_same< - pmacc::traits::GetCTName_t< T_Value >, - T_Identifier - > - >; - }; + /* find a type within a sequence by name or the type itself + * + * pmacc::traits::GetCTName is used to translate each element of + * T_MPLSeq into a name. 
+ * + * @tparam T_MPLSeq source sequence where we search T_Identifier + * @tparam T_Identifier name or type to search + */ + template< + typename T_MPLSeq, + typename T_Identifier, + typename T_KeyNotFoundPolicy = pmacc::errorHandlerPolicies::ThrowValueNotFound> + struct FindByNameOrType + { + using KeyNotFoundPolicy = T_KeyNotFoundPolicy; + + template + struct HasTypeOrName + { + using type = bmpl::or_< + boost::is_same, + boost::is_same, T_Identifier>>; + }; - using FilteredSeq = typename bmpl::copy_if< - T_MPLSeq, - HasTypeOrName< bmpl::_1 > - >::type; + using FilteredSeq = typename bmpl::copy_if>::type; - using type = typename bmpl::if_< - bmpl::empty< FilteredSeq >, - bmpl::apply< - KeyNotFoundPolicy, - T_MPLSeq, - T_Identifier - >, - bmpl::front< FilteredSeq > - >::type::type; - }; + using type = typename bmpl::if_< + bmpl::empty, + bmpl::apply, + bmpl::front>::type::type; + }; - template< - typename T_MPLSeq, - typename T_Identifier, - typename T_KeyNotFoundPolicy = pmacc::errorHandlerPolicies::ThrowValueNotFound - > - using FindByNameOrType_t = typename FindByNameOrType< - T_MPLSeq, - T_Identifier, - T_KeyNotFoundPolicy - >::type; + template< + typename T_MPLSeq, + typename T_Identifier, + typename T_KeyNotFoundPolicy = pmacc::errorHandlerPolicies::ThrowValueNotFound> + using FindByNameOrType_t = typename FindByNameOrType::type; -} // namespace meta -} // namespace particles + } // namespace meta + } // namespace particles } // namespace pmacc diff --git a/include/pmacc/particles/operations/Assign.hpp b/include/pmacc/particles/operations/Assign.hpp index 1da74ced9f..200260f872 100644 --- a/include/pmacc/particles/operations/Assign.hpp +++ b/include/pmacc/particles/operations/Assign.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,24 +26,23 @@ namespace pmacc { -namespace particles -{ -namespace operations -{ - -namespace detail -{ -template -struct Assign; - -}//namespace detail - -template -HDINLINE void assign(T_Dest& dest,const T_Src& src) -{ - detail::Assign()(dest,src); -} - -}//operators -}//namespace particles -} //namespace pmacc + namespace particles + { + namespace operations + { + namespace detail + { + template + struct Assign; + + } // namespace detail + + template + HDINLINE void assign(T_Dest& dest, const T_Src& src) + { + detail::Assign()(dest, src); + } + + } // namespace operations + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/operations/ConcatListOfFrames.hpp b/include/pmacc/particles/operations/ConcatListOfFrames.hpp index bc26bd8134..d113cc3996 100644 --- a/include/pmacc/particles/operations/ConcatListOfFrames.hpp +++ b/include/pmacc/particles/operations/ConcatListOfFrames.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Alexander Grund * * This file is part of PMacc. 
* @@ -29,143 +29,141 @@ namespace pmacc { -namespace particles -{ -namespace operations -{ - -/** Copy Particles to a Single Frame - * - * - copy particle data that was stored in a linked list of frames for each - * super-cell on the GPU to a single frame on the CPU RAM - * - the deep on-GPU hierarchy must be copied to the CPU beforehand - * - remove species attributes `multiMask` and `localCellIdx` - * - add new cellIdx attribute relative to a user-defined domain - */ -template -struct ConcatListOfFrames -{ - DataSpace m_gridSize; - - ConcatListOfFrames(const DataSpace& gridSize) : - m_gridSize(gridSize) - { - - } - - /** concatenate list of frames to single frame - * - * @param counter[in,out] scalar offset in `destFrame` - * @param destFrame single frame were all particles are copied in - * @param srcBox particle box were particles are read from - * @param particleFilter filter to select particles - * @param domainOffset offset to a user-defined domain. Can, e.g. be used to - * calculate a totalCellIdx, adding - * globalDomain.offset + localDomain.offset - * @param domainCellIdxIdentifier the identifier for the particle cellIdx - * that is calculated with respect to - * domainOffset - * @param mapper mapper which describes the area where particles are copied from - * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface - * The working domain for the filter is supercells. 
- */ - template - void operator()( - int& counter, - T_DestFrame destFrame, - T_SrcBox srcBox, - const T_Filter particleFilter, - const T_Space domainOffset, - const T_Identifier domainCellIdxIdentifier, - const T_Mapping mapper, - T_ParticleFilter & parFilter - ) + namespace particles { - #pragma omp parallel for - for (int linearBlockIdx = 0; - linearBlockIdx < m_gridSize.productOfComponents(); - ++linearBlockIdx - ) + namespace operations { - // local copy for each omp thread - T_Filter filter = particleFilter; - DataSpace blockIndex(DataSpaceOperations::map(m_gridSize, linearBlockIdx)); - - using namespace pmacc::particles::operations; - using namespace mappings::threads; - - typedef T_DestFrame DestFrameType; - typedef typename T_SrcBox::FrameType SrcFrameType; - typedef typename T_SrcBox::FramePtr SrcFramePtr; - - typedef T_Mapping Mapping; - typedef typename Mapping::SuperCellSize SuperCellSize; - - - const int particlesPerFrame = pmacc::math::CT::volume::type::value; - int localIdxs[particlesPerFrame]; - - const DataSpace superCellIdx = mapper.getSuperCellIndex(blockIndex); - const DataSpace superCellPosition((superCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize()); - filter.setSuperCellPosition(superCellPosition); - auto accParFilter = parFilter( - 1, /* @todo this is a hack, please add a alpaka accelerator here*/ - superCellIdx - mapper.getGuardingSuperCells( ), - WorkerCfg< 1 >{ 0 } /* @todo this is a workaround because we use no alpaka*/ - ); - - SrcFramePtr srcFramePtr = srcBox.getFirstFrame(superCellIdx); - - /* Loop over all frames in current super cell */ - while (srcFramePtr.isValid()) + /** Copy Particles to a Single Frame + * + * - copy particle data that was stored in a linked list of frames for each + * super-cell on the GPU to a single frame on the CPU RAM + * - the deep on-GPU hierarchy must be copied to the CPU beforehand + * - remove species attributes `multiMask` and `localCellIdx` + * - add new cellIdx attribute 
relative to a user-defined domain + */ + template + struct ConcatListOfFrames { - /* Count number of particles in current frame and init its indices */ - int curNumParticles = 0; - for (int particleIdx = 0; particleIdx < particlesPerFrame; ++particleIdx) - { - localIdxs[particleIdx] = -1; - auto parSrc = (srcFramePtr[particleIdx]); - /* Check if particle exists and is not filtered */ - if (parSrc[multiMask_] == 1 && filter(*srcFramePtr, particleIdx)) - if( - accParFilter( - 1, /* @todo this is a hack, please add a alpaka accelerator here*/ - parSrc - ) - ) - localIdxs[particleIdx] = curNumParticles++; - } + DataSpace m_gridSize; - int globalOffset; - /* atomic update with openmp*/ - #pragma omp critical + ConcatListOfFrames(const DataSpace& gridSize) : m_gridSize(gridSize) { - globalOffset = counter; - counter += curNumParticles; } - for (int particleIdx = 0; particleIdx < particlesPerFrame; ++particleIdx) + /** concatenate list of frames to single frame + * + * @param counter[in,out] scalar offset in `destFrame` + * @param destFrame single frame were all particles are copied in + * @param srcBox particle box were particles are read from + * @param particleFilter filter to select particles + * @param domainOffset offset to a user-defined domain. Can, e.g. be used to + * calculate a totalCellIdx, adding + * globalDomain.offset + localDomain.offset + * @param domainCellIdxIdentifier the identifier for the particle cellIdx + * that is calculated with respect to + * domainOffset + * @param mapper mapper which describes the area where particles are copied from + * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface + * The working domain for the filter is supercells. 
+ */ + template< + class T_DestFrame, + class T_SrcBox, + class T_Filter, + class T_Space, + class T_Identifier, + class T_Mapping, + typename T_ParticleFilter> + void operator()( + int& counter, + T_DestFrame destFrame, + T_SrcBox srcBox, + const T_Filter particleFilter, + const T_Space domainOffset, + const T_Identifier domainCellIdxIdentifier, + const T_Mapping mapper, + T_ParticleFilter& parFilter) { - if (localIdxs[particleIdx] != -1) +#pragma omp parallel for + for(int linearBlockIdx = 0; linearBlockIdx < m_gridSize.productOfComponents(); ++linearBlockIdx) { - auto parSrc = (srcFramePtr[particleIdx]); - auto parDest = destFrame[globalOffset + localIdxs[particleIdx]]; - auto parDestNoDomainIdx = deselect(parDest); - assign(parDestNoDomainIdx, parSrc); - /* calculate cell index for user-defined domain */ - DataSpace localCellIdx(DataSpaceOperations::template map(parSrc[localCellIdx_])); - parDest[domainCellIdxIdentifier] = domainOffset + superCellPosition + localCellIdx; + // local copy for each omp thread + T_Filter filter = particleFilter; + DataSpace blockIndex(DataSpaceOperations::map(m_gridSize, linearBlockIdx)); + + using namespace pmacc::particles::operations; + using namespace mappings::threads; + + typedef T_DestFrame DestFrameType; + typedef typename T_SrcBox::FrameType SrcFrameType; + typedef typename T_SrcBox::FramePtr SrcFramePtr; + + typedef T_Mapping Mapping; + typedef typename Mapping::SuperCellSize SuperCellSize; + + + const int particlesPerFrame = pmacc::math::CT::volume::type::value; + int localIdxs[particlesPerFrame]; + + const DataSpace superCellIdx = mapper.getSuperCellIndex(blockIndex); + const DataSpace superCellPosition( + (superCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize()); + filter.setSuperCellPosition(superCellPosition); + auto accParFilter = parFilter( + 1, /* @todo this is a hack, please add a alpaka accelerator here*/ + superCellIdx - mapper.getGuardingSuperCells(), + WorkerCfg<1>{0} /* @todo this is a 
workaround because we use no alpaka*/ + ); + + SrcFramePtr srcFramePtr = srcBox.getFirstFrame(superCellIdx); + + /* Loop over all frames in current super cell */ + while(srcFramePtr.isValid()) + { + /* Count number of particles in current frame and init its indices */ + int curNumParticles = 0; + for(int particleIdx = 0; particleIdx < particlesPerFrame; ++particleIdx) + { + localIdxs[particleIdx] = -1; + auto parSrc = (srcFramePtr[particleIdx]); + /* Check if particle exists and is not filtered */ + if(parSrc[multiMask_] == 1 && filter(*srcFramePtr, particleIdx)) + if(accParFilter( + 1, /* @todo this is a hack, please add a alpaka accelerator here*/ + parSrc)) + localIdxs[particleIdx] = curNumParticles++; + } + + int globalOffset; +/* atomic update with openmp*/ +#pragma omp critical + { + globalOffset = counter; + counter += curNumParticles; + } + + for(int particleIdx = 0; particleIdx < particlesPerFrame; ++particleIdx) + { + if(localIdxs[particleIdx] != -1) + { + auto parSrc = (srcFramePtr[particleIdx]); + auto parDest = destFrame[globalOffset + localIdxs[particleIdx]]; + auto parDestNoDomainIdx = deselect(parDest); + assign(parDestNoDomainIdx, parSrc); + /* calculate cell index for user-defined domain */ + DataSpace localCellIdx( + DataSpaceOperations::template map( + parSrc[localCellIdx_])); + parDest[domainCellIdxIdentifier] = domainOffset + superCellPosition + localCellIdx; + } + } + /*get next frame in supercell*/ + srcFramePtr = srcBox.getNextFrame(srcFramePtr); + } } } - /*get next frame in supercell*/ - srcFramePtr = srcBox.getNextFrame(srcFramePtr); - - } - } - } - -}; + }; -} //namespace operations -} //namespace particles -} //namespace pmacc + } // namespace operations + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/operations/CopyIdentifier.hpp b/include/pmacc/particles/operations/CopyIdentifier.hpp index 9b0a33929d..a855ad5967 100644 --- a/include/pmacc/particles/operations/CopyIdentifier.hpp +++ 
b/include/pmacc/particles/operations/CopyIdentifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -27,21 +27,17 @@ namespace pmacc { + namespace pmath = pmacc::math; -namespace pmath = pmacc::math; - -template -struct CopyIdentifier -{ - template - HDINLINE - void operator()(T_T1& dest, const T_T2& src) + template + struct CopyIdentifier { - dest[T_Key()]=src[T_Key()]; - } - - -}; - -}//namespace pmacc + template + HDINLINE void operator()(T_T1& dest, const T_T2& src) + { + dest[T_Key()] = src[T_Key()]; + } + }; + +} // namespace pmacc diff --git a/include/pmacc/particles/operations/CountParticles.hpp b/include/pmacc/particles/operations/CountParticles.hpp index b75bdc42a1..ccbc94c8e6 100644 --- a/include/pmacc/particles/operations/CountParticles.hpp +++ b/include/pmacc/particles/operations/CountParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Erik Zenker +/* Copyright 2013-2021 Rene Widera, Erik Zenker * * This file is part of PMacc. 
* @@ -37,281 +37,222 @@ namespace pmacc { - -/* count particles - * - * it is allowed to call this kernel on frames with holes (without calling fillAllGAps before) - * - * @tparam T_numWorkers number of workers - */ -template< uint32_t T_numWorkers > -struct KernelCountParticles -{ - /** count particles + /* count particles * - * @tparam T_PBox pmacc::ParticlesBox, particle box type - * @tparam T_Filter functor to filter particles - * @tparam T_Mapping supercell mapper functor type - * @tparam T_ParticleFilter pmacc::filter::Interface, type of the particle filter - * @tparam T_Acc type of the alpaka accelerator + * it is allowed to call this kernel on frames with holes (without calling fillAllGAps before) * - * @param pb particle memory - * @param gCounter pointer for the result - * @param filter functor to filter particles those should be counted - * @param mapper functor to map a block to a supercell - * @param parFilter particle filter method, the working domain for the filter is supercells + * @tparam T_numWorkers number of workers */ - template< - typename T_PBox, - typename T_Filter, - typename T_Mapping, - typename T_ParticleFilter, - typename T_Acc - > - DINLINE void operator( )( - T_Acc const & acc, - T_PBox pb, - uint64_cu* gCounter, - T_Filter filter, - T_Mapping const mapper, - T_ParticleFilter parFilter - ) const + template + struct KernelCountParticles { - using namespace mappings::threads; + /** count particles + * + * @tparam T_PBox pmacc::ParticlesBox, particle box type + * @tparam T_Filter functor to filter particles + * @tparam T_Mapping supercell mapper functor type + * @tparam T_ParticleFilter pmacc::filter::Interface, type of the particle filter + * @tparam T_Acc type of the alpaka accelerator + * + * @param pb particle memory + * @param gCounter pointer for the result + * @param filter functor to filter particles those should be counted + * @param mapper functor to map a block to a supercell + * @param parFilter particle filter method, the 
working domain for the filter is supercells + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_PBox pb, + uint64_cu* gCounter, + T_Filter filter, + T_Mapping const mapper, + T_ParticleFilter parFilter) const + { + using namespace mappings::threads; - using Frame = typename T_PBox::FrameType; - using FramePtr = typename T_PBox::FramePtr; - constexpr uint32_t dim = T_Mapping::Dim; - constexpr uint32_t frameSize = math::CT::volume< typename Frame::SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; + using Frame = typename T_PBox::FrameType; + using FramePtr = typename T_PBox::FramePtr; + constexpr uint32_t dim = T_Mapping::Dim; + constexpr uint32_t frameSize = math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; - PMACC_SMEM( - acc, - frame, - FramePtr - ); - PMACC_SMEM( - acc, - counter, - int - ); - PMACC_SMEM( - acc, - particlesInSuperCell, - lcellId_t - ); + PMACC_SMEM(acc, frame, FramePtr); + PMACC_SMEM(acc, counter, int); + PMACC_SMEM(acc, particlesInSuperCell, lcellId_t); - using SuperCellSize = typename T_Mapping::SuperCellSize; + using SuperCellSize = typename T_Mapping::SuperCellSize; - DataSpace< dim > const threadIndex( threadIdx ); - uint32_t const workerIdx = static_cast< uint32_t >( - DataSpaceOperations< dim >::template map< SuperCellSize >( threadIndex ) - ); + DataSpace const threadIndex(cupla::threadIdx(acc)); + uint32_t const workerIdx + = static_cast(DataSpaceOperations::template map(threadIndex)); - DataSpace< dim > const superCellIdx( mapper.getSuperCellIndex( DataSpace< dim >( blockIdx ) ) ); + DataSpace const superCellIdx(mapper.getSuperCellIndex(DataSpace(cupla::blockIdx(acc)))); - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; + ForEachIdx> onlyMaster{workerIdx}; - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getLastFrame( superCellIdx ); - particlesInSuperCell = pb.getSuperCell( superCellIdx ).getSizeLastFrame( ); 
+ onlyMaster([&](uint32_t const, uint32_t const) { + frame = pb.getLastFrame(superCellIdx); + particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame(); counter = 0; - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - if( !frame.isValid() ) - return; //end kernel if we have no frames - filter.setSuperCellPosition( - ( superCellIdx - mapper.getGuardingSuperCells( ) ) * - mapper.getSuperCellSize( ) - ); + if(!frame.isValid()) + return; // end kernel if we have no frames + filter.setSuperCellPosition((superCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize()); - auto accParFilter = parFilter( - acc, - superCellIdx - mapper.getGuardingSuperCells( ), - WorkerCfg< numWorkers >{ workerIdx } - ); + auto accParFilter + = parFilter(acc, superCellIdx - mapper.getGuardingSuperCells(), WorkerCfg{workerIdx}); - ForEachIdx< - IdxConfig< - frameSize, - numWorkers - > - > forEachParticle( workerIdx ); + ForEachIdx> forEachParticle(workerIdx); - while( frame.isValid( ) ) - { - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( linearIdx < particlesInSuperCell ) + while(frame.isValid()) + { + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(linearIdx < particlesInSuperCell) { - bool const useParticle = filter( - *frame, - linearIdx - ); - if( useParticle ) + bool const useParticle = filter(*frame, linearIdx); + if(useParticle) { - auto parSrc = ( frame[ linearIdx ] ); - if( - accParFilter( - acc, - parSrc - ) - ) - nvidia::atomicAllInc( acc, &counter, ::alpaka::hierarchy::Threads{} ); + auto parSrc = (frame[linearIdx]); + if(accParFilter(acc, parSrc)) + nvidia::atomicAllInc(acc, &counter, ::alpaka::hierarchy::Threads{}); } } - } - ); + }); - __syncthreads( ); + cupla::__syncthreads(acc); - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - frame = pb.getPreviousFrame( frame ); + onlyMaster([&](uint32_t const, uint32_t const) { + frame = pb.getPreviousFrame(frame); 
particlesInSuperCell = frameSize; - } - ); + }); - __syncthreads( ); - } - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) - { - - atomicAdd( - gCounter, - static_cast< uint64_cu >( counter ), - ::alpaka::hierarchy::Blocks{} - ); + cupla::__syncthreads(acc); } - ); - } -}; - -struct CountParticles -{ - - /** Get particle count - * - * @tparam AREA area were particles are counted (CORE, BORDER, GUARD) - * - * @param buffer source particle buffer - * @param cellDescription instance of MappingDesction - * @param filter filter instance which must inharid from PositionFilter - * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface - * The working domain for the filter is supercells. - * @return number of particles in defined area - */ - template - static uint64_cu countOnDevice( PBuffer& buffer, CellDesc cellDescription, Filter filter, T_ParticleFilter & parFilter ) - { - GridBuffer< - uint64_cu, - DIM1 - > counter( DataSpace< DIM1 >( 1 ) ); - AreaMapping< - AREA, - CellDesc - > mapper( cellDescription ); - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume< typename CellDesc::SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( KernelCountParticles< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - buffer.getDeviceParticlesBox( ), - counter.getDeviceBuffer( ).getBasePointer( ), - filter, - mapper, - parFilter - ); - - counter.deviceToHost( ); - return *( counter.getHostBuffer( ).getDataBox( ) ); - } + onlyMaster([&](uint32_t const, uint32_t const) { + cupla::atomicAdd(acc, gCounter, static_cast(counter), ::alpaka::hierarchy::Blocks{}); + }); + } + }; - /** Get particle count - * - * @param buffer source particle buffer - * @param cellDescription instance of MappingDesction - * @param filter filter instance which must inharid from PositionFilter - * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface - * The working domain for the 
filter is supercells. - * @return number of particles in defined area - */ - template< class PBuffer, class Filter, class CellDesc, typename T_ParticleFilter> - static uint64_cu countOnDevice(PBuffer& buffer, CellDesc cellDescription, Filter filter, T_ParticleFilter & parFilter) + struct CountParticles { - return pmacc::CountParticles::countOnDevice < CORE + BORDER + GUARD > (buffer, cellDescription, filter, parFilter); - } + /** Get particle count + * + * @tparam AREA area were particles are counted (CORE, BORDER, GUARD) + * + * @param buffer source particle buffer + * @param cellDescription instance of MappingDesction + * @param filter filter instance which must inharid from PositionFilter + * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface + * The working domain for the filter is supercells. + * @return number of particles in defined area + */ + template + static uint64_cu countOnDevice( + PBuffer& buffer, + CellDesc cellDescription, + Filter filter, + T_ParticleFilter& parFilter) + { + GridBuffer counter(DataSpace(1)); + + AreaMapping mapper(cellDescription); + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(KernelCountParticles{}) + (mapper.getGridDim(), numWorkers)( + buffer.getDeviceParticlesBox(), + counter.getDeviceBuffer().getBasePointer(), + filter, + mapper, + parFilter); + + counter.deviceToHost(); + return *(counter.getHostBuffer().getDataBox()); + } - /** Get particle count - * - * @tparam AREA area were particles are counted (CORE, BORDER, GUARD) - * - * @param buffer source particle buffer - * @param cellDescription instance of MappingDesction - * @param origin local cell position (can be negative) - * @param size local size in cells for checked volume - * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface - * The working domain for the filter is supercells. 
- * @return number of particles in defined area - */ - template - static uint64_cu countOnDevice(PBuffer& buffer, CellDesc cellDescription, const Space& origin, const Space& size, T_ParticleFilter & parFilter) - { - typedef bmpl::vector< typename GetPositionFilter::type > usedFilters; - typedef typename FilterFactory::FilterType MyParticleFilter; - MyParticleFilter filter; - filter.setStatus(true); /*activeate filter pipline*/ - filter.setWindowPosition(origin, size); - return pmacc::CountParticles::countOnDevice(buffer, cellDescription, filter, parFilter); - } + /** Get particle count + * + * @param buffer source particle buffer + * @param cellDescription instance of MappingDesction + * @param filter filter instance which must inharid from PositionFilter + * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface + * The working domain for the filter is supercells. + * @return number of particles in defined area + */ + template + static uint64_cu countOnDevice( + PBuffer& buffer, + CellDesc cellDescription, + Filter filter, + T_ParticleFilter& parFilter) + { + return pmacc::CountParticles::countOnDevice( + buffer, + cellDescription, + filter, + parFilter); + } - /** Get particle count - * - * @param buffer source particle buffer - * @param cellDescription instance of MappingDesction - * @param origin local cell position (can be negative) - * @param size local size in cells for checked volume - * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface - * The working domain for the filter is supercells. 
- * @return number of particles in defined area - */ - template< class PBuffer, class Filter, class CellDesc, class Space, typename T_ParticleFilter> - static uint64_cu countOnDevice(PBuffer& buffer, CellDesc cellDescription, const Space& origin, const Space& size, T_ParticleFilter & parFilter) - { - return pmacc::CountParticles::countOnDevice < CORE + BORDER + GUARD > (buffer, cellDescription, origin, size, parFilter); - } + /** Get particle count + * + * @tparam AREA area were particles are counted (CORE, BORDER, GUARD) + * + * @param buffer source particle buffer + * @param cellDescription instance of MappingDesction + * @param origin local cell position (can be negative) + * @param size local size in cells for checked volume + * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface + * The working domain for the filter is supercells. + * @return number of particles in defined area + */ + template + static uint64_cu countOnDevice( + PBuffer& buffer, + CellDesc cellDescription, + const Space& origin, + const Space& size, + T_ParticleFilter& parFilter) + { + typedef bmpl::vector::type> usedFilters; + typedef typename FilterFactory::FilterType MyParticleFilter; + MyParticleFilter filter; + filter.setStatus(true); /*activeate filter pipline*/ + filter.setWindowPosition(origin, size); + return pmacc::CountParticles::countOnDevice(buffer, cellDescription, filter, parFilter); + } -}; + /** Get particle count + * + * @param buffer source particle buffer + * @param cellDescription instance of MappingDesction + * @param origin local cell position (can be negative) + * @param size local size in cells for checked volume + * @param parFilter particle filter method, must fulfill the interface of pmacc::filter::Interface + * The working domain for the filter is supercells. 
+ * @return number of particles in defined area + */ + template + static uint64_cu countOnDevice( + PBuffer& buffer, + CellDesc cellDescription, + const Space& origin, + const Space& size, + T_ParticleFilter& parFilter) + { + return pmacc::CountParticles::countOnDevice( + buffer, + cellDescription, + origin, + size, + parFilter); + } + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/operations/Deselect.hpp b/include/pmacc/particles/operations/Deselect.hpp index 91c5228ef7..10b730d53e 100644 --- a/include/pmacc/particles/operations/Deselect.hpp +++ b/include/pmacc/particles/operations/Deselect.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -30,38 +30,36 @@ namespace pmacc { -namespace particles -{ -namespace operations -{ + namespace particles + { + namespace operations + { + namespace detail + { + /* functor for deselect attributes of an object + * + * - must be boost result_of compatible + * - must define a operator()(T_Object) + * + * @tparam T_Sequence any boost mpl sequence + * @tparam T_Object a type were we can deselect attributes from + */ + template + struct Deselect; -namespace detail -{ + } // namespace detail -/* functor for deselect attributes of an object - * - * - must be boost result_of compatible - * - must define a operator()(T_Object) - * - * @tparam T_Sequence any boost mpl sequence - * @tparam T_Object a type were we can deselect attributes from - */ -template -struct Deselect; - -} //namespace detail - -template -HDINLINE -typename boost::result_of < detail::Deselect::type,T_Object>(T_Object)>::type -deselect(T_Object& object) -{ - typedef typename ToSeq< T_Exclude >::type DeselectSeq; - typedef detail::Deselect BaseType; + template + HDINLINE + typename boost::result_of::type, T_Object>(T_Object)>::type + deselect(T_Object& object) + { + typedef typename ToSeq::type DeselectSeq; + typedef detail::Deselect BaseType; - return 
BaseType()(object); -} + return BaseType()(object); + } -}//operators -}//namespace particles -} //namespace pmacc + } // namespace operations + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/operations/SetAttributeToDefault.hpp b/include/pmacc/particles/operations/SetAttributeToDefault.hpp index 0f99859487..089463ddd2 100644 --- a/include/pmacc/particles/operations/SetAttributeToDefault.hpp +++ b/include/pmacc/particles/operations/SetAttributeToDefault.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -26,29 +26,27 @@ namespace pmacc { - -/** set an attribute of a particle to its default value - * - * @tparam T_Attribute value_identifier or alias which is a value_identifier - */ -template -struct SetAttributeToDefault -{ - typedef T_Attribute Attribute; - - /** set an attribute to their default value + /** set an attribute of a particle to its default value * - * @tparam T_Partcile particle type + * @tparam T_Attribute value_identifier or alias which is a value_identifier */ - template - HDINLINE - void operator()(T_Particle& particle) + template + struct SetAttributeToDefault { - typedef typename pmacc::traits::Resolve::type ResolvedAttr; - /* set attribute to it's user defined default value */ - particle[Attribute()] = ResolvedAttr::getValue(); - } -}; + typedef T_Attribute Attribute; + + /** set an attribute to their default value + * + * @tparam T_Partcile particle type + */ + template + HDINLINE void operator()(T_Particle& particle) + { + typedef typename pmacc::traits::Resolve::type ResolvedAttr; + /* set attribute to it's user defined default value */ + particle[Attribute()] = ResolvedAttr::getValue(); + } + }; -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/operations/splitIntoListOfFrames.kernel b/include/pmacc/particles/operations/splitIntoListOfFrames.kernel index dab6bf82e6..aa97f5fc36 100644 --- 
a/include/pmacc/particles/operations/splitIntoListOfFrames.kernel +++ b/include/pmacc/particles/operations/splitIntoListOfFrames.kernel @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Alexander Grund +/* Copyright 2014-2021 Rene Widera, Alexander Grund * * This file is part of PMacc. * @@ -37,440 +37,298 @@ namespace pmacc { -namespace particles -{ -namespace operations -{ -namespace kernel -{ - /** transform a large frame into a list of small frames - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct SplitIntoListOfFrames + namespace particles { - /** Copy particles from big frame to PMacc frame structure - * (Opposite to ConcatListOfFrames) - * - * - convert a user-defined domainCellIdx to localCellIdx - * - processed particles per block <= number of cells per superCell - * - * @tparam T_CounterBox pmacc:DataBox, type of buffer for the statistics counter - * @tparam T_DestBox pmacc:ParticlesBox, type of the destination particle box - * @tparam T_SrcFrame pmacc:Frame, type of the source frame - * @tparam T_Space pmacc::DataSpace, type for indicies and offsets within the domain - * @tparam T_Identifier Identifier, type of the identifier for the total domain offset - * @tparam T_CellDescription pmacc::MappingDescription, type of the domain description - * @tparam T_Acc alpaka accelerator type - * - * @param acc alpaka accelerator - * @param counter box with three integers [sharedSrcParticleOffset, numLoadedParticles, numUsedFrames] - * @param destBox particle box where all particles are copied to (destination) - * @param srcFrame frame with particles (is used as source) - * @param maxParticles number of particles in srcFrame - * @param localDomainCellOffset offset in cells to user-defined domain (@see wiki PIConGPU domain definitions) - * @param domainCellIdxIdentifier the identifier for the particle domain cellIdx - * that is calculated back to the local domain - * with respect to localDomainCellOffset - * @param 
cellDesc supercell domain description - */ - template< - typename T_CounterBox, - typename T_DestBox, - typename T_SrcFrame, - typename T_Space, - typename T_Identifier, - typename T_CellDescription, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_CounterBox counter, - T_DestBox destBox, - T_SrcFrame srcFrame, - int const maxParticles, - T_Space const localDomainCellOffset, - T_Identifier const domainCellIdxIdentifier, - T_CellDescription const cellDesc - ) const + namespace operations { - using namespace pmacc::particles::operations; - using namespace mappings::threads; - - using SrcFrameType = T_SrcFrame; - using DestFrameType = typename T_DestBox::FrameType; - using DestFramePtr = typename T_DestBox::FramePtr; - using SuperCellSize = typename DestFrameType::SuperCellSize; - - constexpr uint32_t numWorkers = T_numWorkers; - constexpr uint32_t numDims = T_DestBox::Dim; - constexpr uint32_t particlesPerFrame = math::CT::volume< SuperCellSize >::type::value; - - PMACC_SMEM( - acc, - destFramePtr, - memory::Array< - DestFramePtr, - particlesPerFrame - > - ); - PMACC_SMEM( - acc, - sharedLinearSuperCellIds, - memory::Array< - int, - particlesPerFrame - > - ); - PMACC_SMEM( - acc, - sharedSrcParticleOffset, - int - ); - - uint32_t const workerIdx = threadIdx.x; - - DataSpace< numDims > const numSuperCells( - cellDesc.getGridSuperCells( ) - cellDesc.getGuardingSuperCells( ) * 2 - ); - - ForEachIdx< - IdxConfig< - 1, - numWorkers - > - > onlyMaster{ workerIdx }; - - onlyMaster( - [&]( - uint32_t const, - uint32_t const - ) + namespace kernel + { + /** transform a large frame into a list of small frames + * + * @tparam T_numWorkers number of workers + */ + template + struct SplitIntoListOfFrames { - /* apply for work for the full block, counter[0] contains the - * offset in srcFrame to load N particles + /** Copy particles from big frame to PMacc frame structure + * (Opposite to ConcatListOfFrames) + * + * - convert a user-defined domainCellIdx to 
localCellIdx + * - processed particles per block <= number of cells per superCell + * + * @tparam T_CounterBox pmacc:DataBox, type of buffer for the statistics counter + * @tparam T_DestBox pmacc:ParticlesBox, type of the destination particle box + * @tparam T_SrcFrame pmacc:Frame, type of the source frame + * @tparam T_Space pmacc::DataSpace, type for indicies and offsets within the domain + * @tparam T_Identifier Identifier, type of the identifier for the total domain offset + * @tparam T_CellDescription pmacc::MappingDescription, type of the domain description + * @tparam T_Acc alpaka accelerator type + * + * @param acc alpaka accelerator + * @param counter box with three integers [sharedSrcParticleOffset, numLoadedParticles, + * numUsedFrames] + * @param destBox particle box where all particles are copied to (destination) + * @param srcFrame frame with particles (is used as source) + * @param maxParticles number of particles in srcFrame + * @param localDomainCellOffset offset in cells to user-defined domain (@see wiki PIConGPU domain + * definitions) + * @param domainCellIdxIdentifier the identifier for the particle domain cellIdx + * that is calculated back to the local domain + * with respect to localDomainCellOffset + * @param cellDesc supercell domain description */ - sharedSrcParticleOffset = atomicAdd( - &( counter[ 0 ] ), - particlesPerFrame, - ::alpaka::hierarchy::Blocks{} - ); - } - ); - - __syncthreads(); - - using ParticleDomCfg = IdxConfig< - particlesPerFrame, - numWorkers - >; - - memory::CtxArray< - int, - ParticleDomCfg - > - srcParticleIdxCtx{ }; - - memory::CtxArray< - bool, - ParticleDomCfg - > - hasValidParticleCtx{ }; - - // loop over all particles in the frame - ForEachIdx< ParticleDomCfg > forEachParticle( workerIdx ); - - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - destFramePtr[ linearIdx ] = DestFramePtr{ }; - sharedLinearSuperCellIds[ linearIdx ] = -1; - - srcParticleIdxCtx[ idx ] = 
sharedSrcParticleOffset + linearIdx; - hasValidParticleCtx[ idx ] = srcParticleIdxCtx[ idx ] < maxParticles; - } - ); - - __syncthreads(); - - // supercell index of the particle relative to the origin of the local domain - memory::CtxArray< - DataSpace< numDims >, - ParticleDomCfg - > - particlesSuperCellCtx{ }; - - // linear cell index of the particle within the destination frame - memory::CtxArray< - lcellId_t, - ParticleDomCfg - > - lCellIdxCtx( INV_LOC_IDX ); - - memory::CtxArray< - int, - ParticleDomCfg - > - linearParticlesSuperCellCtx( -1 ); - - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( hasValidParticleCtx[ idx ] ) + template< + typename T_CounterBox, + typename T_DestBox, + typename T_SrcFrame, + typename T_Space, + typename T_Identifier, + typename T_CellDescription, + typename T_Acc> + DINLINE void operator()( + T_Acc const& acc, + T_CounterBox counter, + T_DestBox destBox, + T_SrcFrame srcFrame, + int const maxParticles, + T_Space const localDomainCellOffset, + T_Identifier const domainCellIdxIdentifier, + T_CellDescription const cellDesc) const { - // offset of the particle relative to the origin of the local domain - DataSpace< numDims > const particleCellOffset = - srcFrame[ srcParticleIdxCtx[ idx ] ][ domainCellIdxIdentifier ] - - localDomainCellOffset; - particlesSuperCellCtx[ idx ] = particleCellOffset / SuperCellSize::toRT( ); - linearParticlesSuperCellCtx[ idx ] = - DataSpaceOperations< numDims >::map( - numSuperCells, - particlesSuperCellCtx[ idx ] - ); - sharedLinearSuperCellIds[ linearIdx ] = linearParticlesSuperCellCtx[ idx ]; - DataSpace< numDims > const localCellIdx( - particleCellOffset - - particlesSuperCellCtx[ idx ] * SuperCellSize::toRT() - ); - lCellIdxCtx[ idx ] = - DataSpaceOperations< numDims >::template map< SuperCellSize >( localCellIdx ); - } - } - ); - - __syncthreads(); - - memory::CtxArray< - int, - ParticleDomCfg - > - masterVirtualThreadIdxCtx( - workerIdx, - [&]( - uint32_t 
const linearIdx, - uint32_t const - ) - { - return linearIdx - 1; - } - ); + using namespace pmacc::particles::operations; + using namespace mappings::threads; - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) - { - if( hasValidParticleCtx[ idx ] ) - { - auto & vThreadMasterIdx = masterVirtualThreadIdxCtx[ idx ]; - /* search master thread index */ - while( vThreadMasterIdx >= 0 ) - { - if( - linearParticlesSuperCellCtx[ idx ] != - sharedLinearSuperCellIds[ vThreadMasterIdx ] - ) - break; - - --vThreadMasterIdx; - } - ++vThreadMasterIdx; - - // load empty frame if virtual thread is the master - if( vThreadMasterIdx == linearIdx ) - { - /* counter[2] -> number of used frames */ - nvidia::atomicAllInc( - acc, - &( counter[ 2 ] ), - ::alpaka::hierarchy::Blocks{} - ); - DestFramePtr tmpFrame = destBox.getEmptyFrame( ); - destFramePtr[ linearIdx ] = tmpFrame; - destBox.setAsFirstFrame( + using SrcFrameType = T_SrcFrame; + using DestFrameType = typename T_DestBox::FrameType; + using DestFramePtr = typename T_DestBox::FramePtr; + using SuperCellSize = typename DestFrameType::SuperCellSize; + + constexpr uint32_t numWorkers = T_numWorkers; + constexpr uint32_t numDims = T_DestBox::Dim; + constexpr uint32_t particlesPerFrame = math::CT::volume::type::value; + + PMACC_SMEM(acc, destFramePtr, memory::Array); + PMACC_SMEM(acc, sharedLinearSuperCellIds, memory::Array); + PMACC_SMEM(acc, sharedSrcParticleOffset, int); + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + DataSpace const numSuperCells( + cellDesc.getGridSuperCells() - cellDesc.getGuardingSuperCells() * 2); + + ForEachIdx> onlyMaster{workerIdx}; + + onlyMaster([&](uint32_t const, uint32_t const) { + /* apply for work for the full block, counter[0] contains the + * offset in srcFrame to load N particles + */ + sharedSrcParticleOffset = cupla::atomicAdd( acc, - tmpFrame, - particlesSuperCellCtx[ idx ] + cellDesc.getGuardingSuperCells( ) - ); - } + &(counter[0]), + particlesPerFrame, 
+ ::alpaka::hierarchy::Blocks{}); + }); + + cupla::__syncthreads(acc); + + using ParticleDomCfg = IdxConfig; + + memory::CtxArray srcParticleIdxCtx{}; + + memory::CtxArray hasValidParticleCtx{}; + + // loop over all particles in the frame + ForEachIdx forEachParticle(workerIdx); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + destFramePtr[linearIdx] = DestFramePtr{}; + sharedLinearSuperCellIds[linearIdx] = -1; + + srcParticleIdxCtx[idx] = sharedSrcParticleOffset + linearIdx; + hasValidParticleCtx[idx] = srcParticleIdxCtx[idx] < maxParticles; + }); + + cupla::__syncthreads(acc); + + // supercell index of the particle relative to the origin of the local domain + memory::CtxArray, ParticleDomCfg> particlesSuperCellCtx{}; + + // linear cell index of the particle within the destination frame + memory::CtxArray lCellIdxCtx(INV_LOC_IDX); + + memory::CtxArray linearParticlesSuperCellCtx(-1); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(hasValidParticleCtx[idx]) + { + // offset of the particle relative to the origin of the local domain + DataSpace const particleCellOffset + = srcFrame[srcParticleIdxCtx[idx]][domainCellIdxIdentifier] + - localDomainCellOffset; + particlesSuperCellCtx[idx] = particleCellOffset / SuperCellSize::toRT(); + linearParticlesSuperCellCtx[idx] + = DataSpaceOperations::map(numSuperCells, particlesSuperCellCtx[idx]); + sharedLinearSuperCellIds[linearIdx] = linearParticlesSuperCellCtx[idx]; + DataSpace const localCellIdx( + particleCellOffset - particlesSuperCellCtx[idx] * SuperCellSize::toRT()); + lCellIdxCtx[idx] + = DataSpaceOperations::template map(localCellIdx); + } + }); + + cupla::__syncthreads(acc); + + memory::CtxArray masterVirtualThreadIdxCtx( + workerIdx, + [&](uint32_t const linearIdx, uint32_t const) { return linearIdx - 1; }); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(hasValidParticleCtx[idx]) + { + auto& vThreadMasterIdx = 
masterVirtualThreadIdxCtx[idx]; + /* search master thread index */ + while(vThreadMasterIdx >= 0) + { + if(linearParticlesSuperCellCtx[idx] != sharedLinearSuperCellIds[vThreadMasterIdx]) + break; + + --vThreadMasterIdx; + } + ++vThreadMasterIdx; + + // load empty frame if virtual thread is the master + if(vThreadMasterIdx == linearIdx) + { + /* counter[2] -> number of used frames */ + nvidia::atomicAllInc(acc, &(counter[2]), ::alpaka::hierarchy::Blocks{}); + DestFramePtr tmpFrame = destBox.getEmptyFrame(acc); + destFramePtr[linearIdx] = tmpFrame; + destBox.setAsFirstFrame( + acc, + tmpFrame, + particlesSuperCellCtx[idx] + cellDesc.getGuardingSuperCells()); + } + } + }); + + cupla::__syncthreads(acc); + + forEachParticle([&](uint32_t const linearIdx, uint32_t const idx) { + if(hasValidParticleCtx[idx]) + { + /* copy attributes and activate particle*/ + auto parDest = destFramePtr[masterVirtualThreadIdxCtx[idx]][linearIdx]; + auto parDestDeselect = deselect>(parDest); + + assign(parDestDeselect, srcFrame[srcParticleIdxCtx[idx]]); + parDest[localCellIdx_] = lCellIdxCtx[idx]; + parDest[multiMask_] = 1; + /* counter[1] -> number of loaded particles + * this counter is evaluated on host side + * (check that loaded particles by this kernel == loaded particles from HDF5 file)*/ + nvidia::atomicAllInc(acc, &(counter[1]), ::alpaka::hierarchy::Blocks{}); + } + }); } + }; + } // namespace kernel + + /** Copy particles from big frame to PMacc frame structure + * (Opposite to ConcatListOfFrames) + * + * - convert a user-defined domainCellIdx to localCellIdx + * - processed particles per block <= number of cells per superCell + * + * @tparam T_LogLvl type of the loc level for debuging output + * @tparam T_DestSpecies pmacc:ParticlesBase, type of the destination species + * @tparam T_SrcFrame pmacc:ParticlesBox, type of the source particle frame + * @tparam T_Space pmacc::DataSpace, type for indicies and offsets within the domain + * @tparam T_Identifier Identifier, type of the 
identifier for the total domain offset + * @tparam T_CellDescription pmacc::MappingDescription, type of the domain description + * + * @param destSpecies particle species instance whose deviceBuffer is written + * @param srcFrame device frame with particles (is used as source) + * @param numParticles number of particles in srcFrame + * @param chunkSize number of particles to process in one kernel call + * @param localDomainCellOffset offset in cells to user-defined domain (@see wiki PIConGPU domain + * definitions) + * @param domainCellIdxIdentifier the identifier for the particle domain cellIdx + * that is calculated back to the local domain + * with respect to localDomainCellOffset + * @param cellDesc supercell domain description + * @param logLvl Log level used for information logging + */ + template< + typename T_LogLvl, + typename T_DestSpecies, + typename T_SrcFrame, + typename T_Space, + typename T_Identifier, + typename T_CellDescription> + HINLINE void splitIntoListOfFrames( + T_DestSpecies& destSpecies, + T_SrcFrame srcFrame, + uint32_t numParticles, + uint32_t const chunkSize, + T_Space const& localDomainCellOffset, + T_Identifier const domainCellIdxIdentifier, + T_CellDescription const& cellDesc, + T_LogLvl const& logLvl = T_LogLvl()) + { + using SuperCellSize = typename T_CellDescription::SuperCellSize; + uint32_t const cellsInSuperCell = pmacc::math::CT::volume::type::value; + + /* counter is used to apply for work, count used frames and count loaded particles + * [0] -> offset for loading particles + * [1] -> number of loaded particles + * [2] -> number of used frames + * + * all values are zero after initialization + */ + GridBuffer counterBuffer(DataSpace(3)); + + uint32_t const iterationsForLoad + = math::float2int_ru(static_cast(numParticles) / static_cast(chunkSize)); + uint32_t leftOverParticles = numParticles; + + for(uint32_t i = 0; i < iterationsForLoad; ++i) + { + /* only load a chunk of particles per iteration to avoid blow up of frame 
usage */ + uint32_t currentChunkSize = std::min(leftOverParticles, chunkSize); + log(logLvl, "load particles on device chunk offset=%1%; chunk size=%2%; left particles %3%") + % (i * chunkSize) % currentChunkSize % leftOverParticles; + + constexpr uint32_t numWorkers + = pmacc::traits::GetNumWorkers::type::value>::value; + + PMACC_KERNEL(kernel::SplitIntoListOfFrames{}) + (math::float2int_ru(double(currentChunkSize) / double(cellsInSuperCell)), numWorkers)( + counterBuffer.getDeviceBuffer().getDataBox(), + destSpecies.getDeviceParticlesBox(), + srcFrame, + static_cast(numParticles), + localDomainCellOffset, + domainCellIdxIdentifier, + cellDesc); + destSpecies.fillAllGaps(); + leftOverParticles -= currentChunkSize; } - ); - __syncthreads(); + counterBuffer.deviceToHost(); + log(logLvl, "wait for last processed chunk: %1%") % T_SrcFrame::getName(); + + __getTransactionEvent().waitForFinished(); - forEachParticle( - [&]( - uint32_t const linearIdx, - uint32_t const idx - ) + log(logLvl, "used frames to load particles: %1%") % counterBuffer.getHostBuffer().getDataBox()[2]; + + if(static_cast(counterBuffer.getHostBuffer().getDataBox()[1]) != numParticles) { - if( hasValidParticleCtx[ idx ] ) - { - /* copy attributes and activate particle*/ - auto parDest = destFramePtr[ masterVirtualThreadIdxCtx[ idx ] ][ linearIdx ]; - auto parDestDeselect = deselect< - bmpl::vector2< - localCellIdx, - multiMask - > - >( parDest ); - - assign( - parDestDeselect, - srcFrame[ srcParticleIdxCtx[ idx ] ] - ); - parDest[ localCellIdx_ ] = lCellIdxCtx[ idx ]; - parDest[ multiMask_ ] = 1; - /* counter[1] -> number of loaded particles - * this counter is evaluated on host side - * (check that loaded particles by this kernel == loaded particles from HDF5 file)*/ - nvidia::atomicAllInc( - acc, - &( counter[ 1 ] ), - ::alpaka::hierarchy::Blocks{} - ); - } + log(logLvl, "error load species | counter is %1% but should %2%") + % counterBuffer.getHostBuffer().getDataBox()[1] % numParticles; + throw 
std::runtime_error("Failed to load expected number of particles to GPU."); } - ); - } - }; -} // namespace kernel - - /** Copy particles from big frame to PMacc frame structure - * (Opposite to ConcatListOfFrames) - * - * - convert a user-defined domainCellIdx to localCellIdx - * - processed particles per block <= number of cells per superCell - * - * @tparam T_LogLvl type of the loc level for debuging output - * @tparam T_DestSpecies pmacc:ParticlesBase, type of the destination species - * @tparam T_SrcFrame pmacc:ParticlesBox, type of the source particle frame - * @tparam T_Space pmacc::DataSpace, type for indicies and offsets within the domain - * @tparam T_Identifier Identifier, type of the identifier for the total domain offset - * @tparam T_CellDescription pmacc::MappingDescription, type of the domain description - * - * @param destSpecies particle species instance whose deviceBuffer is written - * @param srcFrame device frame with particles (is used as source) - * @param numParticles number of particles in srcFrame - * @param chunkSize number of particles to process in one kernel call - * @param localDomainCellOffset offset in cells to user-defined domain (@see wiki PIConGPU domain definitions) - * @param domainCellIdxIdentifier the identifier for the particle domain cellIdx - * that is calculated back to the local domain - * with respect to localDomainCellOffset - * @param cellDesc supercell domain description - * @param logLvl Log level used for information logging - */ - template< - typename T_LogLvl, - typename T_DestSpecies, - typename T_SrcFrame, - typename T_Space, - typename T_Identifier, - typename T_CellDescription - > - HINLINE void splitIntoListOfFrames( - T_DestSpecies & destSpecies, - T_SrcFrame srcFrame, - uint32_t numParticles, - uint32_t const chunkSize, - T_Space const & localDomainCellOffset, - T_Identifier const domainCellIdxIdentifier, - T_CellDescription const & cellDesc, - T_LogLvl const & logLvl = T_LogLvl( ) - ) - { - using 
SuperCellSize = typename T_CellDescription::SuperCellSize; - uint32_t const cellsInSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - - /* counter is used to apply for work, count used frames and count loaded particles - * [0] -> offset for loading particles - * [1] -> number of loaded particles - * [2] -> number of used frames - * - * all values are zero after initialization - */ - GridBuffer< - uint32_t, - DIM1 - > counterBuffer( DataSpace( 3 ) ); - - uint32_t const iterationsForLoad = algorithms::math::float2int_ru( - static_cast< double >( numParticles ) / - static_cast< double >( chunkSize ) - ); - uint32_t leftOverParticles = numParticles; - - for( uint32_t i = 0; i < iterationsForLoad; ++i ) - { - /* only load a chunk of particles per iteration to avoid blow up of frame usage */ - uint32_t currentChunkSize = std::min( - leftOverParticles, - chunkSize - ); - log( - logLvl, - "load particles on device chunk offset=%1%; chunk size=%2%; left particles %3%" - ) % ( i * chunkSize ) % - currentChunkSize % - leftOverParticles; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - pmacc::math::CT::volume< SuperCellSize >::type::value - >::value; - - PMACC_KERNEL( kernel::SplitIntoListOfFrames< numWorkers >{ } )( - algorithms::math::float2int_ru( double( currentChunkSize ) / double( cellsInSuperCell ) ), - numWorkers - )( - counterBuffer.getDeviceBuffer( ).getDataBox( ), - destSpecies.getDeviceParticlesBox( ), - srcFrame, - static_cast< int >( numParticles ), - localDomainCellOffset, - domainCellIdxIdentifier, - cellDesc - ); - destSpecies.fillAllGaps( ); - leftOverParticles -= currentChunkSize; - } - - counterBuffer.deviceToHost( ); - log( - logLvl, - "wait for last processed chunk: %1%" - ) % T_SrcFrame::getName( ); - - __getTransactionEvent( ).waitForFinished( ); - - log( - logLvl, - "used frames to load particles: %1%" - ) % counterBuffer.getHostBuffer( ).getDataBox( )[ 2 ]; - - if( - static_cast( 
counterBuffer.getHostBuffer().getDataBox( )[ 1 ] ) != - numParticles - ) - { - log( - logLvl, - "error load species | counter is %1% but should %2%" - ) % counterBuffer.getHostBuffer( ).getDataBox( )[ 1 ] % - numParticles; - throw std::runtime_error( "Failed to load expected number of particles to GPU." ); - } - } - -} // namespace operations -} // namespace particles + } + + } // namespace operations + } // namespace particles } // namespace pmacc diff --git a/include/pmacc/particles/particleFilter/FilterFactory.hpp b/include/pmacc/particles/particleFilter/FilterFactory.hpp index 92b3237b3d..649d8465e8 100644 --- a/include/pmacc/particles/particleFilter/FilterFactory.hpp +++ b/include/pmacc/particles/particleFilter/FilterFactory.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -37,27 +37,12 @@ namespace pmacc { - - - -template > + template> class FilterFactory -{ -public: - - typedef - typename LinearInherit - < - typename MakeSeq< - DefaultFilter<> , - UserTypeList, - TrueFilter - >::type - >::type FilterType; - -}; - -}//namespace pmacc - - + { + public: + typedef + typename LinearInherit, UserTypeList, TrueFilter>::type>::type FilterType; + }; +} // namespace pmacc diff --git a/include/pmacc/particles/particleFilter/PositionFilter.hpp b/include/pmacc/particles/particleFilter/PositionFilter.hpp index 40ecf1eb83..47abf22426 100644 --- a/include/pmacc/particles/particleFilter/PositionFilter.hpp +++ b/include/pmacc/particles/particleFilter/PositionFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -29,87 +29,82 @@ namespace pmacc { - - -namespace privatePositionFilter -{ - -template -class PositionFilter : public Base -{ -public: - static constexpr uint32_t dim = T_dim; -protected: - DataSpace offset; - DataSpace max; - DataSpace superCellIdx; - -public: - - HDINLINE PositionFilter() + namespace privatePositionFilter { - } - - HDINLINE void setWindowPosition(DataSpace offset, DataSpace size) + template + class PositionFilter : public Base + { + public: + static constexpr uint32_t dim = T_dim; + + protected: + DataSpace offset; + DataSpace max; + DataSpace superCellIdx; + + public: + HDINLINE PositionFilter() + { + } + + HDINLINE void setWindowPosition(DataSpace offset, DataSpace size) + { + this->offset = offset; + this->max = offset + size; + } + + HDINLINE void setSuperCellPosition(DataSpace superCellIdx) + { + this->superCellIdx = superCellIdx; + } + + HDINLINE DataSpace getOffset() + { + return offset; + } + + template + HDINLINE bool operator()(FRAME& frame, lcellId_t id) + { + DataSpace localCellIdx = DataSpaceOperations::template map( + (uint32_t)(frame[id][localCellIdx_])); + DataSpace pos = this->superCellIdx + localCellIdx; + bool result = true; + for(uint32_t d = 0; d < dim; ++d) + result = result && (this->offset[d] <= pos[d]) && (pos[d] < this->max[d]); + return Base::operator()(frame, id) && result; + } + }; + + } // namespace privatePositionFilter + + /** This wrapper class is needed because for filters we are only allowed to + * define one template parameter "base" (it is a constrain from FilterFactory) + */ + template + class PositionFilter3D : public privatePositionFilter::PositionFilter { - this->offset = offset; - this->max = offset + size; - } + }; - HDINLINE void setSuperCellPosition(DataSpace superCellIdx) + template + class PositionFilter2D : public privatePositionFilter::PositionFilter { - this->superCellIdx = superCellIdx; - } + }; - HDINLINE DataSpace getOffset() - { - return offset; - } + template + struct GetPositionFilter; 
- template - HDINLINE bool operator()(FRAME & frame, lcellId_t id) + template<> + struct GetPositionFilter { - DataSpace localCellIdx = DataSpaceOperations::template map< - typename FRAME::SuperCellSize - > ((uint32_t) (frame[id][localCellIdx_])); - DataSpace pos = this->superCellIdx + localCellIdx; - bool result = true; - for (uint32_t d = 0; d < dim; ++d) - result= result && (this->offset[d] <= pos[d]) && (pos[d]max[d]); - return Base::operator() (frame, id) && result; - } - -}; - -} //namespace privatePositionFilter - -/** This wrapper class is needed because for filters we are only allowed to - * define one template parameter "base" (it is a constrain from FilterFactory) - */ -template -class PositionFilter3D : public privatePositionFilter::PositionFilter -{ -}; - -template -class PositionFilter2D : public privatePositionFilter::PositionFilter -{ -}; - -template -struct GetPositionFilter; + typedef PositionFilter3D<> type; + }; -template<> -struct GetPositionFilter -{ - typedef PositionFilter3D<> type; -}; - -template<> -struct GetPositionFilter -{ - typedef PositionFilter2D<> type; -}; + template<> + struct GetPositionFilter + { + typedef PositionFilter2D<> type; + }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/particleFilter/system/DefaultFilter.hpp b/include/pmacc/particles/particleFilter/system/DefaultFilter.hpp index 1825772ec4..6e588a2a52 100644 --- a/include/pmacc/particles/particleFilter/system/DefaultFilter.hpp +++ b/include/pmacc/particles/particleFilter/system/DefaultFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -27,22 +27,21 @@ namespace pmacc { - - -template -class DefaultFilter : public Base -{ + template + class DefaultFilter : public Base + { private: bool filterActive; - public: + public: HDINLINE DefaultFilter() : filterActive(false) - {} + { + } template - HDINLINE bool operator()(FRAME & frame,lcellId_t id) + HDINLINE bool operator()(FRAME& frame, lcellId_t id) { - return (!filterActive)||Base::operator() (frame,id); + return (!filterActive) || Base::operator()(frame, id); } /*disable or enable filter @@ -51,41 +50,42 @@ class DefaultFilter : public Base */ HDINLINE void setStatus(bool active) { - filterActive=active; + filterActive = active; } HDINLINE bool getStatus() { return filterActive; } -}; + }; -template<> -class DefaultFilter -{ + template<> + class DefaultFilter + { private: bool alwaysTrue; - public: + public: HDINLINE DefaultFilter() : alwaysTrue(true) - {} + { + } template - HDINLINE bool operator()(FRAME & frame,lcellId_t id) + HDINLINE bool operator()(FRAME& frame, lcellId_t id) { return alwaysTrue; } HDINLINE void setDefault(bool value) { - alwaysTrue=value; + alwaysTrue = value; } HDINLINE bool getDefault() { return alwaysTrue; } -}; + }; -} //namespace Frame +} // namespace pmacc diff --git a/include/pmacc/particles/particleFilter/system/FalseFilter.hpp b/include/pmacc/particles/particleFilter/system/FalseFilter.hpp index dcde30aace..10c1dfa2cd 100644 --- a/include/pmacc/particles/particleFilter/system/FalseFilter.hpp +++ b/include/pmacc/particles/particleFilter/system/FalseFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -27,12 +27,9 @@ namespace pmacc { - class FalseFilter { - public: - FalseFilter() { } @@ -42,10 +39,10 @@ namespace pmacc } template - bool operator()(FRAME & frame, lcellId_t id) + bool operator()(FRAME& frame, lcellId_t id) { return false; } }; -} //namespace Frame +} // namespace pmacc diff --git a/include/pmacc/particles/particleFilter/system/TrueFilter.hpp b/include/pmacc/particles/particleFilter/system/TrueFilter.hpp index 4d0f82f20e..6bd93eeaf9 100644 --- a/include/pmacc/particles/particleFilter/system/TrueFilter.hpp +++ b/include/pmacc/particles/particleFilter/system/TrueFilter.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. * @@ -27,12 +27,9 @@ namespace pmacc { - class TrueFilter { - public: - HDINLINE TrueFilter() { } @@ -44,4 +41,4 @@ namespace pmacc } }; -} //namespace Frame +} // namespace pmacc diff --git a/include/pmacc/particles/policies/DeleteParticles.hpp b/include/pmacc/particles/policies/DeleteParticles.hpp index 7924dd3fda..d08cecdbd0 100644 --- a/include/pmacc/particles/policies/DeleteParticles.hpp +++ b/include/pmacc/particles/policies/DeleteParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -21,28 +21,29 @@ #pragma once -namespace pmacc{ -namespace particles { -namespace policies { - - /** - * Policy for HandleGuardParticles that removes all particles from guard cells - */ - struct DeleteParticles +namespace pmacc +{ + namespace particles { - template< class T_Particles > - void - handleOutgoing(T_Particles& par, int32_t direction) const + namespace policies { - par.deleteGuardParticles(direction); - } + /** + * Policy for HandleGuardParticles that removes all particles from guard cells + */ + struct DeleteParticles + { + template + void handleOutgoing(T_Particles& par, int32_t direction) const + { + par.deleteGuardParticles(direction); + } - template< class T_Particles > - void - handleIncoming(T_Particles& par, int32_t direction) const - {} - }; + template + void handleIncoming(T_Particles& par, int32_t direction) const + { + } + }; -} // namespace policies -} // namespace particles -} // namespace pmacc + } // namespace policies + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/policies/ExchangeParticles.hpp b/include/pmacc/particles/policies/ExchangeParticles.hpp index 1a431131e1..e92506bb37 100644 --- a/include/pmacc/particles/policies/ExchangeParticles.hpp +++ b/include/pmacc/particles/policies/ExchangeParticles.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -24,31 +24,31 @@ #include "pmacc/types.hpp" #include "pmacc/Environment.hpp" -namespace pmacc{ -namespace particles { -namespace policies { - - /** - * Policy for \see HandleGuardRegion that moves particles from guard cells to exchange buffers - * and sends those to the correct neighbors - */ - struct ExchangeParticles +namespace pmacc +{ + namespace particles { - template< class T_Particles > - void - handleOutgoing(T_Particles& par, int32_t direction) const + namespace policies { - Environment<>::get().ParticleFactory().createTaskSendParticlesExchange(par, direction); - } + /** + * Policy for \see HandleGuardRegion that moves particles from guard cells to exchange buffers + * and sends those to the correct neighbors + */ + struct ExchangeParticles + { + template + void handleOutgoing(T_Particles& par, int32_t direction) const + { + Environment<>::get().ParticleFactory().createTaskSendParticlesExchange(par, direction); + } - template< class T_Particles > - void - handleIncoming(T_Particles& par, int32_t direction) const - { - Environment<>::get().ParticleFactory().createTaskReceiveParticlesExchange(par, direction); - } - }; + template + void handleIncoming(T_Particles& par, int32_t direction) const + { + Environment<>::get().ParticleFactory().createTaskReceiveParticlesExchange(par, direction); + } + }; -} // namespace policies -} // namespace particles -} // namespace pmacc + } // namespace policies + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/tasks/ParticleFactory.hpp b/include/pmacc/particles/tasks/ParticleFactory.hpp index 8b30832b54..4f71881cd1 100644 --- a/include/pmacc/particles/tasks/ParticleFactory.hpp +++ b/include/pmacc/particles/tasks/ParticleFactory.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -27,7 +27,6 @@ namespace pmacc { - /** * Singleton Factory-pattern class for creation of several types of EventTasks. 
* Tasks are not actually 'returned' but immediately initialised and @@ -36,38 +35,40 @@ namespace pmacc class ParticleFactory { public: - /** * Creates a TaskReceive. * @param ex Exchange to create new TaskReceive with * @param task_out returns the newly created task - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ template - EventTask createTaskParticlesReceive(ParBase &parBuffer, - ITask *registeringTask = nullptr); + EventTask createTaskParticlesReceive(ParBase& parBuffer, ITask* registeringTask = nullptr); template - EventTask createTaskReceiveParticlesExchange(ParBase &parBase, uint32_t exchange, - ITask *registeringTask = nullptr); + EventTask createTaskReceiveParticlesExchange( + ParBase& parBase, + uint32_t exchange, + ITask* registeringTask = nullptr); /** * Creates a TaskSend. * @param ex Exchange to create new TaskSend with * @param task_in TaskReceive to register at new TaskSend - * @param registeringTask optional pointer to an ITask which should be registered at the new task as an observer + * @param registeringTask optional pointer to an ITask which should be registered at the new task as an + * observer */ template - EventTask createTaskParticlesSend(ParBase &parBase, - ITask *registeringTask = nullptr); + EventTask createTaskParticlesSend(ParBase& parBase, ITask* registeringTask = nullptr); template - EventTask createTaskSendParticlesExchange(ParBase &parBase, uint32_t exchange, - ITask *registeringTask = nullptr); + EventTask createTaskSendParticlesExchange( + ParBase& parBase, + uint32_t exchange, + ITask* registeringTask = nullptr); private: - friend struct detail::Environment; /** @@ -80,10 +81,9 @@ namespace pmacc return instance; } - ParticleFactory() { }; - - ParticleFactory(const ParticleFactory&) { }; + ParticleFactory(){}; + ParticleFactory(const 
ParticleFactory&){}; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/tasks/ParticleFactory.tpp b/include/pmacc/particles/tasks/ParticleFactory.tpp index 7698dc3772..23228f4681 100644 --- a/include/pmacc/particles/tasks/ParticleFactory.tpp +++ b/include/pmacc/particles/tasks/ParticleFactory.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -33,47 +33,43 @@ namespace pmacc { - template - inline EventTask ParticleFactory::createTaskParticlesReceive(ParBase &parBase, - ITask *registeringTask) + inline EventTask ParticleFactory::createTaskParticlesReceive(ParBase& parBase, ITask* registeringTask) { - TaskParticlesReceive* task = new TaskParticlesReceive (parBase); + TaskParticlesReceive* task = new TaskParticlesReceive(parBase); return Environment<>::get().Factory().startTask(*task, registeringTask); } template - inline EventTask ParticleFactory::createTaskReceiveParticlesExchange(ParBase &parBase, uint32_t exchange, - ITask *registeringTask) + inline EventTask ParticleFactory::createTaskReceiveParticlesExchange( + ParBase& parBase, + uint32_t exchange, + ITask* registeringTask) { - TaskReceiveParticlesExchange* task = new TaskReceiveParticlesExchange (parBase, exchange); + TaskReceiveParticlesExchange* task = new TaskReceiveParticlesExchange(parBase, exchange); return Environment<>::get().Factory().startTask(*task, registeringTask); } template - inline EventTask ParticleFactory::createTaskParticlesSend(ParBase &parBase, - ITask *registeringTask) + inline EventTask ParticleFactory::createTaskParticlesSend(ParBase& parBase, ITask* registeringTask) { - TaskParticlesSend* task = new TaskParticlesSend (parBase); + TaskParticlesSend* task = new TaskParticlesSend(parBase); return Environment<>::get().Factory().startTask(*task, registeringTask); } template - inline EventTask ParticleFactory::createTaskSendParticlesExchange(ParBase &parBase, uint32_t exchange, - 
ITask *registeringTask) + inline EventTask ParticleFactory::createTaskSendParticlesExchange( + ParBase& parBase, + uint32_t exchange, + ITask* registeringTask) { - TaskSendParticlesExchange* task = new TaskSendParticlesExchange (parBase, exchange); + TaskSendParticlesExchange* task = new TaskSendParticlesExchange(parBase, exchange); return Environment<>::get().Factory().startTask(*task, registeringTask); } - -} //namespace pmacc - - - - +} // namespace pmacc diff --git a/include/pmacc/particles/tasks/TaskParticlesReceive.hpp b/include/pmacc/particles/tasks/TaskParticlesReceive.hpp index ee96923fec..c604bfd604 100644 --- a/include/pmacc/particles/tasks/TaskParticlesReceive.hpp +++ b/include/pmacc/particles/tasks/TaskParticlesReceive.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -28,12 +28,10 @@ namespace pmacc { - template class TaskParticlesReceive : public MPITask { public: - typedef T_Particles Particles; typedef typename Particles::HandleGuardRegion HandleGuardRegion; typedef typename HandleGuardRegion::HandleExchanged HandleExchanged; @@ -45,9 +43,9 @@ namespace pmacc Exchanges = traits::NumberOfExchanges::value }; - TaskParticlesReceive(Particles &parBase) : - parBase(parBase), - state(Constructor){ } + TaskParticlesReceive(Particles& parBase) : parBase(parBase), state(Constructor) + { + } virtual void init() { @@ -56,13 +54,13 @@ namespace pmacc HandleExchanged handleExchanged; HandleNotExchanged handleNotExchanged; - for (int i = 1; i < Exchanges; ++i) + for(int i = 1; i < Exchanges; ++i) { /* Start new transaction */ __startTransaction(serialEvent); /* Handle particles */ - if (parBase.getParticlesBuffer().hasReceiveExchange(i)) + if(parBase.getParticlesBuffer().hasReceiveExchange(i)) handleExchanged.handleIncoming(parBase, i); else handleNotExchanged.handleIncoming(parBase, i); @@ -76,27 +74,27 @@ namespace pmacc bool executeIntern() { - switch (state) + switch(state) { - 
case Init: - break; - case WaitForReceived: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) - state = CallFillGaps; - break; - case CallFillGaps: - state = WaitForFillGaps; - __startTransaction(); - parBase.fillBorderGaps(); - tmpEvent = __endTransaction(); - state = Finish; - break; - case WaitForFillGaps: - break; - case Finish: - return nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()); - default: - return false; + case Init: + break; + case WaitForReceived: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) + state = CallFillGaps; + break; + case CallFillGaps: + state = WaitForFillGaps; + __startTransaction(); + parBase.fillBorderGaps(); + tmpEvent = __endTransaction(); + state = Finish; + break; + case WaitForFillGaps: + break; + case Finish: + return nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()); + default: + return false; } return false; @@ -107,7 +105,9 @@ namespace pmacc notify(this->myId, RECVFINISHED, nullptr); } - void event(id_t, EventType, IEventData*) { } + void event(id_t, EventType, IEventData*) + { + } std::string toString() { @@ -115,7 +115,6 @@ namespace pmacc } private: - enum state_t { Constructor, @@ -131,7 +130,6 @@ namespace pmacc Particles& parBase; state_t state; EventTask tmpEvent; - }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/tasks/TaskParticlesSend.hpp b/include/pmacc/particles/tasks/TaskParticlesSend.hpp index 706be03e62..cbff5240ed 100644 --- a/include/pmacc/particles/tasks/TaskParticlesSend.hpp +++ b/include/pmacc/particles/tasks/TaskParticlesSend.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -27,97 +27,92 @@ namespace pmacc { - -template -class TaskParticlesSend : public MPITask -{ -public: - - typedef T_Particles Particles; - typedef typename Particles::HandleGuardRegion HandleGuardRegion; - typedef typename HandleGuardRegion::HandleExchanged HandleExchanged; - typedef typename HandleGuardRegion::HandleNotExchanged HandleNotExchanged; - - enum - { - Dim = Particles::Dim, - Exchanges = traits::NumberOfExchanges::value - }; - - TaskParticlesSend(Particles &parBase) : - parBase(parBase), - state(Constructor) + template + class TaskParticlesSend : public MPITask { - } + public: + typedef T_Particles Particles; + typedef typename Particles::HandleGuardRegion HandleGuardRegion; + typedef typename HandleGuardRegion::HandleExchanged HandleExchanged; + typedef typename HandleGuardRegion::HandleNotExchanged HandleNotExchanged; - virtual void init() - { - state = Init; - EventTask serialEvent = __getTransactionEvent(); - HandleExchanged handleExchanged; - HandleNotExchanged handleNotExchanged; - - for (int i = 1; i < Exchanges; ++i) + enum { - /* Start new transaction */ - __startTransaction(serialEvent); + Dim = Particles::Dim, + Exchanges = traits::NumberOfExchanges::value + }; - /* Handle particles */ - if (parBase.getParticlesBuffer().hasSendExchange(i)) - handleExchanged.handleOutgoing(parBase, i); - else - handleNotExchanged.handleOutgoing(parBase, i); - - /* End transaction */ - tmpEvent += __endTransaction(); + TaskParticlesSend(Particles& parBase) : parBase(parBase), state(Constructor) + { } - state = WaitForSend; - } + virtual void init() + { + state = Init; + EventTask serialEvent = __getTransactionEvent(); + HandleExchanged handleExchanged; + HandleNotExchanged handleNotExchanged; + + for(int i = 1; i < Exchanges; ++i) + { + /* Start new transaction */ + __startTransaction(serialEvent); + + /* Handle particles */ + if(parBase.getParticlesBuffer().hasSendExchange(i)) + handleExchanged.handleOutgoing(parBase, i); + else + 
handleNotExchanged.handleOutgoing(parBase, i); + + /* End transaction */ + tmpEvent += __endTransaction(); + } + + state = WaitForSend; + } - bool executeIntern() - { - switch (state) + bool executeIntern() { - case Init: - break; - case WaitForSend: - return nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()); - default: + switch(state) + { + case Init: + break; + case WaitForSend: + return nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()); + default: + return false; + } + return false; } - return false; - } + virtual ~TaskParticlesSend() + { + notify(this->myId, RECVFINISHED, nullptr); + } - virtual ~TaskParticlesSend() - { - notify(this->myId, RECVFINISHED, nullptr); - } + void event(id_t, EventType, IEventData*) + { + } - void event(id_t, EventType, IEventData*) - { - } + std::string toString() + { + return "TaskParticlesSend"; + } - std::string toString() - { - return "TaskParticlesSend"; - } + private: + enum state_t + { + Constructor, + Init, + WaitForSend -private: + }; - enum state_t - { - Constructor, - Init, - WaitForSend + Particles& parBase; + state_t state; + EventTask tmpEvent; }; - - Particles& parBase; - state_t state; - EventTask tmpEvent; -}; - -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/tasks/TaskReceiveParticlesExchange.hpp b/include/pmacc/particles/tasks/TaskReceiveParticlesExchange.hpp index d583dbadc9..76285c0fec 100644 --- a/include/pmacc/particles/tasks/TaskReceiveParticlesExchange.hpp +++ b/include/pmacc/particles/tasks/TaskReceiveParticlesExchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,25 +28,25 @@ namespace pmacc { - template class TaskReceiveParticlesExchange : public MPITask { public: - enum { Dim = ParBase::Dim, Exchanges = traits::NumberOfExchanges::value }; - TaskReceiveParticlesExchange(ParBase &parBase, uint32_t exchange) : - parBase(parBase), - exchange(exchange), - state(Constructor), - maxSize(parBase.getParticlesBuffer().getReceiveExchangeStack(exchange).getMaxParticlesCount()), - initDependency(__getTransactionEvent()), - lastSize(0) { } + TaskReceiveParticlesExchange(ParBase& parBase, uint32_t exchange) + : parBase(parBase) + , exchange(exchange) + , state(Constructor) + , maxSize(parBase.getParticlesBuffer().getReceiveExchangeStack(exchange).getMaxParticlesCount()) + , initDependency(__getTransactionEvent()) + , lastSize(0) + { + } virtual void init() { @@ -58,48 +58,49 @@ namespace pmacc bool executeIntern() { - switch (state) + switch(state) { - case Init: - break; - case WaitForReceive: - - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(lastReceiveEvent.getTaskId())) + case Init: + break; + case WaitForReceive: + + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(lastReceiveEvent.getTaskId())) + { + state = InitInsert; + // bash is finished + __startTransaction(); + lastSize + = parBase.getParticlesBuffer().getReceiveExchangeStack(exchange).getHostParticlesCurrentSize(); + parBase.insertParticles(exchange); + tmpEvent = __endTransaction(); + initDependency = tmpEvent; + state = WaitForInsert; + } + + break; + case InitInsert: + break; + case WaitForInsert: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) + { + state = Wait; + PMACC_ASSERT(lastSize <= maxSize); + // check for next bash round + if(lastSize == maxSize) + init(); // call init and run a full send cycle + else { - state = InitInsert; - //bash is finished - __startTransaction(); - lastSize = 
parBase.getParticlesBuffer().getReceiveExchangeStack(exchange).getHostParticlesCurrentSize(); - parBase.insertParticles(exchange); - tmpEvent = __endTransaction(); - initDependency = tmpEvent; - state = WaitForInsert; + state = Finished; + return true; } - - break; - case InitInsert: - break; - case WaitForInsert: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) - { - state=Wait; - PMACC_ASSERT(lastSize <= maxSize); - //check for next bash round - if (lastSize == maxSize) - init(); //call init and run a full send cycle - else - { - state = Finished; - return true; - } - } - break; - case Wait: - break; - case Finished: - return true; - default: - return false; + } + break; + case Wait: + break; + case Finished: + return true; + default: + return false; } return false; @@ -110,7 +111,9 @@ namespace pmacc notify(this->myId, RECVFINISHED, nullptr); } - void event(id_t, EventType, IEventData*) { } + void event(id_t, EventType, IEventData*) + { + } std::string toString() { @@ -118,7 +121,6 @@ namespace pmacc } private: - enum state_t { Constructor, @@ -132,8 +134,6 @@ namespace pmacc }; - - ParBase& parBase; state_t state; EventTask tmpEvent; @@ -144,4 +144,4 @@ namespace pmacc size_t lastSize; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/tasks/TaskSendParticlesExchange.hpp b/include/pmacc/particles/tasks/TaskSendParticlesExchange.hpp index a2fba34afc..830a186ca9 100644 --- a/include/pmacc/particles/tasks/TaskSendParticlesExchange.hpp +++ b/include/pmacc/particles/tasks/TaskSendParticlesExchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,24 +28,26 @@ namespace pmacc { - template class TaskSendParticlesExchange : public MPITask { public: - enum { Dim = ParBase::Dim, }; - TaskSendParticlesExchange(ParBase &parBase, uint32_t exchange) : - parBase(parBase), - exchange(exchange), - state(Constructor), - maxSize(parBase.getParticlesBuffer().getSendExchangeStack(exchange).getMaxParticlesCount()), - initDependency(__getTransactionEvent()), - lastSize(0),lastSendEvent(EventTask()),retryCounter(0){ } + TaskSendParticlesExchange(ParBase& parBase, uint32_t exchange) + : parBase(parBase) + , exchange(exchange) + , state(Constructor) + , maxSize(parBase.getParticlesBuffer().getSendExchangeStack(exchange).getMaxParticlesCount()) + , initDependency(__getTransactionEvent()) + , lastSize(0) + , lastSendEvent(EventTask()) + , retryCounter(0) + { + } virtual void init() { @@ -58,54 +60,54 @@ namespace pmacc bool executeIntern() { - switch (state) + switch(state) { - case Init: - break; - case WaitForBash: - - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()) && - nullptr == Environment<>::get().Manager().getITaskIfNotFinished(lastSendEvent.getTaskId())) + case Init: + break; + case WaitForBash: + + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId()) + && nullptr == Environment<>::get().Manager().getITaskIfNotFinished(lastSendEvent.getTaskId())) + { + state = InitSend; + // bash is finished + __startTransaction(); + lastSize + = parBase.getParticlesBuffer().getSendExchangeStack(exchange).getDeviceParticlesCurrentSize(); + lastSendEvent = parBase.getParticlesBuffer().asyncSendParticles(__getTransactionEvent(), exchange); + initDependency = lastSendEvent; + __endTransaction(); + state = WaitForSend; + } + + break; + case InitSend: + break; + case WaitForSend: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) + { + PMACC_ASSERT(lastSize <= maxSize); + // check for next bash round + if(lastSize == 
maxSize) { - state = InitSend; - //bash is finished - __startTransaction(); - lastSize = parBase.getParticlesBuffer().getSendExchangeStack(exchange).getDeviceParticlesCurrentSize(); - lastSendEvent = parBase.getParticlesBuffer().asyncSendParticles(__getTransactionEvent(), exchange); - initDependency = lastSendEvent; - __endTransaction(); - state = WaitForSend; + ++retryCounter; + init(); // call init and run a full send cycle } - - break; - case InitSend: - break; - case WaitForSend: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) - { - PMACC_ASSERT(lastSize <= maxSize); - //check for next bash round - if (lastSize == maxSize) - { - ++retryCounter; - init(); //call init and run a full send cycle - - } - else - state = WaitForSendEnd; - } - break; - case WaitForSendEnd: - if (nullptr == Environment<>::get().Manager().getITaskIfNotFinished(lastSendEvent.getTaskId())) - { - state = Finished; - return true; - } - break; - case Finished: + else + state = WaitForSendEnd; + } + break; + case WaitForSendEnd: + if(nullptr == Environment<>::get().Manager().getITaskIfNotFinished(lastSendEvent.getTaskId())) + { + state = Finished; return true; - default: - return false; + } + break; + case Finished: + return true; + default: + return false; } return false; @@ -116,16 +118,16 @@ namespace pmacc notify(this->myId, RECVFINISHED, nullptr); if(retryCounter != 0) { - std::cerr << "Send/receive buffer for species " << - ParBase::FrameType::getName() << - " is too small (max: " << maxSize << - ", direction: " << exchange << " '" << ExchangeTypeNames{}[exchange] << "'" << - ", retries: " << retryCounter << - ")" << std::endl; + std::cerr << "Send/receive buffer for species " << ParBase::FrameType::getName() + << " is too small (max: " << maxSize << ", direction: " << exchange << " '" + << ExchangeTypeNames{}[exchange] << "'" + << ", retries: " << retryCounter << ")" << std::endl; } } - void event(id_t, EventType, IEventData*) { } + void 
event(id_t, EventType, IEventData*) + { + } std::string toString() { @@ -133,7 +135,6 @@ namespace pmacc } private: - enum state_t { Constructor, @@ -158,4 +159,4 @@ namespace pmacc size_t retryCounter; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/particles/traits/FilterByFlag.hpp b/include/pmacc/particles/traits/FilterByFlag.hpp index 5d5c2ca045..5f008b3f32 100644 --- a/include/pmacc/particles/traits/FilterByFlag.hpp +++ b/include/pmacc/particles/traits/FilterByFlag.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau +/* Copyright 2015-2021 Heiko Burau * * This file is part of PMacc. * @@ -28,33 +28,30 @@ namespace pmacc { -namespace particles -{ -namespace traits -{ - -/** Return a new sequence of particle species carrying flag. - * - * @tparam T_MPLSeq sequence of particle species - * @tparam T_Flag flag to be filtered - */ -template -struct FilterByFlag -{ - typedef T_MPLSeq MPLSeq; - typedef T_Flag Flag; - - template - struct HasFlag + namespace particles { - typedef typename ::pmacc::traits::HasFlag< - typename T_Species::FrameType, - Flag>::type type; - }; + namespace traits + { + /** Return a new sequence of particle species carrying flag. 
+ * + * @tparam T_MPLSeq sequence of particle species + * @tparam T_Flag flag to be filtered + */ + template + struct FilterByFlag + { + typedef T_MPLSeq MPLSeq; + typedef T_Flag Flag; + + template + struct HasFlag + { + typedef typename ::pmacc::traits::HasFlag::type type; + }; - typedef typename bmpl::copy_if >::type type; -}; + typedef typename bmpl::copy_if>::type type; + }; -}//namespace traits -}//namespace particles -}//namespace pmacc + } // namespace traits + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/traits/FilterByIdentifier.hpp b/include/pmacc/particles/traits/FilterByIdentifier.hpp index da20b3e675..772555d121 100644 --- a/include/pmacc/particles/traits/FilterByIdentifier.hpp +++ b/include/pmacc/particles/traits/FilterByIdentifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Heiko Burau, Rene Widera +/* Copyright 2015-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -29,42 +29,33 @@ namespace pmacc { -namespace particles -{ -namespace traits -{ - - /** Return a new sequence of species which carry the identifier. - * - * @tparam T_MPLSeq sequence of particle species - * @tparam T_Identifier identifier to be filtered - * - * @typedef type boost mpl forward sequence - */ - template< - typename T_MPLSeq, - typename T_Identifier - > - struct FilterByIdentifier + namespace particles { - using MPLSeq = T_MPLSeq; - using Identifier = T_Identifier; - - template< typename T_Species > - struct HasIdentifier + namespace traits { - using type = typename ::pmacc::traits::HasIdentifier< - typename T_Species::FrameType, - Identifier - >::type; - }; - - using type = typename bmpl::copy_if< - MPLSeq, - HasIdentifier< bmpl::_ > - >::type; - }; - -}//namespace traits -}//namespace particles -}//namespace pmacc + /** Return a new sequence of species which carry the identifier. 
+ * + * @tparam T_MPLSeq sequence of particle species + * @tparam T_Identifier identifier to be filtered + * + * @typedef type boost mpl forward sequence + */ + template + struct FilterByIdentifier + { + using MPLSeq = T_MPLSeq; + using Identifier = T_Identifier; + + template + struct HasIdentifier + { + using type = + typename ::pmacc::traits::HasIdentifier::type; + }; + + using type = typename bmpl::copy_if>::type; + }; + + } // namespace traits + } // namespace particles +} // namespace pmacc diff --git a/include/pmacc/particles/traits/ResolveAliasFromSpecies.hpp b/include/pmacc/particles/traits/ResolveAliasFromSpecies.hpp index 8d1e080634..1301854890 100644 --- a/include/pmacc/particles/traits/ResolveAliasFromSpecies.hpp +++ b/include/pmacc/particles/traits/ResolveAliasFromSpecies.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PMacc. * @@ -29,59 +29,58 @@ namespace pmacc { -namespace particles -{ -namespace traits -{ - -/** Resolves a custom alias in the flag list of a particle species. - * - * Example: - * - * \code{.cpp} - * typedef bmpl::vector< - * particlePusher, - * shape, - * interpolation, - * current, - * massRatio, - * chargeRatio, - * synchrotronPhotons - * > ParticleFlagsElectrons; - * - * typedef picongpu::Particles< - * PMACC_CSTRING( "e" ), - * ParticleFlagsElectrons, - * DefaultAttributesSeq - * > PIC_Electrons; - * - * typedef typename ResolveAliasFromSpecies< - * PIC_Electrons, - * synchrotronPhotons<> - * >::type PhotonSpecies; - * boost::static_assert(boost::is_same::value); - * \endcode - * - * \tparam T_SpeciesType particle species - * \tparam T_Alias alias - */ -template -struct ResolveAliasFromSpecies; + namespace particles + { + namespace traits + { + /** Resolves a custom alias in the flag list of a particle species. 
+ * + * Example: + * + * \code{.cpp} + * typedef bmpl::vector< + * particlePusher, + * shape, + * interpolation, + * current, + * massRatio, + * chargeRatio, + * synchrotronPhotons + * > ParticleFlagsElectrons; + * + * typedef picongpu::Particles< + * PMACC_CSTRING( "e" ), + * ParticleFlagsElectrons, + * DefaultAttributesSeq + * > PIC_Electrons; + * + * typedef typename ResolveAliasFromSpecies< + * PIC_Electrons, + * synchrotronPhotons<> + * >::type PhotonSpecies; + * boost::static_assert(boost::is_same::value); + * \endcode + * + * \tparam T_SpeciesType particle species + * \tparam T_Alias alias + */ + template + struct ResolveAliasFromSpecies; -template class T_Object, typename T_AnyType> -struct ResolveAliasFromSpecies > -{ - typedef T_SpeciesType SpeciesType; - typedef T_Object Alias; - typedef typename SpeciesType::FrameType FrameType; + template class T_Object, typename T_AnyType> + struct ResolveAliasFromSpecies> + { + typedef T_SpeciesType SpeciesType; + typedef T_Object Alias; + typedef typename SpeciesType::FrameType FrameType; - /* The following line only fetches the alias */ - typedef typename pmacc::traits::GetFlagType::type FoundAlias; + /* The following line only fetches the alias */ + typedef typename pmacc::traits::GetFlagType::type FoundAlias; - /* This now resolves the alias into the actual object type */ - typedef typename pmacc::traits::Resolve::type type; -}; // struct ResolveAliasFromSpecies + /* This now resolves the alias into the actual object type */ + typedef typename pmacc::traits::Resolve::type type; + }; // struct ResolveAliasFromSpecies -} // namespace traits -} // namespace particles + } // namespace traits + } // namespace particles } // namespace pmacc diff --git a/include/pmacc/pluginSystem/INotify.hpp b/include/pmacc/pluginSystem/INotify.hpp index 92be33c262..268bce9623 100644 --- a/include/pmacc/pluginSystem/INotify.hpp +++ b/include/pmacc/pluginSystem/INotify.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix 
Schmitt, Axel Huebl, +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl, * Richard Pausch * * This file is part of PMacc. @@ -34,7 +34,6 @@ namespace pmacc uint32_t lastNotify; public: - INotify() : lastNotify(0) { } @@ -50,7 +49,7 @@ namespace pmacc * * @param currentStep current simulation iteration step */ - virtual void notify( uint32_t currentStep ) = 0; + virtual void notify(uint32_t currentStep) = 0; /** When was the plugin notified last? * @@ -65,10 +64,9 @@ namespace pmacc * * @param currentStep current simulation iteration step */ - void setLastNotify( uint32_t currentStep ) + void setLastNotify(uint32_t currentStep) { lastNotify = currentStep; } - }; -} +} // namespace pmacc diff --git a/include/pmacc/pluginSystem/IPlugin.hpp b/include/pmacc/pluginSystem/IPlugin.hpp index cdb4671834..d4ce16b2b2 100644 --- a/include/pmacc/pluginSystem/IPlugin.hpp +++ b/include/pmacc/pluginSystem/IPlugin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Richard Pausch * * This file is part of PMacc. * @@ -39,7 +39,6 @@ namespace pmacc class PluginException : public std::runtime_error { public: - PluginException(const char* message) : std::runtime_error(message) { } @@ -55,11 +54,8 @@ namespace pmacc class IPlugin : public INotify { public: - - IPlugin() : - loaded(false), lastCheckpoint(0) + IPlugin() : loaded(false), lastCheckpoint(0) { - } virtual ~IPlugin() @@ -128,7 +124,8 @@ namespace pmacc * \param direction the direction the particles are leaving the simulation */ virtual void onParticleLeave(const std::string& /*speciesName*/, const int32_t /*direction*/) - {} + { + } /** When was the plugin checkpointed last? 
* @@ -143,7 +140,7 @@ namespace pmacc * * @param currentStep current simulation iteration step */ - void setLastCheckpoint( uint32_t currentStep ) + void setLastCheckpoint(uint32_t currentStep) { lastCheckpoint = currentStep; } @@ -162,4 +159,4 @@ namespace pmacc bool loaded; uint32_t lastCheckpoint; }; -} +} // namespace pmacc diff --git a/include/pmacc/pluginSystem/PluginConnector.hpp b/include/pmacc/pluginSystem/PluginConnector.hpp index d163715d4c..17f36206b6 100644 --- a/include/pmacc/pluginSystem/PluginConnector.hpp +++ b/include/pmacc/pluginSystem/PluginConnector.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl, Benjamin Worpitz, +/* Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl, Benjamin Worpitz, * Heiko Burau * * This file is part of PMacc. @@ -43,15 +43,11 @@ namespace pmacc class PluginConnector { private: - using SeqOfTimeSlices = std::vector< pluginSystem::TimeSlice >; - using PluginPair = std::pair< - INotify*, - SeqOfTimeSlices - >; - using NotificationList = std::list< PluginPair >; + using SeqOfTimeSlices = std::vector; + using PluginPair = std::pair; + using NotificationList = std::list; public: - /** Register a plugin for loading/unloading and notifications * * Plugins are loaded in the order they are registered and unloaded in reverse order. 
@@ -60,9 +56,9 @@ namespace pmacc * * @param plugin plugin to register */ - void registerPlugin(IPlugin *plugin) + void registerPlugin(IPlugin* plugin) { - if (plugin != nullptr) + if(plugin != nullptr) { plugins.push_back(plugin); } @@ -76,10 +72,9 @@ namespace pmacc void loadPlugins() { // load all plugins - for (std::list::iterator iter = plugins.begin(); - iter != plugins.end(); ++iter) + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); ++iter) { - if (!(*iter)->isLoaded()) + if(!(*iter)->isLoaded()) { (*iter)->load(); } @@ -92,10 +87,9 @@ namespace pmacc void unloadPlugins() { // unload all plugins - for (std::list::reverse_iterator iter = plugins.rbegin(); - iter != plugins.rend(); ++iter) + for(std::list::reverse_iterator iter = plugins.rbegin(); iter != plugins.rend(); ++iter) { - if ((*iter)->isLoaded()) + if((*iter)->isLoaded()) { (*iter)->unload(); } @@ -111,8 +105,7 @@ namespace pmacc { std::list help_options; - for (std::list::iterator iter = plugins.begin(); - iter != plugins.end(); ++iter) + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); ++iter) { // create a new help options section for this plugin, // fill it and add to list of options @@ -129,17 +122,14 @@ namespace pmacc * @param notifiedObj the object to notify, e.g. 
an IPlugin instance * @param period notification period */ - void setNotificationPeriod(INotify* notifiedObj, std::string const & period) + void setNotificationPeriod(INotify* notifiedObj, std::string const& period) { - if (notifiedObj != nullptr) + if(notifiedObj != nullptr) { - if( !period.empty() ) + if(!period.empty()) { - SeqOfTimeSlices seqTimeSlices = pluginSystem::toTimeSlice( period ); - notificationList.push_back( std::make_pair( - notifiedObj, - seqTimeSlices - ) ); + SeqOfTimeSlices seqTimeSlices = pluginSystem::toTimeSlice(period); + notificationList.push_back(std::make_pair(notifiedObj, seqTimeSlices)); } } else @@ -153,15 +143,9 @@ namespace pmacc */ void notifyPlugins(uint32_t currentStep) { - for (NotificationList::iterator iter = notificationList.begin(); - iter != notificationList.end(); ++iter) + for(NotificationList::iterator iter = notificationList.begin(); iter != notificationList.end(); ++iter) { - if( - containsStep( - (*iter).second, - currentStep - ) - ) + if(containsStep((*iter).second, currentStep)) { INotify* notifiedObj = iter->first; notifiedObj->notify(currentStep); @@ -178,8 +162,7 @@ namespace pmacc */ void checkpointPlugins(uint32_t currentStep, const std::string checkpointDirectory) { - for (std::list::iterator iter = plugins.begin(); - iter != plugins.end(); ++iter) + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); ++iter) { (*iter)->checkpoint(currentStep, checkpointDirectory); (*iter)->setLastCheckpoint(currentStep); @@ -194,8 +177,7 @@ namespace pmacc */ void restartPlugins(uint32_t restartStep, const std::string restartDirectory) { - for (std::list::iterator iter = plugins.begin(); - iter != plugins.end(); ++iter) + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); ++iter) { (*iter)->restart(restartStep, restartDirectory); } @@ -211,9 +193,7 @@ namespace pmacc std::vector getPluginsFromType() { std::vector result; - for(std::list::iterator iter = plugins.begin(); - iter != 
plugins.end(); - iter++) + for(std::list::iterator iter = plugins.begin(); iter != plugins.end(); iter++) { Plugin* plugin = dynamic_cast(*iter); if(plugin != nullptr) @@ -232,7 +212,6 @@ namespace pmacc } private: - friend struct detail::Environment; static PluginConnector& getInstance() @@ -243,15 +222,13 @@ namespace pmacc PluginConnector() { - } virtual ~PluginConnector() { - } std::list plugins; NotificationList notificationList; }; -} +} // namespace pmacc diff --git a/include/pmacc/pluginSystem/TimeSlice.hpp b/include/pmacc/pluginSystem/TimeSlice.hpp index e101248824..86da516aba 100644 --- a/include/pmacc/pluginSystem/TimeSlice.hpp +++ b/include/pmacc/pluginSystem/TimeSlice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. * @@ -30,54 +30,50 @@ namespace pmacc { -namespace pluginSystem -{ - struct TimeSlice + namespace pluginSystem { - /** time slice configuration - * - * 0 = begin of the interval - * 1 = end of the interval - * 2 = period - */ - std::array< uint32_t, 3 > values; - - std::string toString() const + struct TimeSlice { - std::string result; - result = std::to_string(values[0]) + ":" + - std::to_string(values[1]) + ":" + - std::to_string(values[2]); - return result; - } + /** time slice configuration + * + * 0 = begin of the interval + * 1 = end of the interval + * 2 = period + */ + std::array values; - /** set the value - * - * if str is empty the default value for the given index is selected - * - * @param idx index to set, range [0,3) - * @param str value to set, can be empty - */ - void setValue(uint32_t const idx, std::string const & str) - { - if(!str.empty()) + std::string toString() const { - uint32_t value = std::stoul( str ); - PMACC_VERIFY_MSG( - !( idx == 2 && value == 0 ), - "Zero is not a valid period" - ); - values.at( idx ) = value; + std::string result; + result = std::to_string(values[0]) + ":" + std::to_string(values[1]) + ":" + 
std::to_string(values[2]); + return result; } - } - //! create a time slice instance - TimeSlice() : - /* default: start:end:period - * -1 stored as unsigned is the highest available unsigned integer + /** set the value + * + * if str is empty the default value for the given index is selected + * + * @param idx index to set, range [0,3) + * @param str value to set, can be empty */ - values( { 0, uint32_t( -1 ), 1 } ) - { } - }; -} // namespace pluginSystem + void setValue(uint32_t const idx, std::string const& str) + { + if(!str.empty()) + { + uint32_t value = std::stoul(str); + PMACC_VERIFY_MSG(!(idx == 2 && value == 0), "Zero is not a valid period"); + values.at(idx) = value; + } + } + + //! create a time slice instance + TimeSlice() + : /* default: start:end:period + * -1 stored as unsigned is the highest available unsigned integer + */ + values({0, uint32_t(-1), 1}) + { + } + }; + } // namespace pluginSystem } // namespace pmacc diff --git a/include/pmacc/pluginSystem/containsStep.hpp b/include/pmacc/pluginSystem/containsStep.hpp index cbd473bed8..b82fa46c21 100644 --- a/include/pmacc/pluginSystem/containsStep.hpp +++ b/include/pmacc/pluginSystem/containsStep.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,32 +28,26 @@ namespace pmacc { -namespace pluginSystem -{ - /** check if a given step is within an interval list - * - * @param seqTimeSlices vector with time intervals - * @param timeStep simulation time step to check - * @return true if step is included in the interval list else false - */ - HINLINE bool containsStep( - std::vector< pluginSystem::TimeSlice > const & seqTimeSlices, - uint32_t const timeStep - ) + namespace pluginSystem { - for(auto const & timeSlice : seqTimeSlices) + /** check if a given step is within an interval list + * + * @param seqTimeSlices vector with time intervals + * @param timeStep simulation time step to check + * @return true if step is included in the interval list else false + */ + HINLINE bool containsStep(std::vector const& seqTimeSlices, uint32_t const timeStep) { - if( - timeStep >= timeSlice.values[ 0 ] && - timeStep <= timeSlice.values[ 1 ] - ) + for(auto const& timeSlice : seqTimeSlices) { - uint32_t const timeRelativeToStart = timeStep - timeSlice.values[ 0 ]; - if( timeRelativeToStart % timeSlice.values[ 2 ] == 0 ) - return true; + if(timeStep >= timeSlice.values[0] && timeStep <= timeSlice.values[1]) + { + uint32_t const timeRelativeToStart = timeStep - timeSlice.values[0]; + if(timeRelativeToStart % timeSlice.values[2] == 0) + return true; + } } + return false; } - return false; - } -} // namespace pluginSystem + } // namespace pluginSystem } // namespace pmacc diff --git a/include/pmacc/pluginSystem/toTimeSlice.hpp b/include/pmacc/pluginSystem/toTimeSlice.hpp index e1eac66114..76b21242f0 100644 --- a/include/pmacc/pluginSystem/toTimeSlice.hpp +++ b/include/pmacc/pluginSystem/toTimeSlice.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. 
* @@ -36,73 +36,56 @@ namespace pmacc { -namespace pluginSystem -{ -namespace detail -{ - /** check if string contains only digits - * - * @param str string to check - * @return true if str contains only digits else false - */ - HINLINE bool is_number( std::string const & str ) + namespace pluginSystem { - return std::all_of( - str.begin(), - str.end(), - ::isdigit - ); - } -} // namespace detail - - /** create a TimeSlice out of an string - * - * Parse a comma separated list of time slices and creates a vector of TimeSlices. - * TimeSlice Syntax: - * - `start:stop:period` - * - a number ``N is equal to `::N` - */ - HINLINE std::vector< TimeSlice > toTimeSlice( std::string const & str ) - { - std::vector< TimeSlice > result; - auto const seqOfSlices = misc::splitString( - str, - "," - ); - for( auto const & slice : seqOfSlices ) + namespace detail { - auto const sliceComponents = misc::splitString( - slice, - ":" - ); - PMACC_VERIFY_MSG( - !sliceComponents.empty( ), - std::string( "time slice without a defined element is not allowed" ) + str - ); + /** check if string contains only digits + * + * @param str string to check + * @return true if str contains only digits else false + */ + HINLINE bool is_number(std::string const& str) + { + return std::all_of(str.begin(), str.end(), ::isdigit); + } + } // namespace detail - // id of the component - size_t n = 0; - bool const hasOnlyPeriod = sliceComponents.size() == 1u; - TimeSlice timeSlice; - for( auto& component : sliceComponents ) + /** create a TimeSlice out of an string + * + * Parse a comma separated list of time slices and creates a vector of TimeSlices. 
+ * TimeSlice Syntax: + * - `start:stop:period` + * - a number ``N is equal to `::N` + */ + HINLINE std::vector toTimeSlice(std::string const& str) + { + std::vector result; + auto const seqOfSlices = misc::splitString(str, ","); + for(auto const& slice : seqOfSlices) { - // be sure that component it is a number or empty + auto const sliceComponents = misc::splitString(slice, ":"); PMACC_VERIFY_MSG( - component.empty() || detail::is_number( component ), - std::string("value") + component + - " in " + str + "is not a number" - ); + !sliceComponents.empty(), + std::string("time slice without a defined element is not allowed") + str); - timeSlice.setValue( - hasOnlyPeriod ? 2 : n, - component - ); - n++; - } - result.push_back( timeSlice ); + // id of the component + size_t n = 0; + bool const hasOnlyPeriod = sliceComponents.size() == 1u; + TimeSlice timeSlice; + for(auto& component : sliceComponents) + { + // be sure that component it is a number or empty + PMACC_VERIFY_MSG( + component.empty() || detail::is_number(component), + std::string("value") + component + " in " + str + "is not a number"); + timeSlice.setValue(hasOnlyPeriod ? 2 : n, component); + n++; + } + result.push_back(timeSlice); + } + return result; } - return result; - } -} // namespace pluginSystem + } // namespace pluginSystem } // namespace pmacc diff --git a/include/pmacc/ppFunctions.hpp b/include/pmacc/ppFunctions.hpp index 1ed6980142..fa0d1a4a13 100644 --- a/include/pmacc/ppFunctions.hpp +++ b/include/pmacc/ppFunctions.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Rene Widera * * This file is part of PMacc. * @@ -26,22 +26,22 @@ #include -#define PMACC_MIN(x,y) (((x)<=(y))?x:y) -#define PMACC_MAX(x,y) (((x)>(y))?x:y) +#define PMACC_MIN(x, y) (((x) <= (y)) ? x : y) +#define PMACC_MAX(x, y) (((x) > (y)) ? 
x : y) -#define PMACC_JOIN_DO(x,y) x##y -#define PMACC_JOIN(x,y) PMACC_JOIN_DO(x,y) +#define PMACC_JOIN_DO(x, y) x##y +#define PMACC_JOIN(x, y) PMACC_JOIN_DO(x, y) -#define PMACC_MAX_DO(what,x,y) (((x)>(y))?x what:y what) -#define PMACC_MIN_DO(what,x,y) (((x)<(y))?x what:y what) +#define PMACC_MAX_DO(what, x, y) (((x) > (y)) ? x what : y what) +#define PMACC_MIN_DO(what, x, y) (((x) < (y)) ? x what : y what) #ifdef PMACC_PP_VARIADIC_SIZE -# define PMACC_COUNT_ARGS_DEF(type,...) (PMACC_PP_VARIADIC_SIZE(__VA_ARGS__)) +# define PMACC_COUNT_ARGS_DEF(type, ...) (PMACC_PP_VARIADIC_SIZE(__VA_ARGS__)) #else - // A fallback implementation using compound literals, supported by some compilers -# define PMACC_COUNT_ARGS_DEF(type,...) (sizeof((type[]){type{}, ##__VA_ARGS__})/sizeof(type)-1u) +// A fallback implementation using compound literals, supported by some compilers +# define PMACC_COUNT_ARGS_DEF(type, ...) (sizeof((type[]){type{}, ##__VA_ARGS__}) / sizeof(type) - 1u) #endif /** @@ -50,7 +50,7 @@ * @param type type of the arguments in ... * @param ... arguments */ -#define PMACC_COUNT_ARGS(type,...) PMACC_COUNT_ARGS_DEF(type,__VA_ARGS__) +#define PMACC_COUNT_ARGS(type, ...) PMACC_COUNT_ARGS_DEF(type, __VA_ARGS__) /** * Check if ... has arguments or not @@ -60,7 +60,7 @@ * @param ... arguments * @return false if no arguments are given, else true */ -#define PMACC_HAS_ARGS(...) (PMACC_COUNT_ARGS(int,__VA_ARGS__)>0) +#define PMACC_HAS_ARGS(...) (PMACC_COUNT_ARGS(int, __VA_ARGS__) > 0) /** round up to next higher pow 2 value * @@ -71,12 +71,12 @@ * @param value integral number between [1,Inf] * @return next higher pow 2 value */ -#define PMACC_ROUND_UP_NEXT_POW2(value) \ - ((value)==1?1: \ - ((value)<=2?2: \ - ((value)<=4?4: \ - ((value)<=8?8: \ - ((value)<=16?16: \ - ((value)<=32?32: \ - ((value)<=64?64:128 \ - ))))))) +#define PMACC_ROUND_UP_NEXT_POW2(value) \ + ((value) == 1 \ + ? 1 \ + : ((value) <= 2 \ + ? 2 \ + : ((value) <= 4 \ + ? 4 \ + : ((value) <= 8 ? 
8 \ + : ((value) <= 16 ? 16 : ((value) <= 32 ? 32 : ((value) <= 64 ? 64 : 128))))))) diff --git a/include/pmacc/preprocessor/facilities.hpp b/include/pmacc/preprocessor/facilities.hpp index 04ee2ba3ec..1906c20069 100644 --- a/include/pmacc/preprocessor/facilities.hpp +++ b/include/pmacc/preprocessor/facilities.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. * @@ -28,14 +28,14 @@ #define PMACC_PP_DEFER_ECHO() PMACC_PP_ECHO /** get the first element of a preprocessor pair */ -#define PMACC_PP_FIRST(first,second) first +#define PMACC_PP_FIRST(first, second) first /** get the first element of a preprocessor pair with delay */ #define PMACC_PP_DEFER_FIRST() PMACC_PP_FIRST /** get the second element of a preprocessor pair */ -#define PMACC_PP_SECOND(first,second) second +#define PMACC_PP_SECOND(first, second) second /** get the second element of a preprocessor pair with delay */ #define PMACC_PP_DEFER_SECOND() PMACC_PP_SECOND @@ -52,4 +52,4 @@ /** call the given macro with the given argument. * can be used as a helper for expanding arguments that are lists */ -#define PMACC_PP_CALL(macro,argument) macro argument +#define PMACC_PP_CALL(macro, argument) macro argument diff --git a/include/pmacc/preprocessor/size.hpp b/include/pmacc/preprocessor/size.hpp index 72ba10e57c..666ef07fb5 100644 --- a/include/pmacc/preprocessor/size.hpp +++ b/include/pmacc/preprocessor/size.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Sergei Bastrakov +/* Copyright 2018-2021 Sergei Bastrakov * * This file is part of PMacc. * @@ -32,51 +32,382 @@ * for usage check if PMACC_PP_VARIADIC_SIZE is defined. * Implementation is essentially the same as BOOST_PP_VARIADIC_SIZE, * but supports up to 120 arguments instead of 64. 
- * The implementation uses the “paired, sliding arg list†trick + * The implementation uses the "paired, sliding arg list" trick * explained in https://codecraft.co/2014/11/25/variadic-macros-tricks/ */ -#if( BOOST_PP_VARIADICS == 1 ) -# define PMACC_PP_VARIADIC_SIZE_I( \ - e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, \ - e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, \ - e30, e31, e32, e33, e34, e35, e36, e37, e38, e39, e40, e41, e42, e43, \ - e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55, e56, e57, \ - e58, e59, e60, e61, e62, e63, e64, e65, e66, e67, e68, e69, e70, e71, \ - e72, e73, e74, e75, e76, e77, e78, e79, e80, e81, e82, e83, e84, e85, \ - e86, e87, e88, e89, e90, e91, e92, e93, e94, e95, e96, e97, e98, e99, \ - e100, e101, e102, e103, e104, e105, e106, e107, e108, e109, e110, \ - e111, e112, e113, e114, e115, e116, e117, e118, e119, \ - size, ... \ - ) size -# if BOOST_PP_VARIADICS_MSVC -# define PMACC_PP_VARIADIC_SIZE(...) \ - BOOST_PP_CAT( \ - PMACC_PP_VARIADIC_SIZE_I( \ - __VA_ARGS__, \ - 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, \ - 109, 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, \ - 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, \ - 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, \ - 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, \ - 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, \ - 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, \ - 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \ - 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, \ - ), \ - ) -# else -# define PMACC_PP_VARIADIC_SIZE(...) 
\ - PMACC_PP_VARIADIC_SIZE_I( \ - __VA_ARGS__, \ - 120, 119, 118, 117, 116, 115, 114, 113, 112, 111, 110, 109, \ - 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, 96, \ - 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, \ - 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, \ - 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \ - 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, \ - 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, \ - 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, \ - 4, 3, 2, 1, \ - ) -# endif +#if(BOOST_PP_VARIADICS == 1) +# define PMACC_PP_VARIADIC_SIZE_I( \ + e0, \ + e1, \ + e2, \ + e3, \ + e4, \ + e5, \ + e6, \ + e7, \ + e8, \ + e9, \ + e10, \ + e11, \ + e12, \ + e13, \ + e14, \ + e15, \ + e16, \ + e17, \ + e18, \ + e19, \ + e20, \ + e21, \ + e22, \ + e23, \ + e24, \ + e25, \ + e26, \ + e27, \ + e28, \ + e29, \ + e30, \ + e31, \ + e32, \ + e33, \ + e34, \ + e35, \ + e36, \ + e37, \ + e38, \ + e39, \ + e40, \ + e41, \ + e42, \ + e43, \ + e44, \ + e45, \ + e46, \ + e47, \ + e48, \ + e49, \ + e50, \ + e51, \ + e52, \ + e53, \ + e54, \ + e55, \ + e56, \ + e57, \ + e58, \ + e59, \ + e60, \ + e61, \ + e62, \ + e63, \ + e64, \ + e65, \ + e66, \ + e67, \ + e68, \ + e69, \ + e70, \ + e71, \ + e72, \ + e73, \ + e74, \ + e75, \ + e76, \ + e77, \ + e78, \ + e79, \ + e80, \ + e81, \ + e82, \ + e83, \ + e84, \ + e85, \ + e86, \ + e87, \ + e88, \ + e89, \ + e90, \ + e91, \ + e92, \ + e93, \ + e94, \ + e95, \ + e96, \ + e97, \ + e98, \ + e99, \ + e100, \ + e101, \ + e102, \ + e103, \ + e104, \ + e105, \ + e106, \ + e107, \ + e108, \ + e109, \ + e110, \ + e111, \ + e112, \ + e113, \ + e114, \ + e115, \ + e116, \ + e117, \ + e118, \ + e119, \ + size, \ + ...) \ + size +# if BOOST_PP_VARIADICS_MSVC +# define PMACC_PP_VARIADIC_SIZE(...) 
\ + BOOST_PP_CAT( \ + PMACC_PP_VARIADIC_SIZE_I( \ + __VA_ARGS__, \ + 120, \ + 119, \ + 118, \ + 117, \ + 116, \ + 115, \ + 114, \ + 113, \ + 112, \ + 111, \ + 110, \ + 109, \ + 108, \ + 107, \ + 106, \ + 105, \ + 104, \ + 103, \ + 102, \ + 101, \ + 100, \ + 99, \ + 98, \ + 97, \ + 96, \ + 95, \ + 94, \ + 93, \ + 92, \ + 91, \ + 90, \ + 89, \ + 88, \ + 87, \ + 86, \ + 85, \ + 84, \ + 83, \ + 82, \ + 81, \ + 80, \ + 79, \ + 78, \ + 77, \ + 76, \ + 75, \ + 74, \ + 73, \ + 72, \ + 71, \ + 70, \ + 69, \ + 68, \ + 67, \ + 66, \ + 65, \ + 64, \ + 63, \ + 62, \ + 61, \ + 60, \ + 59, \ + 58, \ + 57, \ + 56, \ + 55, \ + 54, \ + 53, \ + 52, \ + 51, \ + 50, \ + 49, \ + 48, \ + 47, \ + 46, \ + 45, \ + 44, \ + 43, \ + 42, \ + 41, \ + 40, \ + 39, \ + 38, \ + 37, \ + 36, \ + 35, \ + 34, \ + 33, \ + 32, \ + 31, \ + 30, \ + 29, \ + 28, \ + 27, \ + 26, \ + 25, \ + 24, \ + 23, \ + 22, \ + 21, \ + 20, \ + 19, \ + 18, \ + 17, \ + 16, \ + 15, \ + 14, \ + 13, \ + 12, \ + 11, \ + 10, \ + 9, \ + 8, \ + 7, \ + 6, \ + 5, \ + 4, \ + 3, \ + 2, \ + 1, ), ) +# else +# define PMACC_PP_VARIADIC_SIZE(...) 
\ + PMACC_PP_VARIADIC_SIZE_I( \ + __VA_ARGS__, \ + 120, \ + 119, \ + 118, \ + 117, \ + 116, \ + 115, \ + 114, \ + 113, \ + 112, \ + 111, \ + 110, \ + 109, \ + 108, \ + 107, \ + 106, \ + 105, \ + 104, \ + 103, \ + 102, \ + 101, \ + 100, \ + 99, \ + 98, \ + 97, \ + 96, \ + 95, \ + 94, \ + 93, \ + 92, \ + 91, \ + 90, \ + 89, \ + 88, \ + 87, \ + 86, \ + 85, \ + 84, \ + 83, \ + 82, \ + 81, \ + 80, \ + 79, \ + 78, \ + 77, \ + 76, \ + 75, \ + 74, \ + 73, \ + 72, \ + 71, \ + 70, \ + 69, \ + 68, \ + 67, \ + 66, \ + 65, \ + 64, \ + 63, \ + 62, \ + 61, \ + 60, \ + 59, \ + 58, \ + 57, \ + 56, \ + 55, \ + 54, \ + 53, \ + 52, \ + 51, \ + 50, \ + 49, \ + 48, \ + 47, \ + 46, \ + 45, \ + 44, \ + 43, \ + 42, \ + 41, \ + 40, \ + 39, \ + 38, \ + 37, \ + 36, \ + 35, \ + 34, \ + 33, \ + 32, \ + 31, \ + 30, \ + 29, \ + 28, \ + 27, \ + 26, \ + 25, \ + 24, \ + 23, \ + 22, \ + 21, \ + 20, \ + 19, \ + 18, \ + 17, \ + 16, \ + 15, \ + 14, \ + 13, \ + 12, \ + 11, \ + 10, \ + 9, \ + 8, \ + 7, \ + 6, \ + 5, \ + 4, \ + 3, \ + 2, \ + 1, ) +# endif #endif diff --git a/include/pmacc/preprocessor/struct.hpp b/include/pmacc/preprocessor/struct.hpp index 72a430f011..04a9bc6a13 100644 --- a/include/pmacc/preprocessor/struct.hpp +++ b/include/pmacc/preprocessor/struct.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera +/* Copyright 2015-2021 Rene Widera * * This file is part of PMacc. * @@ -60,10 +60,12 @@ * static const float2_64 center_SI = float2_64(1.134e-5, 1.134e-5); * @endcode */ -#define PMACC_C_VECTOR(type,name,...) (0,(typename pmacc::traits::GetValueType::type, \ - name, \ - pmacc::traits::GetNComponents::value, \ - __VA_ARGS__)) +#define PMACC_C_VECTOR(type, name, ...) 
\ + (0, \ + (typename pmacc::traits::GetValueType::type, \ + name, \ + pmacc::traits::GetNComponents::value, \ + __VA_ARGS__)) /** create static const member vector that needs no memory inside of the struct @@ -79,7 +81,7 @@ * static const Vector center_SI = Vector(1.134e-5, 1.134e-5, 1.134e-5); * @endcode */ -#define PMACC_C_VECTOR_DIM(type,dim,name,...) (0,(type,name,dim,__VA_ARGS__)) +#define PMACC_C_VECTOR_DIM(type, dim, name, ...) (0, (type, name, dim, __VA_ARGS__)) /** create static constexpr member * @@ -93,7 +95,7 @@ * static constexpr float_64 power_SI = float_64(2.0); * @endcode */ -#define PMACC_C_VALUE(type,name,value) (1,(type,name,value)) +#define PMACC_C_VALUE(type, name, value) (1, (type, name, value)) /** create changeable member * @@ -107,7 +109,7 @@ * float_64 power_SI(2.0); * @endcode */ -#define PMACC_VALUE(type,name,initValue) (2,(type,name,initValue)) +#define PMACC_VALUE(type, name, initValue) (2, (type, name, initValue)) /** create changeable member vector @@ -122,7 +124,7 @@ * float2_64 center_SI(1.134e-5, 1.134e-5); * @endcode */ -#define PMACC_VECTOR(type,name,...) (5,(type,name, type(__VA_ARGS__) )) +#define PMACC_VECTOR(type, name, ...) (5, (type, name, type(__VA_ARGS__))) /** create changeable member vector * @@ -137,14 +139,8 @@ * Vector center_SI(1.134e-5, 1.134e-5, 1.134e-5); * @endcode */ -#define PMACC_VECTOR_DIM(type,dim,name,...) \ - (5, \ - ( \ - (pmacc::math::Vector), \ - name, \ - pmacc::math::Vector(__VA_ARGS__) \ - ) \ - ) +#define PMACC_VECTOR_DIM(type, dim, name, ...) \ + (5, ((pmacc::math::Vector), name, pmacc::math::Vector(__VA_ARGS__))) /** create static const character string * @@ -157,7 +153,7 @@ * static const char* filename = (char*)"fooFile.txt"; * @endcode */ -#define PMACC_C_STRING(name,initValue) (3,(_,name,initValue)) +#define PMACC_C_STRING(name, initValue) (3, (_, name, initValue)) /** create any code extension * @@ -169,8 +165,7 @@ * typedef float FooFloat; * @endcode */ -#define PMACC_EXTENT(...) 
(4,(_,_,__VA_ARGS__)) - +#define PMACC_EXTENT(...) (4, (_, _, __VA_ARGS__)) /** select member description @@ -182,8 +177,8 @@ * @return result of `(op def)` if `selectTypeID == typeID` * `( )` else */ -#define PMACC_PP_X_SELECT_TYPEID(selectTypeID,op,typeID,def) \ - BOOST_PP_IF( BOOST_PP_EQUAL(typeID,selectTypeID), (op def) , () ) +#define PMACC_PP_X_SELECT_TYPEID(selectTypeID, op, typeID, def) \ + BOOST_PP_IF(BOOST_PP_EQUAL(typeID, selectTypeID), (op def), ()) /** select member description of a TypeMemberPair for a specific type id * @@ -193,8 +188,8 @@ * @return result of `op(secound(...))` if type is selected * `( )` else */ -#define PMACC_PP_SELECT_TYPEID(typeID,op,...) \ - PMACC_PP_X_SELECT_TYPEID( typeID,op,PMACC_PP_DEFER_FIRST() __VA_ARGS__ ,PMACC_PP_DEFER_SECOND() __VA_ARGS__ ) +#define PMACC_PP_SELECT_TYPEID(typeID, op, ...) \ + PMACC_PP_X_SELECT_TYPEID(typeID, op, PMACC_PP_DEFER_FIRST() __VA_ARGS__, PMACC_PP_DEFER_SECOND() __VA_ARGS__) /** run macro which calls accessor on the given element @@ -208,52 +203,47 @@ * * @{ */ -#define PMACC_PP_SEQ_MACRO_WITH_ACCESSOR(r,accessor,elem) PMACC_PP_REMOVE_PAREN( accessor(elem)) +#define PMACC_PP_SEQ_MACRO_WITH_ACCESSOR(r, accessor, elem) PMACC_PP_REMOVE_PAREN(accessor(elem)) -#define PMACC_PP_X_CREATE_C_VECTOR_DEF(data,type,name,dim,...) PMACC_CONST_VECTOR_DEF(type,dim,name,__VA_ARGS__); -#define PMACC_PP_CREATE_C_VECTOR_DEF(elem) \ - PMACC_PP_SELECT_TYPEID( 0,PMACC_PP_X_CREATE_C_VECTOR_DEF, elem ) +#define PMACC_PP_X_CREATE_C_VECTOR_DEF(data, type, name, dim, ...) \ + PMACC_CONST_VECTOR_DEF(type, dim, name, __VA_ARGS__); +#define PMACC_PP_CREATE_C_VECTOR_DEF(elem) PMACC_PP_SELECT_TYPEID(0, PMACC_PP_X_CREATE_C_VECTOR_DEF, elem) -#define PMACC_PP_X_CREATE_C_VECTOR_VARIABLE(data,type,name,dim,...) 
const BOOST_PP_CAT(name,_t) name; -#define PMACC_PP_CREATE_C_VECTOR_VARIABLE(elem) \ - PMACC_PP_SELECT_TYPEID( 0,PMACC_PP_X_CREATE_C_VECTOR_VARIABLE, elem ) +#define PMACC_PP_X_CREATE_C_VECTOR_VARIABLE(data, type, name, dim, ...) const BOOST_PP_CAT(name, _t) name; +#define PMACC_PP_CREATE_C_VECTOR_VARIABLE(elem) PMACC_PP_SELECT_TYPEID(0, PMACC_PP_X_CREATE_C_VECTOR_VARIABLE, elem) -#define PMACC_PP_X_CREATE_VALUE_VARIABLE(data,type,name,...) type name; -#define PMACC_PP_CREATE_VALUE_VARIABLE(elem) \ - PMACC_PP_SELECT_TYPEID( 2,PMACC_PP_X_CREATE_VALUE_VARIABLE, elem ) +#define PMACC_PP_X_CREATE_VALUE_VARIABLE(data, type, name, ...) type name; +#define PMACC_PP_CREATE_VALUE_VARIABLE(elem) PMACC_PP_SELECT_TYPEID(2, PMACC_PP_X_CREATE_VALUE_VARIABLE, elem) -#define PMACC_PP_X_CREATE_VALUE_VARIABLE_WITH_PAREN(data,type,name,...) PMACC_PP_REMOVE_PAREN(type) name; -#define PMACC_PP_CREATE_VALUE_VARIABLE_WITH_PAREN(elem) \ - PMACC_PP_SELECT_TYPEID( 5,PMACC_PP_X_CREATE_VALUE_VARIABLE_WITH_PAREN, elem ) +#define PMACC_PP_X_CREATE_VALUE_VARIABLE_WITH_PAREN(data, type, name, ...) PMACC_PP_REMOVE_PAREN(type) name; +#define PMACC_PP_CREATE_VALUE_VARIABLE_WITH_PAREN(elem) \ + PMACC_PP_SELECT_TYPEID(5, PMACC_PP_X_CREATE_VALUE_VARIABLE_WITH_PAREN, elem) -#define PMACC_PP_X_CREATE_C_VALUE_VARIABLE(data,type,name,...) static constexpr type name = __VA_ARGS__; -#define PMACC_PP_CREATE_C_VALUE_VARIABLE(elem) \ - PMACC_PP_SELECT_TYPEID( 1,PMACC_PP_X_CREATE_C_VALUE_VARIABLE,elem ) +#define PMACC_PP_X_CREATE_C_VALUE_VARIABLE(data, type, name, ...) static constexpr type name = __VA_ARGS__; +#define PMACC_PP_CREATE_C_VALUE_VARIABLE(elem) PMACC_PP_SELECT_TYPEID(1, PMACC_PP_X_CREATE_C_VALUE_VARIABLE, elem) -#define PMACC_PP_X1_INIT_VALUE_VARIABLE(data,type,name,...) (name(__VA_ARGS__)) -#define PMACC_PP_X_INIT_VALUE_VARIABLE(elem) \ - PMACC_PP_SELECT_TYPEID( 2,PMACC_PP_X1_INIT_VALUE_VARIABLE,elem ) +#define PMACC_PP_X1_INIT_VALUE_VARIABLE(data, type, name, ...) 
(name(__VA_ARGS__)) +#define PMACC_PP_X_INIT_VALUE_VARIABLE(elem) PMACC_PP_SELECT_TYPEID(2, PMACC_PP_X1_INIT_VALUE_VARIABLE, elem) -#define PMACC_PP_X_INIT_VALUE_VARIABLE_WITH_PAREN(elem) \ - PMACC_PP_SELECT_TYPEID( 5,PMACC_PP_X1_INIT_VALUE_VARIABLE,elem ) +#define PMACC_PP_X_INIT_VALUE_VARIABLE_WITH_PAREN(elem) \ + PMACC_PP_SELECT_TYPEID(5, PMACC_PP_X1_INIT_VALUE_VARIABLE, elem) -#define PMACC_PP_X_CREATE_C_STRING_VARIABLE(data,type,name,...) static constexpr const char* name = __VA_ARGS__; -#define PMACC_PP_CREATE_C_STRING_VARIABLE(elem) \ - PMACC_PP_SELECT_TYPEID( 3,PMACC_PP_X_CREATE_C_STRING_VARIABLE, elem ) +#define PMACC_PP_X_CREATE_C_STRING_VARIABLE(data, type, name, ...) static constexpr const char* name = __VA_ARGS__; +#define PMACC_PP_CREATE_C_STRING_VARIABLE(elem) PMACC_PP_SELECT_TYPEID(3, PMACC_PP_X_CREATE_C_STRING_VARIABLE, elem) -#define PMACC_PP_X_CREATE_EXTENT(data,type,name,...) __VA_ARGS__ -#define PMACC_PP_CREATE_EXTENT(elem) \ - PMACC_PP_SELECT_TYPEID( 4,PMACC_PP_X_CREATE_EXTENT,elem ) +#define PMACC_PP_X_CREATE_EXTENT(data, type, name, ...) 
__VA_ARGS__ +#define PMACC_PP_CREATE_EXTENT(elem) PMACC_PP_SELECT_TYPEID(4, PMACC_PP_X_CREATE_EXTENT, elem) -#define PMACC_PP_X1_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(data,first,second) ((first,(data,PMACC_PP_REMOVE_PAREN(second)))) -#define PMACC_PP_X_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(data,value) \ - PMACC_PP_CALL(PMACC_PP_X1_ADD_DATA_TO_TYPEDESCRIPTION_MACRO,(data,value)) +#define PMACC_PP_X1_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(data, first, second) \ + ((first, (data, PMACC_PP_REMOVE_PAREN(second)))) +#define PMACC_PP_X_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(data, value) \ + PMACC_PP_CALL(PMACC_PP_X1_ADD_DATA_TO_TYPEDESCRIPTION_MACRO, (data, value)) /** @} */ -#define PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(r,data,elem) \ - PMACC_PP_X_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(data,PMACC_PP_REMOVE_PAREN(elem)) +#define PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(r, data, elem) \ + PMACC_PP_X_ADD_DATA_TO_TYPEDESCRIPTION_MACRO(data, PMACC_PP_REMOVE_PAREN(elem)) /** create constructor initialization of non static variables * @@ -261,14 +251,10 @@ * * @param ... preprocessor sequence with TypeMemberPair's to inherit from */ -#define PMACC_PP_INIT_VALUE_VARIABLES(op,emptyStruct,...) \ - PMACC_PP_DEFER_REMOVE_PAREN() ( \ - BOOST_PP_EXPAND( \ - BOOST_PP_SEQ_TO_TUPLE ( \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,op,__VA_ARGS__ emptyStruct) \ - ) \ - ) \ - ) +#define PMACC_PP_INIT_VALUE_VARIABLES(op, emptyStruct, ...) \ + PMACC_PP_DEFER_REMOVE_PAREN() \ + (BOOST_PP_EXPAND( \ + BOOST_PP_SEQ_TO_TUPLE(BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, op, __VA_ARGS__ emptyStruct)))) /** generate the definition of a struct * @@ -276,26 +262,44 @@ * @param name name of the struct * @param ... preprocessor sequence with TypeMemberPair's */ -#define PMACC_PP_STRUCT_DEF(namespace_name,name,...) 
\ -namespace namespace_name{ \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_C_VECTOR_DEF,__VA_ARGS__) \ - struct EmptyStruct{}; \ - struct EmptyStruct2{}; \ - struct name : private EmptyStruct, private EmptyStruct2 { \ - name(): \ - PMACC_PP_INIT_VALUE_VARIABLES(PMACC_PP_X_INIT_VALUE_VARIABLE,((2,(a,b,EmptyStruct))),__VA_ARGS__), \ - PMACC_PP_INIT_VALUE_VARIABLES(PMACC_PP_X_INIT_VALUE_VARIABLE_WITH_PAREN,((5,(a,b,EmptyStruct2))),__VA_ARGS__) \ - {} \ - \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_C_VALUE_VARIABLE,__VA_ARGS__) \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_VALUE_VARIABLE,__VA_ARGS__) \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_C_VECTOR_VARIABLE,__VA_ARGS__) \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_C_STRING_VARIABLE,__VA_ARGS__) \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_EXTENT,__VA_ARGS__) \ - BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR,PMACC_PP_CREATE_VALUE_VARIABLE_WITH_PAREN,__VA_ARGS__) \ - }; \ -} /*namespace*/ \ -using namespace_name::name +#define PMACC_PP_STRUCT_DEF(namespace_name, name, ...) 
\ + namespace namespace_name \ + { \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, PMACC_PP_CREATE_C_VECTOR_DEF, __VA_ARGS__) \ + struct EmptyStruct \ + { \ + }; \ + struct EmptyStruct2 \ + { \ + }; \ + struct name \ + : private EmptyStruct \ + , private EmptyStruct2 \ + { \ + name() \ + : PMACC_PP_INIT_VALUE_VARIABLES( \ + PMACC_PP_X_INIT_VALUE_VARIABLE, \ + ((2, (a, b, EmptyStruct))), \ + __VA_ARGS__) \ + , PMACC_PP_INIT_VALUE_VARIABLES( \ + PMACC_PP_X_INIT_VALUE_VARIABLE_WITH_PAREN, \ + ((5, (a, b, EmptyStruct2))), \ + __VA_ARGS__) \ + { \ + } \ + \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, PMACC_PP_CREATE_C_VALUE_VARIABLE, __VA_ARGS__) \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, PMACC_PP_CREATE_VALUE_VARIABLE, __VA_ARGS__) \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, PMACC_PP_CREATE_C_VECTOR_VARIABLE, __VA_ARGS__) \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, PMACC_PP_CREATE_C_STRING_VARIABLE, __VA_ARGS__) \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, PMACC_PP_CREATE_EXTENT, __VA_ARGS__) \ + BOOST_PP_SEQ_FOR_EACH( \ + PMACC_PP_SEQ_MACRO_WITH_ACCESSOR, \ + PMACC_PP_CREATE_VALUE_VARIABLE_WITH_PAREN, \ + __VA_ARGS__) \ + }; \ + } /*namespace*/ \ + using namespace_name::name /** add data to TypeMemberPair's @@ -305,7 +309,8 @@ using namespace_name::name * @param data any data which should be added to the TypeMemberPair's * @param ... preprocessor sequence with TypeMemberPair's */ -#define PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION(data,...) BOOST_PP_SEQ_FOR_EACH(PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION_MACRO,data,__VA_ARGS__) +#define PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION(data, ...) \ + BOOST_PP_SEQ_FOR_EACH(PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION_MACRO, data, __VA_ARGS__) /** generate a struct with static and dynamic members * @@ -334,5 +339,8 @@ using namespace_name::name * ); * @endcode */ -#define PMACC_STRUCT(name,...) 
\ - PMACC_PP_STRUCT_DEF(BOOST_PP_CAT(BOOST_PP_CAT(pmacc_,name),__COUNTER__),name,PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION(name,__VA_ARGS__)) +#define PMACC_STRUCT(name, ...) \ + PMACC_PP_STRUCT_DEF( \ + BOOST_PP_CAT(BOOST_PP_CAT(pmacc_, name), __COUNTER__), \ + name, \ + PMACC_PP_ADD_DATA_TO_TYPEDESCRIPTION(name, __VA_ARGS__)) diff --git a/include/pmacc/random/RNGHandle.hpp b/include/pmacc/random/RNGHandle.hpp index bbf2cd9403..a788f00349 100644 --- a/include/pmacc/random/RNGHandle.hpp +++ b/include/pmacc/random/RNGHandle.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -27,76 +27,71 @@ namespace pmacc { -namespace random -{ - - /** - * A reference to a state of a RNG provider - */ - template - struct RNGHandle + namespace random { - typedef T_RNGProvider RNGProvider; - static constexpr uint32_t rngDim = RNGProvider::dim; - typedef typename RNGProvider::DataBoxType RNGBox; - typedef typename RNGProvider::RNGMethod RNGMethod; - typedef typename RNGMethod::StateType RNGState; - typedef pmacc::DataSpace RNGSpace; - - template - struct GetRandomType - { - typedef typename T_Distribution::template applyMethod::type Distribution; - typedef Random type; - }; - - /** - * Creates an instance of the functor - * - * @param rngBox Databox of the RNG provider - */ - RNGHandle(const RNGBox& rngBox): m_rngBox(rngBox) - {} - /** - * Initializes this instance - * - * \param cellIdx index into the underlying RNG provider + * A reference to a state of a RNG provider */ - HDINLINE void - init(const RNGSpace& cellIdx) + template + struct RNGHandle { - m_rngBox = m_rngBox.shift(cellIdx); - } + typedef T_RNGProvider RNGProvider; + static constexpr uint32_t rngDim = RNGProvider::dim; + typedef typename RNGProvider::DataBoxType RNGBox; + typedef typename RNGProvider::RNGMethod RNGMethod; + typedef typename RNGMethod::StateType RNGState; + typedef pmacc::DataSpace RNGSpace; - HDINLINE RNGState& - getState() 
- { - return m_rngBox(RNGSpace::create(0)); - } + template + struct GetRandomType + { + typedef typename T_Distribution::template applyMethod::type Distribution; + typedef Random type; + }; - HDINLINE RNGState& - operator*() - { - return m_rngBox(RNGSpace::create(0)); - } + /** + * Creates an instance of the functor + * + * @param rngBox Databox of the RNG provider + */ + RNGHandle(const RNGBox& rngBox) : m_rngBox(rngBox) + { + } - HDINLINE RNGState& - operator->() - { - return m_rngBox(RNGSpace::create(0)); - } + /** + * Initializes this instance + * + * \param cellIdx index into the underlying RNG provider + */ + HDINLINE void init(const RNGSpace& cellIdx) + { + m_rngBox = m_rngBox.shift(cellIdx); + } - template - HDINLINE typename GetRandomType::type - applyDistribution() - { - return typename GetRandomType::type(&getState()); - } + HDINLINE RNGState& getState() + { + return m_rngBox(RNGSpace::create(0)); + } - protected: - PMACC_ALIGN8(m_rngBox, RNGBox); - }; + HDINLINE RNGState& operator*() + { + return m_rngBox(RNGSpace::create(0)); + } + + HDINLINE RNGState& operator->() + { + return m_rngBox(RNGSpace::create(0)); + } + + template + HDINLINE typename GetRandomType::type applyDistribution() + { + return typename GetRandomType::type(&getState()); + } + + protected: + PMACC_ALIGN8(m_rngBox, RNGBox); + }; -} // namespace random -} // namespace pmacc + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/RNGProvider.hpp b/include/pmacc/random/RNGProvider.hpp index 14cc7e75e1..744b98c9b9 100644 --- a/include/pmacc/random/RNGProvider.hpp +++ b/include/pmacc/random/RNGProvider.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -29,101 +29,99 @@ namespace pmacc { -namespace random -{ - - /** - * Provider of a per cell random number generator - * - * \tparam T_dim Number of dimensions of the grid - * \tparam T_RNGMethod Method to use for random number generation - */ - template - class RNGProvider : public ISimulationData + namespace random { - public: - static constexpr uint32_t dim = T_dim; - typedef T_RNGMethod RNGMethod; - typedef DataSpace Space; - - private: - typedef typename RNGMethod::StateType RNGState; - - public: - typedef HostDeviceBuffer< RNGState, dim > Buffer; - typedef typename Buffer::DataBoxType DataBoxType; - typedef RNGHandle Handle; - - template - struct GetRandomType - { - typedef typename T_Distribution::template applyMethod::type Distribution; - typedef Random type; - }; - /** - * Create the RNGProvider and allocate memory for the given size + * Provider of a per cell random number generator * - * @param size Size of the grid for which RNGs should be provided - * @param uniqueId Unique ID for this instance. If none is given the default - * (as returned by \ref getName()) is used + * \tparam T_dim Number of dimensions of the grid + * \tparam T_RNGMethod Method to use for random number generation */ - RNGProvider(const Space& size, const std::string& uniqueId = ""); - virtual ~RNGProvider() + template + class RNGProvider : public ISimulationData { - __delete(buffer) - } - /** - * Initializes the random number generators - * Must be called before usage - * @param seed Base seed to be used - */ - void init(uint32_t seed); + public: + static constexpr uint32_t dim = T_dim; + typedef T_RNGMethod RNGMethod; + typedef DataSpace Space; - /** - * Factory method - * Creates a handle to a state that can be used to create actual RNGs - * - * @param id SimulationDataId of the RNGProvider to use. 
Defaults to the default Id of the type - */ - static Handle - createHandle(const std::string& id = getName()); + private: + typedef typename RNGMethod::StateType RNGState; - /** - * Factory method - * Creates functor that creates random numbers with a given distribution - * Similar to the Handle but can be used directly - * - * @param id SimulationDataId of the RNGProvider to use. Defaults to the default Id of the type - */ - template - static typename GetRandomType::type - createRandom(const std::string& id = getName()); + public: + typedef HostDeviceBuffer Buffer; + typedef typename Buffer::DataBoxType DataBoxType; + typedef RNGHandle Handle; - /** - * Returns the default id for this type - */ - static std::string getName(); - SimulationDataId getUniqueId() override; - void synchronize() override; + template + struct GetRandomType + { + typedef typename T_Distribution::template applyMethod::type Distribution; + typedef Random type; + }; - /** - * Return a reference to the buffer containing the states - * Note: This buffer might be empty - */ - Buffer& getStateBuffer(); - private: - /** - * Gets the device data box - */ - DataBoxType getDeviceDataBox(); + /** + * Create the RNGProvider and allocate memory for the given size + * + * @param size Size of the grid for which RNGs should be provided + * @param uniqueId Unique ID for this instance. If none is given the default + * (as returned by \ref getName()) is used + */ + RNGProvider(const Space& size, const std::string& uniqueId = ""); + virtual ~RNGProvider() + { + __delete(buffer) + } + /** + * Initializes the random number generators + * Must be called before usage + * @param seed Base seed to be used + */ + void init(uint32_t seed); + + /** + * Factory method + * Creates a handle to a state that can be used to create actual RNGs + * + * @param id SimulationDataId of the RNGProvider to use. 
Defaults to the default Id of the type + */ + static Handle createHandle(const std::string& id = getName()); + + /** + * Factory method + * Creates functor that creates random numbers with a given distribution + * Similar to the Handle but can be used directly + * + * @param id SimulationDataId of the RNGProvider to use. Defaults to the default Id of the type + */ + template + static typename GetRandomType::type createRandom(const std::string& id = getName()); + + /** + * Returns the default id for this type + */ + static std::string getName(); + SimulationDataId getUniqueId() override; + void synchronize() override; - const Space m_size; - Buffer* buffer; - const std::string m_uniqueId; - }; + /** + * Return a reference to the buffer containing the states + * Note: This buffer might be empty + */ + Buffer& getStateBuffer(); + + private: + /** + * Gets the device data box + */ + DataBoxType getDeviceDataBox(); + + const Space m_size; + Buffer* buffer; + const std::string m_uniqueId; + }; -} // namespace random -} // namespace pmacc + } // namespace random +} // namespace pmacc #include "pmacc/random/RNGProvider.tpp" diff --git a/include/pmacc/random/RNGProvider.tpp b/include/pmacc/random/RNGProvider.tpp index 11dbae9992..0da2d080a3 100644 --- a/include/pmacc/random/RNGProvider.tpp +++ b/include/pmacc/random/RNGProvider.tpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -33,163 +33,115 @@ namespace pmacc { -namespace random -{ - - namespace kernel { - - template< - uint32_t T_numWorkers, - uint32_t T_blockSize, - typename T_RNGMethod - > - struct InitRNGProvider + namespace random + { + namespace kernel { - template< - typename T_RNGBox, - typename T_Space, - typename T_Acc - > - DINLINE void - operator()( - T_Acc const & acc, - T_RNGBox rngBox, - uint32_t seed, - const T_Space size - ) const + template + struct InitRNGProvider { - using namespace mappings::threads; - - constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; - - using SupercellDomCfg = IdxConfig< - T_blockSize, - numWorkers - >; - - // each virtual worker initialize one rng state - ForEachIdx< SupercellDomCfg > forEachCell( workerIdx ); - - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { - uint32_t const linearTid = blockIdx.x * T_blockSize + linearIdx; - if( linearTid >= size.productOfComponents() ) + template + DINLINE void operator()(T_Acc const& acc, T_RNGBox rngBox, uint32_t seed, const T_Space size) const + { + using namespace mappings::threads; + + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + using SupercellDomCfg = IdxConfig; + + // each virtual worker initialize one rng state + ForEachIdx forEachCell(workerIdx); + + forEachCell([&](uint32_t const linearIdx, uint32_t const) { + uint32_t const linearTid = cupla::blockIdx(acc).x * T_blockSize + linearIdx; + if(linearTid >= size.productOfComponents()) return; - T_Space const cellIdx = DataSpaceOperations< T_Space::dim >::map(size, linearTid); - T_RNGMethod().init( - acc, - rngBox( cellIdx ), - seed, - linearTid - ); - } - ); - } - }; - - } // namespace kernel - - template - RNGProvider::RNGProvider(const Space& size, const std::string& uniqueId): - m_size(size), m_uniqueId(uniqueId.empty() ? 
getName() : uniqueId), - buffer(new Buffer(size)) - { - if(m_size.productOfComponents() == 0) - throw std::invalid_argument("Cannot create RNGProvider with zero size"); - } + T_Space const cellIdx = DataSpaceOperations::map(size, linearTid); + T_RNGMethod().init(acc, rngBox(cellIdx), seed, linearTid); + }); + } + }; - template - void RNGProvider::init(uint32_t seed) - { + } // namespace kernel - const uint32_t blockSize = 256; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - blockSize - >::value; - - const uint32_t gridSize = (m_size.productOfComponents() + blockSize - 1u) / blockSize; // Round up - - auto bufferBox = buffer->getDeviceBuffer().getDataBox(); - - PMACC_KERNEL( - kernel::InitRNGProvider< - numWorkers, - blockSize, - RNGMethod>{} - )( - gridSize, - numWorkers - )( - bufferBox, - seed, - m_size - ); - } - - template - typename RNGProvider::Handle - RNGProvider::createHandle(const std::string& id) - { - auto provider = - Environment<>::get().DataConnector().get< RNGProvider >( id, true ); - Handle result( provider->getDeviceDataBox() ); - Environment<>::get().DataConnector().releaseData( id ); - return result; - } - - template - template - typename RNGProvider::template GetRandomType::type - RNGProvider::createRandom(const std::string& id) - { - typedef typename GetRandomType::type ResultType; - return ResultType(createHandle()); - } + template + RNGProvider::RNGProvider(const Space& size, const std::string& uniqueId) + : m_size(size) + , m_uniqueId(uniqueId.empty() ? 
getName() : uniqueId) + , buffer(new Buffer(size)) + { + if(m_size.productOfComponents() == 0) + throw std::invalid_argument("Cannot create RNGProvider with zero size"); + } - template - typename RNGProvider::Buffer& - RNGProvider::getStateBuffer() - { - return *buffer; - } + template + void RNGProvider::init(uint32_t seed) + { + const uint32_t blockSize = 256; - template - typename RNGProvider::DataBoxType - RNGProvider::getDeviceDataBox() - { - return buffer->getDeviceBuffer().getDataBox(); - } + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers::value; - template - std::string - RNGProvider::getName() - { - /* generate a unique name (for this type!) to use as a default ID */ - return std::string("RNGProvider") - + char('0' + dim) /* valid for 0..9 */ + const uint32_t gridSize = (m_size.productOfComponents() + blockSize - 1u) / blockSize; // Round up + + auto bufferBox = buffer->getDeviceBuffer().getDataBox(); + + PMACC_KERNEL(kernel::InitRNGProvider{}) + (gridSize, numWorkers)(bufferBox, seed, m_size); + } + + template + typename RNGProvider::Handle RNGProvider::createHandle( + const std::string& id) + { + auto provider = Environment<>::get().DataConnector().get(id, true); + Handle result(provider->getDeviceDataBox()); + Environment<>::get().DataConnector().releaseData(id); + return result; + } + + template + template + typename RNGProvider::template GetRandomType::type RNGProvider< + T_dim, + T_RNGMethod>::createRandom(const std::string& id) + { + typedef typename GetRandomType::type ResultType; + return ResultType(createHandle()); + } + + template + typename RNGProvider::Buffer& RNGProvider::getStateBuffer() + { + return *buffer; + } + + template + typename RNGProvider::DataBoxType RNGProvider::getDeviceDataBox() + { + return buffer->getDeviceBuffer().getDataBox(); + } + + template + std::string RNGProvider::getName() + { + /* generate a unique name (for this type!) 
to use as a default ID */ + return std::string("RNGProvider") + char('0' + dim) /* valid for 0..9 */ + RNGMethod::getName(); - } + } - template - SimulationDataId - RNGProvider::getUniqueId() - { - return m_uniqueId; - } + template + SimulationDataId RNGProvider::getUniqueId() + { + return m_uniqueId; + } - template - void - RNGProvider::synchronize() - { - buffer->deviceToHost(); - } + template + void RNGProvider::synchronize() + { + buffer->deviceToHost(); + } -} // namespace random -} // namespace pmacc + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/RNGState.hpp b/include/pmacc/random/RNGState.hpp index 7be9137f10..8416aa1c51 100644 --- a/include/pmacc/random/RNGState.hpp +++ b/include/pmacc/random/RNGState.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -25,34 +25,35 @@ namespace pmacc { -namespace random -{ - - /** - * Wrapper class for a state of a random number generator - * Can be used for aligned storing of states - */ - template - class RNGState + namespace random { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; + /** + * Wrapper class for a state of a random number generator + * Can be used for aligned storing of states + */ + template + class RNGState + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; - HDINLINE RNGState() - {} + HDINLINE RNGState() + { + } - HDINLINE RNGState(const StateType& other): state(other) - {} + HDINLINE RNGState(const StateType& other) : state(other) + { + } - HDINLINE StateType& - getState() - { - return state; - } - private: - PMACC_ALIGN8(StateType) state; - }; - -} // namespace random -} // namespace pmacc + HDINLINE StateType& getState() + { + return state; + } + + private: + PMACC_ALIGN8(StateType, ) state; + }; + + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/Random.hpp 
b/include/pmacc/random/Random.hpp index 3451628876..b59ddf7007 100644 --- a/include/pmacc/random/Random.hpp +++ b/include/pmacc/random/Random.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -27,91 +27,81 @@ namespace pmacc { -namespace random -{ - - /** - * Random Number Generator. Functor that returns a random number per call - * - * Default implementation assumes a RNGHandle - */ - template< - class T_Distribution, - class T_RNGMethod, - class T_RNGStatePtrOrHandle = typename T_RNGMethod::StateType* - > - struct Random: private T_Distribution, private T_RNGStatePtrOrHandle + namespace random { - typedef T_RNGMethod RNGMethod; - /* RNGHandle assumed */ - typedef T_RNGStatePtrOrHandle RNGHandle; - typedef T_Distribution Distribution; - typedef typename boost::result_of::type result_type; - - /** This can be constructed with either the RNGBox (like the RNGHandle) or from an RNGHandle instance */ - template - explicit HINLINE Random(const T_RNGBoxOrHandle& rngBox): RNGHandle(rngBox) - {} - /** - * Initializes this instance + * Random Number Generator. 
Functor that returns a random number per call * - * \param cellIdx index into the underlying RNG Provider + * Default implementation assumes a RNGHandle */ - template - HDINLINE void - init(const T_Offset& cellIdx) - { - RNGHandle::init(cellIdx); - } - - /** Returns a new random number advancing the state */ - template< typename T_Acc > - DINLINE result_type - operator()( T_Acc const & acc ) + template< + class T_Distribution, + class T_RNGMethod, + class T_RNGStatePtrOrHandle = typename T_RNGMethod::StateType*> + struct Random + : private T_Distribution + , private T_RNGStatePtrOrHandle { - return Distribution::operator()( - acc, - RNGHandle::getState() - ); - } - }; + typedef T_RNGMethod RNGMethod; + /* RNGHandle assumed */ + typedef T_RNGStatePtrOrHandle RNGHandle; + typedef T_Distribution Distribution; + typedef typename boost::result_of::type result_type; - /** - * Specialization when the state is a pointer - */ - template< - class T_Distribution, - class T_RNGMethod, - class T_RNGState - > - struct Random: private T_Distribution - { - typedef T_RNGMethod RNGMethod; - typedef T_RNGState RNGState; - typedef T_Distribution Distribution; - typedef typename boost::result_of::type result_type; + /** This can be constructed with either the RNGBox (like the RNGHandle) or from an RNGHandle instance */ + template + explicit HINLINE Random(const T_RNGBoxOrHandle& rngBox) : RNGHandle(rngBox) + { + } - HDINLINE Random(): m_rngState(nullptr) - {} + /** + * Initializes this instance + * + * \param cellIdx index into the underlying RNG Provider + */ + template + HDINLINE void init(const T_Offset& cellIdx) + { + RNGHandle::init(cellIdx); + } - HDINLINE Random(RNGState* m_rngState): m_rngState(m_rngState) - {} + /** Returns a new random number advancing the state */ + template + DINLINE result_type operator()(T_Acc const& acc) + { + return Distribution::operator()(acc, RNGHandle::getState()); + } + }; - /** Returns a new random number advancing the state */ - template< 
typename T_Acc > - DINLINE result_type - operator()( T_Acc const & acc ) + /** + * Specialization when the state is a pointer + */ + template + struct Random : private T_Distribution { - return Distribution::operator()( - acc, - *m_rngState - ); - } + typedef T_RNGMethod RNGMethod; + typedef T_RNGState RNGState; + typedef T_Distribution Distribution; + typedef typename boost::result_of::type result_type; + + HDINLINE Random() : m_rngState(nullptr) + { + } + + HDINLINE Random(RNGState* m_rngState) : m_rngState(m_rngState) + { + } + + /** Returns a new random number advancing the state */ + template + DINLINE result_type operator()(T_Acc const& acc) + { + return Distribution::operator()(acc, *m_rngState); + } - protected: - PMACC_ALIGN(m_rngState, RNGState*); - }; + protected: + PMACC_ALIGN(m_rngState, RNGState*); + }; -} // namespace random -} // namespace pmacc + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/Normal.hpp b/include/pmacc/random/distributions/Normal.hpp index f5e2b74fbc..6f34bc707e 100644 --- a/include/pmacc/random/distributions/Normal.hpp +++ b/include/pmacc/random/distributions/Normal.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -26,36 +26,33 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ - namespace detail + namespace random { - /** Only this must be specialized for different types */ - template - class Normal; - } - - /** - * Returns a random, normal distributed value of the given type - */ - template - struct Normal: public detail::Normal - { - template< typename T_Method > - struct applyMethod + namespace distributions { - using type = Normal< - T_Type, - T_Method - >; - }; - }; + namespace detail + { + /** Only this must be specialized for different types */ + template + class Normal; + } // namespace detail + + /** + * Returns a random, normal distributed value of the given type + */ + template + struct Normal : public detail::Normal + { + template + struct applyMethod + { + using type = Normal; + }; + }; -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace distributions + } // namespace random +} // namespace pmacc #include "pmacc/random/distributions/normal/Normal_generic.hpp" #include "pmacc/random/distributions/normal/Normal_float.hpp" diff --git a/include/pmacc/random/distributions/Uniform.hpp b/include/pmacc/random/distributions/Uniform.hpp index 1f7ee392f1..cc1c095b4b 100644 --- a/include/pmacc/random/distributions/Uniform.hpp +++ b/include/pmacc/random/distributions/Uniform.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -30,45 +30,42 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail { - - /** Only this must be specialized for different types */ - template - class Uniform; - -} // namespace detail - - /** - * Returns a random, uniformly distributed value of the given type - * - * @tparam T_Type the result type or a range description @see uniform/Range.hpp - * \code - * Uniform::Reduced> UniformReducedDistribution; //default - * Uniform UniformDefaultDistribution; //equal to line one - * Uniform > UniformNoZeroDistribution; - * \endcode - * @tparam T_RNGMethod method to create a random number - */ - template - struct Uniform: public detail::Uniform + namespace random { - template< typename T_Method > - struct applyMethod + namespace distributions { - using type = Uniform< - T_Type, - T_Method - >; - }; - }; + namespace detail + { + /** Only this must be specialized for different types */ + template + class Uniform; + + } // namespace detail + + /** + * Returns a random, uniformly distributed value of the given type + * + * @tparam T_Type the result type or a range description @see uniform/Range.hpp + * \code + * Uniform::Reduced> UniformReducedDistribution; //default + * Uniform UniformDefaultDistribution; //equal to line one + * Uniform > UniformNoZeroDistribution; + * \endcode + * @tparam T_RNGMethod method to create a random number + */ + template + struct Uniform : public detail::Uniform + { + template + struct applyMethod + { + using type = Uniform; + }; + }; -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace distributions + } // namespace random +} // namespace pmacc #include "pmacc/random/distributions/uniform/Uniform_float.hpp" #include "pmacc/random/distributions/uniform/Uniform_double.hpp" diff --git a/include/pmacc/random/distributions/distributions.hpp b/include/pmacc/random/distributions/distributions.hpp index b64d5d34b2..d08fbd3604 100644 --- 
a/include/pmacc/random/distributions/distributions.hpp +++ b/include/pmacc/random/distributions/distributions.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. * diff --git a/include/pmacc/random/distributions/misc/MullerBox.hpp b/include/pmacc/random/distributions/misc/MullerBox.hpp index c3f79da7ee..bbf0b5dec8 100644 --- a/include/pmacc/random/distributions/misc/MullerBox.hpp +++ b/include/pmacc/random/distributions/misc/MullerBox.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. * @@ -28,115 +28,84 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ - - /** create a normal distributed random number - * - * Create a random number with mean 0 and standard deviation 1. - * The implementation based on the Wikipedia article: - * - source: https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform - * - date: 01/12/2017 - */ - template< - typename T_Type, - typename T_RNGMethod - > - class MullerBox : - Uniform< - uniform::ExcludeZero< T_Type >, - T_RNGMethod - > + namespace random { - /** The muller box is creating two random number, each second time - * this number is valid and can be used. - */ - T_Type secondRngNumber; - //! true if secondRngNumber is valid else false - bool hasSecondRngNumber = false; + namespace distributions + { + /** create a normal distributed random number + * + * Create a random number with mean 0 and standard deviation 1. + * The implementation based on the Wikipedia article: + * - source: https://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform + * - date: 01/12/2017 + */ + template + class MullerBox : Uniform, T_RNGMethod> + { + /** The muller box is creating two random number, each second time + * this number is valid and can be used. + */ + T_Type secondRngNumber; + //! 
true if secondRngNumber is valid else false + bool hasSecondRngNumber = false; - using RNGMethod = T_RNGMethod; - using UniformRng = Uniform< - uniform::ExcludeZero< T_Type >, - RNGMethod - >; - using StateType = typename RNGMethod::StateType; + using RNGMethod = T_RNGMethod; + using UniformRng = Uniform, RNGMethod>; + using StateType = typename RNGMethod::StateType; - /** generate a normal distributed random number - * - * @param acc alpaka accelerator - * @param state the state of an pmacc random number generator - */ - template< typename T_Acc > - DINLINE T_Type getNormal( - T_Acc const & acc, - StateType& state - ) - { - constexpr T_Type valueTwoPI = 6.2831853071795860; + /** generate a normal distributed random number + * + * @param acc alpaka accelerator + * @param state the state of an pmacc random number generator + */ + template + DINLINE T_Type getNormal(T_Acc const& acc, StateType& state) + { + constexpr T_Type valueTwoPI = 6.2831853071795860; - T_Type u1 = UniformRng::operator()( - acc, - state - ); - T_Type u2 = UniformRng::operator()( - acc, - state - ) * valueTwoPI; + T_Type u1 = UniformRng::operator()(acc, state); + T_Type u2 = UniformRng::operator()(acc, state) * valueTwoPI; - T_Type s = algorithms::math::sqrt( T_Type( -2.0 ) * algorithms::math::log( u1 ) ); + T_Type s = cupla::math::sqrt(T_Type(-2.0) * cupla::math::log(u1)); - T_Type firstRngNumber; - algorithms::math::sincos( - u2, - firstRngNumber, - secondRngNumber - ); + T_Type firstRngNumber; + pmacc::math::sincos(u2, firstRngNumber, secondRngNumber); - firstRngNumber *= s; - secondRngNumber *= s; - hasSecondRngNumber = true; - return firstRngNumber; - } + firstRngNumber *= s; + secondRngNumber *= s; + hasSecondRngNumber = true; + return firstRngNumber; + } - public: - //! result type of the random number - using result_type = T_Type; + public: + //! 
result type of the random number + using result_type = T_Type; - /** generate a normal distributed random number - * - * Generates two random numbers with the first call, each second call - * the precomputed random number is returned. - * - * @param acc alpaka accelerator - * @param state the state of an pmacc random number generator - */ - template< typename T_Acc > - DINLINE result_type - operator()( - T_Acc const & acc, - StateType& state - ) - { - T_Type result; - if( hasSecondRngNumber ) - { - result = secondRngNumber; - hasSecondRngNumber = false; - } - else - { - result = getNormal( - acc, - state - ); - } - return result; - } - }; + /** generate a normal distributed random number + * + * Generates two random numbers with the first call, each second call + * the precomputed random number is returned. + * + * @param acc alpaka accelerator + * @param state the state of an pmacc random number generator + */ + template + DINLINE result_type operator()(T_Acc const& acc, StateType& state) + { + T_Type result; + if(hasSecondRngNumber) + { + result = secondRngNumber; + hasSecondRngNumber = false; + } + else + { + result = getNormal(acc, state); + } + return result; + } + }; -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/normal/Normal_double.hpp b/include/pmacc/random/distributions/normal/Normal_double.hpp index 40337e07fe..2cd7570167 100644 --- a/include/pmacc/random/distributions/normal/Normal_double.hpp +++ b/include/pmacc/random/distributions/normal/Normal_double.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. 
* @@ -34,51 +34,30 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ + namespace random + { + namespace distributions + { + namespace detail + { /* XorMin and MRG32k3aMin uses the alpaka RNG as fallback for CPU accelerators * therefore we are not allowed to add a specialization for those RNG methods */ -#if( PMACC_CUDA_ENABLED == 1 ) - //! specialization for XorMin - template< - typename T_Acc - > - struct Normal< - double, - methods::XorMin< T_Acc >, - void - > : - public MullerBox< - double, - methods::XorMin< T_Acc > - > - { - - }; - - //! specialization for MRG32k3aMin - template< - typename T_Acc - > - struct Normal< - double, - methods::MRG32k3aMin< T_Acc >, - void - > : - public MullerBox< - double, - methods::MRG32k3aMin< T_Acc > - > - { - - }; +#if(PMACC_CUDA_ENABLED == 1 || ALPAKA_ACC_GPU_HIP_ENABLED == 1) + //! specialization for XorMin + template + struct Normal, void> : public MullerBox> + { + }; + + //! specialization for MRG32k3aMin + template + struct Normal, void> + : public MullerBox> + { + }; #endif -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/normal/Normal_float.hpp b/include/pmacc/random/distributions/normal/Normal_float.hpp index 7b434806e0..bf4a89b34d 100644 --- a/include/pmacc/random/distributions/normal/Normal_float.hpp +++ b/include/pmacc/random/distributions/normal/Normal_float.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. 
* @@ -34,51 +34,30 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ + namespace random + { + namespace distributions + { + namespace detail + { /* XorMin and MRG32k3aMin uses the alpaka RNG as fallback for CPU accelerators * therefore we are not allowed to add a specialization for those RNG methods */ -#if( PMACC_CUDA_ENABLED == 1 ) - //! specialization for XorMin - template< - typename T_Acc - > - struct Normal< - float, - methods::XorMin< T_Acc >, - void - > : - public MullerBox< - float, - methods::XorMin< T_Acc > - > - { - - }; - - //! specialization for MRG32k3aMin - template< - typename T_Acc - > - struct Normal< - float, - methods::MRG32k3aMin< T_Acc >, - void - > : - public MullerBox< - float, - methods::MRG32k3aMin< T_Acc > - > - { - - }; +#if(PMACC_CUDA_ENABLED == 1 || ALPAKA_ACC_GPU_HIP_ENABLED == 1) + //! specialization for XorMin + template + struct Normal, void> : public MullerBox> + { + }; + + //! specialization for MRG32k3aMin + template + struct Normal, void> + : public MullerBox> + { + }; #endif -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/normal/Normal_generic.hpp b/include/pmacc/random/distributions/normal/Normal_generic.hpp index 5be4bba79e..dffbc6805d 100644 --- a/include/pmacc/random/distributions/normal/Normal_generic.hpp +++ b/include/pmacc/random/distributions/normal/Normal_generic.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. 
* @@ -29,43 +29,30 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ - - //!Returns a normally distributed floating point with value with mean 0.0 and standard deviation 1.0 - template< - typename T_Type, - typename T_RNGMethod - > - class Normal< - T_Type, - T_RNGMethod, - void - > + namespace random { - using RNGMethod = T_RNGMethod; - using StateType = typename RNGMethod::StateType; - public: - using result_type = T_Type; - - template< typename T_Acc > - DINLINE result_type - operator()( - T_Acc const & acc, - StateType& state - ) + namespace distributions { - return ::alpaka::rand::distribution::createNormalReal< T_Type >( - acc - )( state ); - } - }; - -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + namespace detail + { + //! Returns a normally distributed floating point with value with mean 0.0 and standard deviation 1.0 + template + class Normal + { + using RNGMethod = T_RNGMethod; + using StateType = typename RNGMethod::StateType; + + public: + using result_type = T_Type; + + template + DINLINE result_type operator()(T_Acc const& acc, StateType& state) + { + return ::alpaka::rand::distribution::createNormalReal(acc)(state); + } + }; + + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/uniform/Range.hpp b/include/pmacc/random/distributions/uniform/Range.hpp index 52c34f0ce4..140c2633d5 100644 --- a/include/pmacc/random/distributions/uniform/Range.hpp +++ b/include/pmacc/random/distributions/uniform/Range.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,58 +26,60 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace uniform -{ - /** floating point number in the range (0,1] - * - * @tparam T_Type type of the result - * @return value in the range (0,1] - */ - template - struct ExcludeZero - {}; - - /** floating point number in the range [0,1) - * - * @tparam T_Type type of the result - */ - template - struct ExcludeOne + namespace random { + namespace distributions + { + namespace uniform + { + /** floating point number in the range (0,1] + * + * @tparam T_Type type of the result + * @return value in the range (0,1] + */ + template + struct ExcludeZero + { + }; - /** Reduce the random range - * - * number of unique random numbers for - * - `float` is `2^24` - * - `double` is `2^53` - * - * Creates intervals with the width of epsilon/2. - */ - struct Reduced - {}; - - /** Loops until a random value inside the defined range is created - * - * The runtime of this method is not deterministic. - * @warning zero is excluded which results in a range (0,1) - */ - struct Repeat - {}; + /** floating point number in the range [0,1) + * + * @tparam T_Type type of the result + */ + template + struct ExcludeOne + { + /** Reduce the random range + * + * number of unique random numbers for + * - `float` is `2^24` + * - `double` is `2^53` + * + * Creates intervals with the width of epsilon/2. + */ + struct Reduced + { + }; - /** Swap the value one to zero - * - * This method creates a small error in uniform distribution - */ - struct SwapOneToZero - {}; + /** Loops until a random value inside the defined range is created + * + * The runtime of this method is not deterministic. 
+ * @warning zero is excluded which results in a range (0,1) + */ + struct Repeat + { + }; - }; + /** Swap the value one to zero + * + * This method creates a small error in uniform distribution + */ + struct SwapOneToZero + { + }; + }; -} // namespace uniform -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace uniform + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/uniform/Uniform_Integral32Bit.hpp b/include/pmacc/random/distributions/uniform/Uniform_Integral32Bit.hpp index 52c3b5be0d..688274c8e3 100644 --- a/include/pmacc/random/distributions/uniform/Uniform_Integral32Bit.hpp +++ b/include/pmacc/random/distributions/uniform/Uniform_Integral32Bit.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund +/* Copyright 2015-2021 Alexander Grund * * This file is part of PMacc. * @@ -27,44 +27,35 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ - - /** - * Returns a random, uniformly distributed (up to) 32 bit integral value - */ - template - class Uniform< - T_Type, - T_RNGMethod, - typename bmpl::if_c< - boost::is_integral::value && sizeof(T_Type) <= 4, - void, - T_Type - >::type - > + namespace random { - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - public: - typedef T_Type result_type; - - template< typename T_Acc > - DINLINE result_type - operator()( - T_Acc const & acc, - StateType& state - ) + namespace distributions { - return static_cast(RNGMethod().get32Bits(acc, state)); - } - }; + namespace detail + { + /** + * Returns a random, uniformly distributed (up to) 32 bit integral value + */ + template + class Uniform< + T_Type, + T_RNGMethod, + typename bmpl::if_c::value && sizeof(T_Type) <= 4, void, T_Type>::type> + { + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + + public: + typedef T_Type result_type; + + template + DINLINE 
result_type operator()(T_Acc const& acc, StateType& state) + { + return static_cast(RNGMethod().get32Bits(acc, state)); + } + }; -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/uniform/Uniform_Integral64Bit.hpp b/include/pmacc/random/distributions/uniform/Uniform_Integral64Bit.hpp index bfa1a6cbe4..51d0671af5 100644 --- a/include/pmacc/random/distributions/uniform/Uniform_Integral64Bit.hpp +++ b/include/pmacc/random/distributions/uniform/Uniform_Integral64Bit.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -29,50 +29,35 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ - - /** - * Returns a random, uniformly distributed (up to) 64 bit integral value - */ - template< - typename T_Type, - class T_RNGMethod - > - class Uniform< - T_Type, - T_RNGMethod, - typename bmpl::if_c< - boost::is_integral< T_Type >::value && sizeof( T_Type ) == 8, - void, - T_Type - >::type - > + namespace random { - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - public: - typedef T_Type result_type; - - template< typename T_Acc > - DINLINE result_type - operator()( - T_Acc const & acc, - StateType& state - ) + namespace distributions { - return static_cast< result_type >( RNGMethod().get64Bits( - acc, - state - ) ); - } - }; - -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + namespace detail + { + /** + * Returns a random, uniformly distributed (up to) 64 bit integral value + */ + template + class Uniform< + T_Type, + T_RNGMethod, + typename bmpl::if_c::value && sizeof(T_Type) == 8, void, T_Type>::type> + { + typedef T_RNGMethod RNGMethod; + typedef typename 
RNGMethod::StateType StateType; + + public: + typedef T_Type result_type; + + template + DINLINE result_type operator()(T_Acc const& acc, StateType& state) + { + return static_cast(RNGMethod().get64Bits(acc, state)); + } + }; + + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/uniform/Uniform_double.hpp b/include/pmacc/random/distributions/uniform/Uniform_double.hpp index 6c3b7a5b60..441b1ac957 100644 --- a/include/pmacc/random/distributions/uniform/Uniform_double.hpp +++ b/include/pmacc/random/distributions/uniform/Uniform_double.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -27,151 +27,109 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ - - /** Returns a random double value uniformly distributed in (0,1] - * - * The smallest created value is `2^-65` (~ `2.710505431213761*10^-20`) - */ - template - class Uniform< - uniform::ExcludeZero, - T_RNGMethod, - void - > + namespace random { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef double result_type; - - template< typename T_Acc > - DINLINE double - operator()( - T_Acc const & acc, - StateType& state - ) const + namespace distributions { - double const value2pow64Inv = 5.421010862427522e-20; - uint64_t const random = RNGMethod().get64Bits( - acc, - state - ); - return static_cast< double >( random ) * value2pow64Inv + - ( value2pow64Inv / 2.0 ); - } - }; + namespace detail + { + /** Returns a random double value uniformly distributed in (0,1] + * + * The smallest created value is `2^-65` (~ `2.710505431213761*10^-20`) + */ + template + class Uniform, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef double result_type; - /** 
Returns a random double value uniformly distributed in [0,1) - * - * Swap the value one to zero (creates a small error in uniform distribution) - */ - template - class Uniform< - uniform::ExcludeOne< double >::SwapOneToZero, - T_RNGMethod, - void - > - { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef double result_type; + template + DINLINE double operator()(T_Acc const& acc, StateType& state) const + { + double const value2pow64Inv = 5.421010862427522e-20; + uint64_t const random = RNGMethod().get64Bits(acc, state); + return static_cast(random) * value2pow64Inv + (value2pow64Inv / 2.0); + } + }; - template< typename T_Acc > - DINLINE double - operator()( - T_Acc const & acc, - StateType& state - ) const - { - double const randomValue = - pmacc::random::distributions::Uniform< - uniform::ExcludeZero< double >, - RNGMethod - >()(acc, state); - return randomValue == 1.0 ? 0.0 : randomValue; - } - }; + /** Returns a random double value uniformly distributed in [0,1) + * + * Swap the value one to zero (creates a small error in uniform distribution) + */ + template + class Uniform::SwapOneToZero, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef double result_type; - /** Returns a random double value uniformly distributed in [0,1) - * - * Number of unique random numbers is reduced to `2^53`. - * Uses a uniform distance of `2^-53` (`epsilon/2`) between each possible - * random number. - */ - template - class Uniform< - uniform::ExcludeOne< double >::Reduced, - T_RNGMethod, - void - > - { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef double result_type; + template + DINLINE double operator()(T_Acc const& acc, StateType& state) const + { + double const randomValue + = pmacc::random::distributions::Uniform, RNGMethod>()( + acc, + state); + return randomValue == 1.0 ? 
0.0 : randomValue; + } + }; - template< typename T_Acc > - DINLINE double - operator()( - T_Acc const & acc, - StateType& state - ) const - { - double const value2pow53Inv = 1.1102230246251565e-16; - double const randomValue53Bit = RNGMethod().get64Bits( acc, state ) >> 11; - return randomValue53Bit * value2pow53Inv; - } - }; + /** Returns a random double value uniformly distributed in [0,1) + * + * Number of unique random numbers is reduced to `2^53`. + * Uses a uniform distance of `2^-53` (`epsilon/2`) between each possible + * random number. + */ + template + class Uniform::Reduced, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef double result_type; - /** Returns a random double value uniformly distributed in (0,1) - * - * Loops until a random value inside the defined range is created. - * The runtime of this method is not deterministic. - */ - template< - class T_RNGMethod - > - class Uniform< - typename uniform::ExcludeOne< double >::Repeat, - T_RNGMethod, - void - > - { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef double result_type; + template + DINLINE double operator()(T_Acc const& acc, StateType& state) const + { + double const value2pow53Inv = 1.1102230246251565e-16; + double const randomValue53Bit = RNGMethod().get64Bits(acc, state) >> 11; + return randomValue53Bit * value2pow53Inv; + } + }; - template< typename T_Acc > - DINLINE result_type - operator()( - T_Acc const & acc, - StateType& state - ) const - { - do - { - const double randomValue = - pmacc::random::distributions::Uniform< - uniform::ExcludeZero< double >, - RNGMethod - >()(acc, state); + /** Returns a random double value uniformly distributed in (0,1) + * + * Loops until a random value inside the defined range is created. + * The runtime of this method is not deterministic. 
+ */ + template + class Uniform::Repeat, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef double result_type; + + template + DINLINE result_type operator()(T_Acc const& acc, StateType& state) const + { + do + { + const double randomValue + = pmacc::random::distributions::Uniform, RNGMethod>()( + acc, + state); - if( randomValue != 1.0 ) - return randomValue; - } - while(true); - } - }; + if(randomValue != 1.0) + return randomValue; + } while(true); + } + }; -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/uniform/Uniform_float.hpp b/include/pmacc/random/distributions/uniform/Uniform_float.hpp index 4477f6ba35..707c17cc0a 100644 --- a/include/pmacc/random/distributions/uniform/Uniform_float.hpp +++ b/include/pmacc/random/distributions/uniform/Uniform_float.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. 
* @@ -27,146 +27,109 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ - - /** Returns a random float value uniformly distributed in (0,1] - * - * The smallest created value is `2^-33` (~ `1.164*10^-10`) - */ - template - class Uniform< - uniform::ExcludeZero, - T_RNGMethod, - void - > + namespace random { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef float result_type; - - template< typename T_Acc > - DINLINE float - operator()( - T_Acc const & acc, - StateType& state - ) const + namespace distributions { - const float value2pow32Inv = 2.3283064e-10f; - const uint32_t random = RNGMethod().get32Bits(acc, state); - return static_cast( random ) * value2pow32Inv + - ( value2pow32Inv / 2.0f ); - } - }; + namespace detail + { + /** Returns a random float value uniformly distributed in (0,1] + * + * The smallest created value is `2^-33` (~ `1.164*10^-10`) + */ + template + class Uniform, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef float result_type; - /** Returns a random float value uniformly distributed in [0,1) - * - * Swap the value one to zero (creates a small error in uniform distribution) - */ - template - class Uniform< - uniform::ExcludeOne::SwapOneToZero, - T_RNGMethod, - void - > - { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef float result_type; + template + DINLINE float operator()(T_Acc const& acc, StateType& state) const + { + const float value2pow32Inv = 2.3283064e-10f; + const uint32_t random = RNGMethod().get32Bits(acc, state); + return static_cast(random) * value2pow32Inv + (value2pow32Inv / 2.0f); + } + }; - template< typename T_Acc > - DINLINE float - operator()( - T_Acc const & acc, - StateType& state - ) const - { - const float randomValue = - pmacc::random::distributions::Uniform< - uniform::ExcludeZero, - 
RNGMethod - >()(acc, state); - return randomValue == 1.0f ? 0.0f : randomValue; - } - }; + /** Returns a random float value uniformly distributed in [0,1) + * + * Swap the value one to zero (creates a small error in uniform distribution) + */ + template + class Uniform::SwapOneToZero, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef float result_type; - /** Returns a random float value uniformly distributed in [0,1) - * - * Number of unique random numbers is reduced to `2^24`. - * Uses a uniform distance of `2^-24` (`epsilon/2`) between each possible - * random number. - */ - template - class Uniform< - uniform::ExcludeOne::Reduced, - T_RNGMethod, - void - > - { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef float result_type; + template + DINLINE float operator()(T_Acc const& acc, StateType& state) const + { + const float randomValue + = pmacc::random::distributions::Uniform, RNGMethod>()( + acc, + state); + return randomValue == 1.0f ? 0.0f : randomValue; + } + }; - template< typename T_Acc > - DINLINE float - operator()( - T_Acc const & acc, - StateType& state - ) const - { - const float value2pow24Inv = 5.9604645e-08f; - const float randomValue24Bit = RNGMethod().get32Bits(acc, state) >> 8; - return static_cast( randomValue24Bit ) * value2pow24Inv; - } - }; + /** Returns a random float value uniformly distributed in [0,1) + * + * Number of unique random numbers is reduced to `2^24`. + * Uses a uniform distance of `2^-24` (`epsilon/2`) between each possible + * random number. + */ + template + class Uniform::Reduced, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef float result_type; - /** Returns a random float value uniformly distributed in (0,1) - * - * Loops until a random value inside the defined range is created. 
- * The runtime of this method is not deterministic. - */ - template - class Uniform< - typename uniform::ExcludeOne::Repeat, - T_RNGMethod, - void - > - { - public: - typedef T_RNGMethod RNGMethod; - typedef typename RNGMethod::StateType StateType; - typedef float result_type; + template + DINLINE float operator()(T_Acc const& acc, StateType& state) const + { + const float value2pow24Inv = 5.9604645e-08f; + const float randomValue24Bit = RNGMethod().get32Bits(acc, state) >> 8; + return static_cast(randomValue24Bit) * value2pow24Inv; + } + }; - template< typename T_Acc > - DINLINE float - operator()( - T_Acc const & acc, - StateType& state - ) const - { - do - { - const float randomValue = - pmacc::random::distributions::Uniform< - uniform::ExcludeZero, - RNGMethod - >()(acc, state); + /** Returns a random float value uniformly distributed in (0,1) + * + * Loops until a random value inside the defined range is created. + * The runtime of this method is not deterministic. + */ + template + class Uniform::Repeat, T_RNGMethod, void> + { + public: + typedef T_RNGMethod RNGMethod; + typedef typename RNGMethod::StateType StateType; + typedef float result_type; + + template + DINLINE float operator()(T_Acc const& acc, StateType& state) const + { + do + { + const float randomValue + = pmacc::random::distributions::Uniform, RNGMethod>()( + acc, + state); - if( randomValue != 1.0f ) - return randomValue; - } - while(true); - } - }; + if(randomValue != 1.0f) + return randomValue; + } while(true); + } + }; -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/distributions/uniform/Uniform_generic.hpp b/include/pmacc/random/distributions/uniform/Uniform_generic.hpp index 64b551444f..5b9e8269c1 100644 --- a/include/pmacc/random/distributions/uniform/Uniform_generic.hpp +++ 
b/include/pmacc/random/distributions/uniform/Uniform_generic.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -29,57 +29,40 @@ namespace pmacc { -namespace random -{ -namespace distributions -{ -namespace detail -{ - - /** Returns a random floating point value uniformly distributed in [0,1) - * - * Equivalent to uniform::ExcludeOne< T_Type >::Reduced - */ - template< - typename T_Type, - class T_RNGMethod - > - class Uniform< - T_Type, - T_RNGMethod, - typename std::enable_if< - std::is_floating_point< T_Type >::value - >::type - > : - public pmacc::random::distributions::Uniform< - typename uniform::ExcludeOne< T_Type >::Reduced, - T_RNGMethod - > + namespace random { - }; + namespace distributions + { + namespace detail + { + /** Returns a random floating point value uniformly distributed in [0,1) + * + * Equivalent to uniform::ExcludeOne< T_Type >::Reduced + */ + template + class Uniform< + T_Type, + T_RNGMethod, + typename std::enable_if::value>::type> + : public pmacc::random::distributions:: + Uniform::Reduced, T_RNGMethod> + { + }; - /** Returns a random floating point value uniformly distributed in [0,1) - * - * Equivalent to uniform::ExcludeOne< T_Type >::Reduced - */ - template< - typename T_Type, - class T_RNGMethod - > - class Uniform< - uniform::ExcludeOne< T_Type>, - T_RNGMethod, - typename std::enable_if< - std::is_floating_point< T_Type >::value - >::type - > : - public pmacc::random::distributions::Uniform< - typename uniform::ExcludeOne< T_Type >::Reduced, - T_RNGMethod - > - { - }; -} // namespace detail -} // namespace distributions -} // namespace random -} // namespace pmacc + /** Returns a random floating point value uniformly distributed in [0,1) + * + * Equivalent to uniform::ExcludeOne< T_Type >::Reduced + */ + template + class Uniform< + uniform::ExcludeOne, + T_RNGMethod, + typename std::enable_if::value>::type> + : public 
pmacc::random::distributions:: + Uniform::Reduced, T_RNGMethod> + { + }; + } // namespace detail + } // namespace distributions + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/methods/AlpakaRand.hpp b/include/pmacc/random/methods/AlpakaRand.hpp index efe66c50d6..4a273952c4 100644 --- a/include/pmacc/random/methods/AlpakaRand.hpp +++ b/include/pmacc/random/methods/AlpakaRand.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -25,72 +25,46 @@ namespace pmacc { -namespace random -{ -namespace methods -{ - - template< typename T_Acc = cupla::Acc> - class AlpakaRand + namespace random { - public: - using StateType = - decltype( - ::alpaka::rand::generator::createDefault( - alpaka::core::declval(), - alpaka::core::declval(), - alpaka::core::declval() - ) - ); - - DINLINE void - init( - T_Acc const & acc, - StateType& state, - uint32_t seed, - uint32_t subsequence = 0 - ) const + namespace methods { - state = ::alpaka::rand::generator::createDefault( - acc, - seed, - subsequence - ); - } + template + class AlpakaRand + { + public: + using StateType = decltype(::alpaka::rand::generator::createDefault( + alpaka::core::declval(), + alpaka::core::declval(), + alpaka::core::declval())); - DINLINE uint32_t - get32Bits( - T_Acc const & acc, - StateType& state - ) const - { - return ::alpaka::rand::distribution::createUniformUint< uint32_t >( - acc - )( state ); - } + DINLINE void init(T_Acc const& acc, StateType& state, uint32_t seed, uint32_t subsequence = 0) const + { + state = ::alpaka::rand::generator::createDefault(acc, seed, subsequence); + } - DINLINE uint64_t - get64Bits( - T_Acc const & acc, - StateType& state - ) const - { - /* Two 32bit values are packed into a 64bit value because alpaka is not - * supporting 64bit integer random numbers - */ - uint64_t result = get32Bits( acc, state); - result <<= 32; - result ^= get32Bits( 
acc, state); - return result; - } + DINLINE uint32_t get32Bits(T_Acc const& acc, StateType& state) const + { + return ::alpaka::rand::distribution::createUniformUint(acc)(state); + } - static std::string - getName() - { - return "AlpakaRand"; - } - }; + DINLINE uint64_t get64Bits(T_Acc const& acc, StateType& state) const + { + /* Two 32bit values are packed into a 64bit value because alpaka is not + * supporting 64bit integer random numbers + */ + uint64_t result = get32Bits(acc, state); + result <<= 32; + result ^= get32Bits(acc, state); + return result; + } + + static std::string getName() + { + return "AlpakaRand"; + } + }; -} // namespace methods -} // namespace random -} // namespace pmacc + } // namespace methods + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/methods/MRG32k3aMin.hpp b/include/pmacc/random/methods/MRG32k3aMin.hpp index 62a9768b6b..01694f61d7 100644 --- a/include/pmacc/random/methods/MRG32k3aMin.hpp +++ b/include/pmacc/random/methods/MRG32k3aMin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund, Rene Widera +/* Copyright 2016-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -24,112 +24,81 @@ #include "pmacc/types.hpp" #include "pmacc/static_assert.hpp" -#if( PMACC_CUDA_ENABLED != 1 ) -# include "pmacc/random/methods/AlpakaRand.hpp" +#if(PMACC_CUDA_ENABLED != 1) +# include "pmacc/random/methods/AlpakaRand.hpp" #else -# include +# include #endif namespace pmacc { -namespace random -{ -namespace methods -{ - -#if( PMACC_CUDA_ENABLED != 1 ) - //! fallback to alpaka RNG if a cpu accelerator is used - template< typename T_Acc = cupla::Acc> - using MRG32k3aMin = AlpakaRand< T_Acc >; -#else - //! Mersenne-Twister random number generator with a reduced state - template< typename T_Acc = cupla::Acc> - class MRG32k3aMin + namespace random { - public: - struct StateType + namespace methods { - double s1[3]; - double s2[3]; - }; +#if(PMACC_CUDA_ENABLED != 1) + //! 
fallback to alpaka RNG if a cpu accelerator is used + template + using MRG32k3aMin = AlpakaRand; +#else + //! Mersenne-Twister random number generator with a reduced state + template + class MRG32k3aMin + { + public: + struct StateType + { + double s1[3]; + double s2[3]; + }; - DINLINE void - init( - T_Acc const & acc, - StateType & state, - uint32_t seed, - uint32_t subsequence = 0 - ) const - { - curandStateMRG32k3a tmpState; - curand_init( - seed, - subsequence, - 0, - &tmpState - ); - AssignState(state, tmpState); - } + DINLINE void init(T_Acc const& acc, StateType& state, uint32_t seed, uint32_t subsequence = 0) const + { + curandStateMRG32k3a tmpState; + curand_init(seed, subsequence, 0, &tmpState); + AssignState(state, tmpState); + } - DINLINE uint32_t - get32Bits( - T_Acc const & acc, - StateType& state - ) const - { - /* We can do this cast if: 1) Only state data is used and - * 2) Data is aligned and positioned the same way - */ - return curand( reinterpret_cast< curandStateMRG32k3a* >( &state ) ); - } + DINLINE uint32_t get32Bits(T_Acc const& acc, StateType& state) const + { + /* We can do this cast if: 1) Only state data is used and + * 2) Data is aligned and positioned the same way + */ + return curand(reinterpret_cast(&state)); + } - DINLINE uint64_t - get64Bits( - T_Acc const & acc, - StateType& state - ) const - { - // two 32bit values are packed into a 64bit value - uint64_t result = get32Bits( acc, state); - result <<= 32; - result ^= get32Bits( acc, state); - return result; - } + DINLINE uint64_t get64Bits(T_Acc const& acc, StateType& state) const + { + // two 32bit values are packed into a 64bit value + uint64_t result = get32Bits(acc, state); + result <<= 32; + result ^= get32Bits(acc, state); + return result; + } - static std::string - getName() - { - return "MRG32k3aMin"; - } + static std::string getName() + { + return "MRG32k3aMin"; + } - private: - // Sizes must match - PMACC_STATIC_ASSERT_MSG( - sizeof( StateType::s1 ) == sizeof( 
curandStateMRG32k3a::s1 ), - Unexpected_sizes - ); - PMACC_STATIC_ASSERT_MSG( - sizeof( StateType::s2 ) == sizeof( curandStateMRG32k3a::s2 ), - Unexpected_sizes - ); - // Offsets must match - PMACC_STATIC_ASSERT_MSG( - offsetof( StateType, s1 ) == offsetof( curandStateMRG32k3a, s1 ) && - offsetof( StateType, s2 ) == offsetof( curandStateMRG32k3a, s2 ), - Incompatible_structs - ); + private: + // Sizes must match + PMACC_STATIC_ASSERT_MSG(sizeof(StateType::s1) == sizeof(curandStateMRG32k3a::s1), Unexpected_sizes); + PMACC_STATIC_ASSERT_MSG(sizeof(StateType::s2) == sizeof(curandStateMRG32k3a::s2), Unexpected_sizes); + // Offsets must match + PMACC_STATIC_ASSERT_MSG( + offsetof(StateType, s1) == offsetof(curandStateMRG32k3a, s1) + && offsetof(StateType, s2) == offsetof(curandStateMRG32k3a, s2), + Incompatible_structs); - static DINLINE void AssignState( - StateType& dest, - curandStateMRG32k3a const & src - ) - { - // Check if we can do this cast - dest = reinterpret_cast< StateType const & >( src ); - } - }; + static DINLINE void AssignState(StateType& dest, curandStateMRG32k3a const& src) + { + // Check if we can do this cast + dest = reinterpret_cast(src); + } + }; #endif -} // namespace methods -} // namespace random -} // namespace pmacc + } // namespace methods + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/methods/RngPlaceholder.hpp b/include/pmacc/random/methods/RngPlaceholder.hpp index 22a1a427ed..5bf94b1ae4 100644 --- a/include/pmacc/random/methods/RngPlaceholder.hpp +++ b/include/pmacc/random/methods/RngPlaceholder.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -26,17 +26,16 @@ namespace pmacc { -namespace random -{ -namespace methods -{ - - //! placeholder for the rng method - struct RngPlaceholder + namespace random { - using StateType = int; - }; + namespace methods + { + //! 
placeholder for the rng method + struct RngPlaceholder + { + using StateType = int; + }; -} // namespace methods -} // namespace random -} // namespace pmacc + } // namespace methods + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/methods/XorMin.hpp b/include/pmacc/random/methods/XorMin.hpp index 10850befcf..dcea1c86b9 100644 --- a/include/pmacc/random/methods/XorMin.hpp +++ b/include/pmacc/random/methods/XorMin.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Alexander Grund, Rene Widera +/* Copyright 2015-2021 Alexander Grund, Rene Widera * * This file is part of PMacc. * @@ -24,115 +24,115 @@ #include "pmacc/types.hpp" #include "pmacc/static_assert.hpp" -#if( PMACC_CUDA_ENABLED != 1 ) -# include "pmacc/random/methods/AlpakaRand.hpp" +#if(BOOST_LANG_CUDA) +# include +#elif(BOOST_LANG_HIP) +# include #else -# include +# include "pmacc/random/methods/AlpakaRand.hpp" #endif namespace pmacc { -namespace random -{ -namespace methods -{ - -#if( PMACC_CUDA_ENABLED != 1 ) - //! fallback to alpaka RNG if a cpu accelerator is used - template< typename T_Acc = cupla::Acc> - using XorMin = AlpakaRand< T_Acc >; -#else - //! Uses the CUDA XORWOW RNG but does not store state members required for normal distribution - template< typename T_Acc = cupla::Acc> - class XorMin + namespace random { - public: - class StateType + namespace methods { - public: - PMACC_ALIGN( - d, - unsigned int - ); - PMACC_ALIGN( - v[ 5 ], - unsigned int - ); - - HDINLINE StateType( ) - { } - - DINLINE StateType( curandStateXORWOW_t const & other ): d( other.d ) +#if(ALPAKA_ACC_GPU_CUDA_ENABLED || ALPAKA_ACC_GPU_HIP_ENABLED) + //! 
Uses the CUDA XORWOW RNG but does not store state members required for normal distribution + template + class XorMin { - PMACC_STATIC_ASSERT_MSG( - sizeof( v ) == sizeof( other.v ), - Unexpected_sizes - ); - for( unsigned i = 0; i < sizeof( v ) / sizeof( v[ 0 ] ); i++ ) - v[ i ] = other.v[ i ]; - } - }; - - DINLINE void - init( - T_Acc const & acc, - StateType & state, - uint32_t seed, - uint32_t subsequence = 0 - ) const - { - curandStateXORWOW_t tmpState; - curand_init( - seed, - subsequence, - 0, - &tmpState - ); - state = tmpState; - } - - DINLINE uint32_t - get32Bits( - T_Acc const & acc, - StateType & state - ) const - { - /* This generator uses the xorwow formula of - * www.jstatsoft.org/v08/i14/paper page 5 - * Has period 2^192 - 2^32. - */ - uint32_t t; - t = ( state.v[ 0 ] ^ ( state.v[ 0 ] >> 2 ) ); - state.v[ 0 ] = state.v[ 1 ]; - state.v[ 1 ] = state.v[ 2 ]; - state.v[ 2 ] = state.v[ 3 ]; - state.v[ 3 ] = state.v[ 4 ]; - state.v[ 4 ] = ( state.v[ 4 ] ^ ( state.v[ 4 ] << 4 ) ) ^ ( t ^ ( t << 1 ) ); - state.d += 362437; - return state.v[ 4 ] + state.d; - } - - DINLINE uint64_t - get64Bits( - T_Acc const & acc, - StateType& state - ) const - { - // two 32bit values are packed into a 64bit value - uint64_t result = get32Bits( acc, state); - result <<= 32; - result ^= get32Bits( acc, state); - return result; - } - - static std::string - getName( ) - { - return "XorMin"; - } - }; +# if(BOOST_LANG_HIP) + using NativeStateType = hiprandStateXORWOW_t; +# elif(BOOST_LANG_CUDA) + using NativeStateType = curandStateXORWOW_t; +# endif + + public: + class StateType + { + public: + PMACC_ALIGN(d, unsigned int); + PMACC_ALIGN(v[5], unsigned int); + + HDINLINE StateType() + { + } + + DINLINE StateType(NativeStateType const& other) + { +# if(BOOST_LANG_HIP) + // @todo avoid using pointer casts to copy the rng state + auto baseObjectPtr + = reinterpret_cast(&other); + d = baseObjectPtr->d; + auto const* nativeStateArray = baseObjectPtr->x; + 
PMACC_STATIC_ASSERT_MSG(sizeof(v) == sizeof(baseObjectPtr->x), Unexpected_sizes); +# elif(BOOST_LANG_CUDA) + d = other.d; + auto const* nativeStateArray = other.v; + PMACC_STATIC_ASSERT_MSG(sizeof(v) == sizeof(other.v), Unexpected_sizes); +# endif + for(unsigned i = 0; i < sizeof(v) / sizeof(v[0]); i++) + v[i] = nativeStateArray[i]; + } + }; + + DINLINE void init(T_Acc const& acc, StateType& state, uint32_t seed, uint32_t subsequence = 0) const + { + NativeStateType tmpState; + +# if(ALPAKA_ACC_GPU_HIP_ENABLED == 1) +# define PMACC_RNG_INIT_FN hiprand_init +# elif(ALPAKA_ACC_GPU_CUDA_ENABLED == 1) +# define PMACC_RNG_INIT_FN curand_init +# endif + + PMACC_RNG_INIT_FN(seed, subsequence, 0, &tmpState); + +# undef PMACC_RNG_INIT_FN + + state = tmpState; + } + + DINLINE uint32_t get32Bits(T_Acc const& acc, StateType& state) const + { + /* This generator uses the xorwow formula of + * www.jstatsoft.org/v08/i14/paper page 5 + * Has period 2^192 - 2^32. + */ + uint32_t t; + t = (state.v[0] ^ (state.v[0] >> 2)); + state.v[0] = state.v[1]; + state.v[1] = state.v[2]; + state.v[2] = state.v[3]; + state.v[3] = state.v[4]; + state.v[4] = (state.v[4] ^ (state.v[4] << 4)) ^ (t ^ (t << 1)); + state.d += 362437; + return state.v[4] + state.d; + } + + DINLINE uint64_t get64Bits(T_Acc const& acc, StateType& state) const + { + // two 32bit values are packed into a 64bit value + uint64_t result = get32Bits(acc, state); + result <<= 32; + result ^= get32Bits(acc, state); + return result; + } + + static std::string getName() + { + return "XorMin"; + } + }; +#else + //! 
fallback to alpaka RNG if a cpu accelerator is used + template + using XorMin = AlpakaRand; #endif -} // namespace methods -} // namespace random -} // namespace pmacc + } // namespace methods + } // namespace random +} // namespace pmacc diff --git a/include/pmacc/random/methods/methods.hpp b/include/pmacc/random/methods/methods.hpp index 419a7634b2..e8cc891408 100644 --- a/include/pmacc/random/methods/methods.hpp +++ b/include/pmacc/random/methods/methods.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. * diff --git a/include/pmacc/result_of_Functor.hpp b/include/pmacc/result_of_Functor.hpp index 818f195701..62a1241440 100644 --- a/include/pmacc/result_of_Functor.hpp +++ b/include/pmacc/result_of_Functor.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. * @@ -27,26 +27,28 @@ namespace mpl = boost::mpl; namespace pmacc { -namespace result_of -{ -template -struct Functor -{ - typedef typename _Functor::result_type type; -}; + namespace result_of + { + template< + typename _Functor, + typename Arg0 = mpl::void_, + typename Arg1 = mpl::void_, + typename Arg2 = mpl::void_, + typename Arg3 = mpl::void_, + typename Arg4 = mpl::void_, + typename Arg5 = mpl::void_, + typename Arg6 = mpl::void_, + typename Arg7 = mpl::void_, + typename Arg8 = mpl::void_, + typename Arg9 = mpl::void_, + typename Arg10 = mpl::void_, + typename Arg11 = mpl::void_, + typename Arg12 = mpl::void_, + typename dummy = mpl::void_> + struct Functor + { + typedef typename _Functor::result_type type; + }; -} // result_of -} // PMacc + } // namespace result_of +} // namespace pmacc diff --git a/include/pmacc/simulationControl/SimulationDescription.hpp b/include/pmacc/simulationControl/SimulationDescription.hpp index 31fd6afb65..5f8133ee8c 100644 --- a/include/pmacc/simulationControl/SimulationDescription.hpp +++ 
b/include/pmacc/simulationControl/SimulationDescription.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Axel Huebl +/* Copyright 2015-2021 Axel Huebl * * This file is part of PMacc. * @@ -27,106 +27,101 @@ namespace pmacc { -namespace simulationControl -{ - -/** - * Provides convenience methods for querying general simulation information. - * Singleton class. - */ -class SimulationDescription -{ -public: - /** Return author of the simulation setup. - * - * The author that runs the simulation and is responsible for created - * output files. - * - * @return std::string with author name, can be empty - */ - std::string getAuthor() - { - return author; - } - - /** Set author - * - * @see getAuthor - * - * @param[in] std::string setAuthor - */ - void setAuthor( const std::string setAuthor ) - { - this->author = setAuthor; - } - - /** Return last time step of simulation - * - * @return uint32_t last step of the simulation to run to - */ - uint32_t getRunSteps() - { - return runSteps; - } - - /** Set last time step of simulation - * - * @see getRunSteps - * - * @param[in] uint32_t setRunSteps - */ - void setRunSteps( const uint32_t setRunSteps ) - { - runSteps = setRunSteps; - } - - /** Returns the current time step of the simulation - * - * \return uint32_t current time step - */ - uint32_t getCurrentStep() - { - return currentStep; - } - - /** Set the current time step - * - * @see getCurrentStep - * - * @param[in] uint32_t setCurrentStep - */ - void setCurrentStep( const uint32_t setCurrentStep ) + namespace simulationControl { - currentStep = setCurrentStep; - } - -protected: - /** author that runs the simulation */ - std::string author; - - /** maximum step to run this simulation to */ - uint32_t runSteps; - - /** current time step of simulation */ - uint32_t currentStep; - -private: - - friend struct detail::Environment; - - static SimulationDescription& getInstance() - { - static SimulationDescription instance; - return instance; - } - - SimulationDescription() : - 
author(""), - runSteps(0), - currentStep(0) - { - } -}; - -} // namespace simulationControl + /** + * Provides convenience methods for querying general simulation information. + * Singleton class. + */ + class SimulationDescription + { + public: + /** Return author of the simulation setup. + * + * The author that runs the simulation and is responsible for created + * output files. + * + * @return std::string with author name, can be empty + */ + std::string getAuthor() + { + return author; + } + + /** Set author + * + * @see getAuthor + * + * @param[in] std::string setAuthor + */ + void setAuthor(const std::string setAuthor) + { + this->author = setAuthor; + } + + /** Return last time step of simulation + * + * @return uint32_t last step of the simulation to run to + */ + uint32_t getRunSteps() + { + return runSteps; + } + + /** Set last time step of simulation + * + * @see getRunSteps + * + * @param[in] uint32_t setRunSteps + */ + void setRunSteps(const uint32_t setRunSteps) + { + runSteps = setRunSteps; + } + + /** Returns the current time step of the simulation + * + * \return uint32_t current time step + */ + uint32_t getCurrentStep() + { + return currentStep; + } + + /** Set the current time step + * + * @see getCurrentStep + * + * @param[in] uint32_t setCurrentStep + */ + void setCurrentStep(const uint32_t setCurrentStep) + { + currentStep = setCurrentStep; + } + + protected: + /** author that runs the simulation */ + std::string author; + + /** maximum step to run this simulation to */ + uint32_t runSteps; + + /** current time step of simulation */ + uint32_t currentStep; + + private: + friend struct detail::Environment; + + static SimulationDescription& getInstance() + { + static SimulationDescription instance; + return instance; + } + + SimulationDescription() : author(""), runSteps(0), currentStep(0) + { + } + }; + + } // namespace simulationControl } // namespace pmacc diff --git a/include/pmacc/simulationControl/SimulationHelper.hpp 
b/include/pmacc/simulationControl/SimulationHelper.hpp index e6a3625118..34bbdf5d54 100644 --- a/include/pmacc/simulationControl/SimulationHelper.hpp +++ b/include/pmacc/simulationControl/SimulationHelper.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera, Alexander Debus, +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera, Alexander Debus, * Benjamin Worpitz, Alexander Grund * * This file is part of PMacc. @@ -44,433 +44,432 @@ namespace pmacc { - -/** - * Abstract base class for simulations. - * - * Use this helper class to write your own concrete simulations - * by binding pure virtual methods. - * - * @tparam DIM base dimension for the simulation (2-3) - */ -template -class SimulationHelper : public IPlugin -{ -public: - - using SeqOfTimeSlices = std::vector< pluginSystem::TimeSlice >; - /** - * Constructor + * Abstract base class for simulations. * + * Use this helper class to write your own concrete simulations + * by binding pure virtual methods. + * + * @tparam DIM base dimension for the simulation (2-3) */ - SimulationHelper() : - runSteps(0), - checkpointDirectory("checkpoints"), - numCheckpoints(0), - restartStep(-1), - restartDirectory("checkpoints"), - restartRequested(false), - CHECKPOINT_MASTER_FILE("checkpoints.txt"), - author("") - { - tSimulation.toggleStart(); - tInit.toggleStart(); - } - - virtual ~SimulationHelper() + template + class SimulationHelper : public IPlugin { - tSimulation.toggleEnd(); - if (output) + public: + using SeqOfTimeSlices = std::vector; + + /** + * Constructor + * + */ + SimulationHelper() + : runSteps(0) + , checkpointDirectory("checkpoints") + , numCheckpoints(0) + , restartStep(-1) + , restartDirectory("checkpoints") + , restartRequested(false) + , CHECKPOINT_MASTER_FILE("checkpoints.txt") + , author("") + , useMpiDirect(false) { - std::cout << "full simulation time: " << - tSimulation.printInterval() << " = " << - (uint64_t) (tSimulation.getInterval() / 1000.) 
<< " sec" << std::endl; + tSimulation.toggleStart(); + tInit.toggleStart(); } - } - - /** - * Must describe one iteration (step). - * - * This function is called automatically. - */ - virtual void runOneStep(uint32_t currentStep) = 0; - /** - * Initialize simulation - * - * Does hardware selections/reservations, memory allocations and - * initializes data structures as empty. - */ - virtual void init() = 0; - - /** - * Fills simulation with initial data after init() - * - * @return returns the first step of the simulation - * (can be >0 for, e.g., restarts from checkpoints) - */ - virtual uint32_t fillSimulation() = 0; - - /** - * Reset the simulation to a state such as it was after - * init() but for a specific time step. - * Can be used to call fillSimulation() again. - */ - virtual void resetAll(uint32_t currentStep) = 0; - - /** - * Check if moving window work must do - * - * If no moving window is needed the implementation of this function can be empty - * - * @param currentStep simulation step - */ - virtual void movingWindowCheck(uint32_t currentStep) = 0; - - /** - * Notifies registered output classes. - * - * This function is called automatically. 
- * - * @param currentStep simulation step - */ - virtual void dumpOneStep(uint32_t currentStep) - { - /* trigger notification */ - Environment::get().PluginConnector().notifyPlugins(currentStep); - - /* trigger checkpoint notification */ - if( - !checkpointPeriod.empty() && - pluginSystem::containsStep( - seqCheckpointPeriod, - currentStep - ) - ) + virtual ~SimulationHelper() { - /* first synchronize: if something failed, we can spare the time - * for the checkpoint writing */ - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaGetLastError()); - - // avoid deadlock between not finished PMacc tasks and MPI_Barrier - __getTransactionEvent().waitForFinished(); - - GridController &gc = Environment::get().GridController(); - /* can be spared for better scalings, but allows to spare the - * time for checkpointing if some ranks died */ - MPI_CHECK(MPI_Barrier(gc.getCommunicator().getMPIComm())); - - /* create directory containing checkpoints */ - if (numCheckpoints == 0) + tSimulation.toggleEnd(); + if(output) { - Environment::get().Filesystem().createDirectoryWithPermissions(checkpointDirectory); + std::cout << "full simulation time: " << tSimulation.printInterval() << " = " + << (uint64_t)(tSimulation.getInterval() / 1000.) << " sec" << std::endl; } + } - Environment::get().PluginConnector().checkpointPlugins(currentStep, - checkpointDirectory); - - /* important synchronize: only if no errors occured until this - * point guarantees that a checkpoint is usable */ - CUDA_CHECK(cudaDeviceSynchronize()); - CUDA_CHECK(cudaGetLastError()); - - /* avoid deadlock between not finished PMacc tasks and MPI_Barrier */ - __getTransactionEvent().waitForFinished(); - - /* \todo in an ideal world with MPI-3, this would be an - * MPI_Ibarrier call and this function would return a MPI_Request - * that could be checked */ - MPI_CHECK(MPI_Barrier(gc.getCommunicator().getMPIComm())); + /** + * Must describe one iteration (step). + * + * This function is called automatically. 
+ */ + virtual void runOneStep(uint32_t currentStep) = 0; + + /** + * Initialize simulation + * + * Does hardware selections/reservations, memory allocations and + * initializes data structures as empty. + */ + virtual void init() = 0; + + /** + * Fills simulation with initial data after init() + * + * @return returns the first step of the simulation + * (can be >0 for, e.g., restarts from checkpoints) + */ + virtual uint32_t fillSimulation() = 0; + + /** + * Reset the simulation to a state such as it was after + * init() but for a specific time step. + * Can be used to call fillSimulation() again. + */ + virtual void resetAll(uint32_t currentStep) = 0; + + /** + * Check if moving window work must do + * + * If no moving window is needed the implementation of this function can be empty + * + * @param currentStep simulation step + */ + virtual void movingWindowCheck(uint32_t currentStep) = 0; + + /** + * Notifies registered output classes. + * + * This function is called automatically. + * + * @param currentStep simulation step + */ + virtual void dumpOneStep(uint32_t currentStep) + { + /* trigger notification */ + Environment::get().PluginConnector().notifyPlugins(currentStep); - if (gc.getGlobalRank() == 0) + /* trigger checkpoint notification */ + if(!checkpointPeriod.empty() && pluginSystem::containsStep(seqCheckpointPeriod, currentStep)) { - writeCheckpointStep(currentStep); + /* first synchronize: if something failed, we can spare the time + * for the checkpoint writing */ + CUDA_CHECK(cuplaDeviceSynchronize()); + CUDA_CHECK(cuplaGetLastError()); + + // avoid deadlock between not finished PMacc tasks and MPI_Barrier + __getTransactionEvent().waitForFinished(); + + GridController& gc = Environment::get().GridController(); + /* can be spared for better scalings, but allows to spare the + * time for checkpointing if some ranks died */ + MPI_CHECK(MPI_Barrier(gc.getCommunicator().getMPIComm())); + + /* create directory containing checkpoints */ + if(numCheckpoints 
== 0) + { + Environment::get().Filesystem().createDirectoryWithPermissions(checkpointDirectory); + } + + Environment::get().PluginConnector().checkpointPlugins(currentStep, checkpointDirectory); + + /* important synchronize: only if no errors occured until this + * point guarantees that a checkpoint is usable */ + CUDA_CHECK(cuplaDeviceSynchronize()); + CUDA_CHECK(cuplaGetLastError()); + + /* avoid deadlock between not finished PMacc tasks and MPI_Barrier */ + __getTransactionEvent().waitForFinished(); + + /* \todo in an ideal world with MPI-3, this would be an + * MPI_Ibarrier call and this function would return a MPI_Request + * that could be checked */ + MPI_CHECK(MPI_Barrier(gc.getCommunicator().getMPIComm())); + + if(gc.getGlobalRank() == 0) + { + writeCheckpointStep(currentStep); + } + numCheckpoints++; } - numCheckpoints++; } - } - - GridController & getGridController() - { - return Environment::get().GridController(); - } - void dumpTimes(TimeIntervall &tSimCalculation, TimeIntervall&, double& roundAvg, uint32_t currentStep) - { - /*dump 100% after simulation*/ - if (output && progress && (currentStep % showProgressAnyStep) == 0) + GridController& getGridController() { - tSimCalculation.toggleEnd(); - std::cout << std::setw(3) << - uint16_t( - double( currentStep ) / - double( Environment<>::get().SimulationDescription().getRunSteps() ) * - 100. - ) << - " % = " << std::setw(8) << currentStep << - " | time elapsed:" << - std::setw(25) << tSimCalculation.printInterval() << " | avg time per step: " << - TimeIntervall::printeTime(roundAvg / (double) showProgressAnyStep) << std::endl; - std::cout.flush(); - - roundAvg = 0.0; //clear round avg timer + return Environment::get().GridController(); } - } - - /** - * Begin the simulation. 
- */ - void startSimulation() - { - init(); - - // translate checkpointPeriod string into checkpoint intervals - seqCheckpointPeriod = pluginSystem::toTimeSlice( checkpointPeriod ); - - for (uint32_t nthSoftRestart = 0; nthSoftRestart <= softRestarts; ++nthSoftRestart) + void dumpTimes(TimeIntervall& tSimCalculation, TimeIntervall&, double& roundAvg, uint32_t currentStep) { - resetAll(0); - uint32_t currentStep = fillSimulation(); - Environment<>::get().SimulationDescription().setCurrentStep( currentStep ); - - tInit.toggleEnd(); - if (output) + /*dump 100% after simulation*/ + if(output && progress && (currentStep % showProgressAnyStep) == 0) { - std::cout << "initialization time: " << tInit.printInterval() << - " = " << - (int) (tInit.getInterval() / 1000.) << " sec" << std::endl; + tSimCalculation.toggleEnd(); + std::cout << std::setw(3) + << uint16_t( + double(currentStep) + / double(Environment<>::get().SimulationDescription().getRunSteps()) * 100.) + << " % = " << std::setw(8) << currentStep << " | time elapsed:" << std::setw(25) + << tSimCalculation.printInterval() << " | avg time per step: " + << TimeIntervall::printeTime(roundAvg / (double) showProgressAnyStep) << std::endl; + std::cout.flush(); + + roundAvg = 0.0; // clear round avg timer } + } - TimeIntervall tSimCalculation; - TimeIntervall tRound; - double roundAvg = 0.0; - - /* Since in the main loop movingWindow is called always before the dump, we also call it here for consistency. - * This becomes only important, if movingWindowCheck does more than merely checking for a slide. - * TO DO in a new feature: Turn this into a general hook for pre-checks (window slides are just one possible action). - */ - movingWindowCheck(currentStep); - - /* dump initial step if simulation starts without restart */ - if (!restartRequested) - { - dumpOneStep(currentStep); - } + /** + * Begin the simulation. 
+ */ + void startSimulation() + { + if(useMpiDirect) + Environment<>::get().enableMpiDirect(); - /* dump 0% output */ - dumpTimes(tSimCalculation, tRound, roundAvg, currentStep); + init(); + // translate checkpointPeriod string into checkpoint intervals + seqCheckpointPeriod = pluginSystem::toTimeSlice(checkpointPeriod); - /** \todo currently we assume this is the only point in the simulation - * that is allowed to manipulate `currentStep`. Else, one needs to - * add and act on changed values via - * `SimulationDescription().getCurrentStep()` in this loop - */ - while (currentStep < Environment<>::get().SimulationDescription().getRunSteps()) + for(uint32_t nthSoftRestart = 0; nthSoftRestart <= softRestarts; ++nthSoftRestart) { - tRound.toggleStart(); - runOneStep(currentStep); - tRound.toggleEnd(); - roundAvg += tRound.getInterval(); - - /* NEXT TIMESTEP STARTS HERE */ - currentStep++; - Environment<>::get().SimulationDescription().setCurrentStep( currentStep ); - /* output times after a round */ - dumpTimes(tSimCalculation, tRound, roundAvg, currentStep); - + resetAll(0); + uint32_t currentStep = fillSimulation(); + Environment<>::get().SimulationDescription().setCurrentStep(currentStep); + + tInit.toggleEnd(); + if(output) + { + std::cout << "initialization time: " << tInit.printInterval() << " = " + << (int) (tInit.getInterval() / 1000.) << " sec" << std::endl; + } + + TimeIntervall tSimCalculation; + TimeIntervall tRound; + double roundAvg = 0.0; + + /* Since in the main loop movingWindow is called always before the dump, we also call it here for + * consistency. This becomes only important, if movingWindowCheck does more than merely checking for a + * slide. TO DO in a new feature: Turn this into a general hook for pre-checks (window slides are just + * one possible action). 
+ */ movingWindowCheck(currentStep); - /* dump at the beginning of the simulated step */ - dumpOneStep(currentStep); - } - // simulatation end - Environment<>::get().Manager().waitForAllTasks(); + /* dump initial step if simulation starts without restart */ + if(!restartRequested) + { + dumpOneStep(currentStep); + } - tSimCalculation.toggleEnd(); + /* dump 0% output */ + dumpTimes(tSimCalculation, tRound, roundAvg, currentStep); - if (output) - { - std::cout << "calculation simulation time: " << - tSimCalculation.printInterval() << " = " << - (int) (tSimCalculation.getInterval() / 1000.) << " sec" << std::endl; - } - } // softRestarts loop - } + /** \todo currently we assume this is the only point in the simulation + * that is allowed to manipulate `currentStep`. Else, one needs to + * add and act on changed values via + * `SimulationDescription().getCurrentStep()` in this loop + */ + while(currentStep < Environment<>::get().SimulationDescription().getRunSteps()) + { + tRound.toggleStart(); + runOneStep(currentStep); + tRound.toggleEnd(); + roundAvg += tRound.getInterval(); + + /* NEXT TIMESTEP STARTS HERE */ + currentStep++; + Environment<>::get().SimulationDescription().setCurrentStep(currentStep); + /* output times after a round */ + dumpTimes(tSimCalculation, tRound, roundAvg, currentStep); + + movingWindowCheck(currentStep); + /* dump at the beginning of the simulated step */ + dumpOneStep(currentStep); + } + + // simulatation end + Environment<>::get().Manager().waitForAllTasks(); + + tSimCalculation.toggleEnd(); + + if(output) + { + std::cout << "calculation simulation time: " << tSimCalculation.printInterval() << " = " + << (int) (tSimCalculation.getInterval() / 1000.) 
<< " sec" << std::endl; + } + + } // softRestarts loop + } - virtual void pluginRegisterHelp(po::options_description& desc) - { - desc.add_options() - ("steps,s", po::value (&runSteps), "Simulation steps") - ("checkpoint.restart.loop", po::value (&softRestarts)->default_value(0), - "Number of times to restart the simulation after simulation has finished (for presentations). " - "Note: does not yet work with all plugins, see issue #1305") - ("percent,p", po::value (&progress)->default_value(5), - "Print time statistics after p percent to stdout") - ("checkpoint.restart", po::value(&restartRequested)->zero_tokens(), "Restart simulation") - ("checkpoint.restart.directory", po::value(&restartDirectory)->default_value(restartDirectory), - "Directory containing checkpoints for a restart") - ("checkpoint.restart.step", po::value(&restartStep), "Checkpoint step to restart from") - ("checkpoint.period", po::value(&checkpointPeriod), "Period for checkpoint creation") - ("checkpoint.directory", po::value(&checkpointDirectory)->default_value(checkpointDirectory), - "Directory for checkpoints") - ("author", po::value(&author)->default_value(std::string("")), - "The author that runs the simulation and is responsible for created output files"); - } - - std::string pluginGetName() const - { - return "SimulationHelper"; - } + virtual void pluginRegisterHelp(po::options_description& desc) + { + desc.add_options()("steps,s", po::value(&runSteps), "Simulation steps")( + "checkpoint.restart.loop", + po::value(&softRestarts)->default_value(0), + "Number of times to restart the simulation after simulation has finished (for presentations). 
" + "Note: does not yet work with all plugins, see issue #1305")( + "percent,p", + po::value(&progress)->default_value(5), + "Print time statistics after p percent to stdout")( + "checkpoint.restart", + po::value(&restartRequested)->zero_tokens(), + "Restart simulation")( + "checkpoint.restart.directory", + po::value(&restartDirectory)->default_value(restartDirectory), + "Directory containing checkpoints for a restart")( + "checkpoint.restart.step", + po::value(&restartStep), + "Checkpoint step to restart from")( + "checkpoint.period", + po::value(&checkpointPeriod), + "Period for checkpoint creation")( + "checkpoint.directory", + po::value(&checkpointDirectory)->default_value(checkpointDirectory), + "Directory for checkpoints")( + "author", + po::value(&author)->default_value(std::string("")), + "The author that runs the simulation and is responsible for created output files")( + "mpiDirect", + po::value(&useMpiDirect)->zero_tokens(), + "use device direct for MPI communication e.g. GPU direct"); + } - void pluginLoad() - { - Environment<>::get().SimulationDescription().setRunSteps(runSteps); - Environment<>::get().SimulationDescription().setAuthor(author); + std::string pluginGetName() const + { + return "SimulationHelper"; + } - calcProgress(); + void pluginLoad() + { + Environment<>::get().SimulationDescription().setRunSteps(runSteps); + Environment<>::get().SimulationDescription().setAuthor(author); - output = (getGridController().getGlobalRank() == 0); - } + calcProgress(); - void pluginUnload() - { - } + output = (getGridController().getGlobalRank() == 0); + } - void restart(uint32_t, const std::string) - { - } + void pluginUnload() + { + } - void checkpoint(uint32_t, const std::string) - { - } + void restart(uint32_t, const std::string) + { + } -protected: - /* number of simulation steps to compute */ - uint32_t runSteps; + void checkpoint(uint32_t, const std::string) + { + } - /** Presentations: loop the whole simulation `softRestarts` times from - * 
initial step to runSteps */ - uint32_t softRestarts; + protected: + /* number of simulation steps to compute */ + uint32_t runSteps; - /* period for checkpoint creation */ - std::string checkpointPeriod; + /** Presentations: loop the whole simulation `softRestarts` times from + * initial step to runSteps */ + uint32_t softRestarts; - /* checkpoint intervals */ - SeqOfTimeSlices seqCheckpointPeriod; + /* period for checkpoint creation */ + std::string checkpointPeriod; - /* common directory for checkpoints */ - std::string checkpointDirectory; + /* checkpoint intervals */ + SeqOfTimeSlices seqCheckpointPeriod; - /* number of checkpoints written */ - uint32_t numCheckpoints; + /* common directory for checkpoints */ + std::string checkpointDirectory; - /* checkpoint step to restart from */ - int32_t restartStep; + /* number of checkpoints written */ + uint32_t numCheckpoints; - /* common directory for restarts */ - std::string restartDirectory; + /* checkpoint step to restart from */ + int32_t restartStep; - /* restart requested */ - bool restartRequested; + /* common directory for restarts */ + std::string restartDirectory; - /* filename for checkpoint master file with all checkpoint timesteps */ - const std::string CHECKPOINT_MASTER_FILE; + /* restart requested */ + bool restartRequested; - /* author that runs the simulation */ - std::string author; + /* filename for checkpoint master file with all checkpoint timesteps */ + const std::string CHECKPOINT_MASTER_FILE; -private: + /* author that runs the simulation */ + std::string author; - /** - * Set how often the elapsed time is printed. - * - * @param percent percentage difference for printing - */ - void calcProgress() - { - if (progress == 0 || progress > 100) - progress = 100; + //! enable MPI gpu direct + bool useMpiDirect; - showProgressAnyStep = uint32_t( - double( Environment<>::get().SimulationDescription().getRunSteps() ) / - 100. 
* double( progress ) - ); - if (showProgressAnyStep == 0) - showProgressAnyStep = 1; - } + private: + /** + * Set how often the elapsed time is printed. + * + * @param percent percentage difference for printing + */ + void calcProgress() + { + if(progress == 0 || progress > 100) + progress = 100; - /** - * Append \p checkpointStep to the master checkpoint file - * - * @param checkpointStep current checkpoint step - */ - void writeCheckpointStep(const uint32_t checkpointStep) - { - std::ofstream file; - const std::string checkpointMasterFile = - checkpointDirectory + std::string("/") + CHECKPOINT_MASTER_FILE; + showProgressAnyStep = uint32_t( + double(Environment<>::get().SimulationDescription().getRunSteps()) / 100. * double(progress)); + if(showProgressAnyStep == 0) + showProgressAnyStep = 1; + } - file.open(checkpointMasterFile.c_str(), std::ofstream::app); + /** + * Append \p checkpointStep to the master checkpoint file + * + * @param checkpointStep current checkpoint step + */ + void writeCheckpointStep(const uint32_t checkpointStep) + { + std::ofstream file; + const std::string checkpointMasterFile = checkpointDirectory + std::string("/") + CHECKPOINT_MASTER_FILE; - if (!file) - throw std::runtime_error("Failed to write checkpoint master file"); + file.open(checkpointMasterFile.c_str(), std::ofstream::app); - file << checkpointStep << std::endl; - file.close(); - } + if(!file) + throw std::runtime_error("Failed to write checkpoint master file"); -protected: - /** - * Reads the checkpoint master file if any and returns all found checkpoint steps - * - * @return vector of found checkpoints steps in order they appear in the file - */ - std::vector readCheckpointMasterFile() - { - std::vector checkpoints; + file << checkpointStep << std::endl; + file.close(); + } - const std::string checkpointMasterFile = - this->restartDirectory + std::string("/") + this->CHECKPOINT_MASTER_FILE; + protected: + /** + * Reads the checkpoint master file if any and returns all found 
checkpoint steps + * + * @return vector of found checkpoints steps in order they appear in the file + */ + std::vector readCheckpointMasterFile() + { + std::vector checkpoints; - if (!boost::filesystem::exists(checkpointMasterFile)) - return checkpoints; + const std::string checkpointMasterFile + = this->restartDirectory + std::string("/") + this->CHECKPOINT_MASTER_FILE; - std::ifstream file(checkpointMasterFile.c_str()); + if(!boost::filesystem::exists(checkpointMasterFile)) + return checkpoints; - /* read each line */ - std::string line; - while (std::getline(file, line)) - { - if (line.empty()) - continue; - try - { - checkpoints.push_back(boost::lexical_cast(line)); - } - catch (boost::bad_lexical_cast const&) + std::ifstream file(checkpointMasterFile.c_str()); + + /* read each line */ + std::string line; + while(std::getline(file, line)) { - std::cerr << "Warning: checkpoint master file contains invalid data (" - << line << ")" << std::endl; + if(line.empty()) + continue; + try + { + checkpoints.push_back(boost::lexical_cast(line)); + } + catch(boost::bad_lexical_cast const&) + { + std::cerr << "Warning: checkpoint master file contains invalid data (" << line << ")" << std::endl; + } } - } - return checkpoints; - } -private: - - bool output; + return checkpoints; + } - uint16_t progress; - uint32_t showProgressAnyStep; + private: + bool output = false; - TimeIntervall tSimulation; - TimeIntervall tInit; + uint16_t progress; + uint32_t showProgressAnyStep; -}; + TimeIntervall tSimulation; + TimeIntervall tInit; + }; } // namespace pmacc diff --git a/include/pmacc/simulationControl/TimeInterval.hpp b/include/pmacc/simulationControl/TimeInterval.hpp index 3df27d47bc..4be6c0c853 100644 --- a/include/pmacc/simulationControl/TimeInterval.hpp +++ b/include/pmacc/simulationControl/TimeInterval.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PMacc. 
* @@ -31,14 +31,12 @@ namespace pmacc { - class TimeIntervall { public: - TimeIntervall() { - start=end=getTime(); + start = end = getTime(); } /*! Get the timestamp in msec @@ -46,24 +44,24 @@ namespace pmacc */ static double getTime() { - auto time( Clock::now().time_since_epoch() ); - auto timestamp = std::chrono::duration_cast< Milliseconds >( time ).count(); - return static_cast< double >(timestamp); + auto time(Clock::now().time_since_epoch()); + auto timestamp = std::chrono::duration_cast(time).count(); + return static_cast(timestamp); } double toggleStart() { - return start=getTime(); + return start = getTime(); } double toggleEnd() { - return end=getTime(); + return end = getTime(); } double getInterval() { - return end-start; + return end - start; } std::string printInterval() @@ -78,53 +76,52 @@ namespace pmacc int p_time; - bool write_all=false; - if(time/(3600.*1000.)>1.) + bool write_all = false; + if(time / (3600. * 1000.) > 1.) { - p_time=time/(3600.*1000.); - time=time-3600.*1000.*p_time; - outstr<1.) + if(write_all || time / (60 * 1000) > 1.) { - p_time=time/(60.*1000.); - time=time-60.*1000.*p_time; - outstr<1.) + if(write_all || time / 1000. > 1.) { - p_time=time/1000.; - time=time-1000.*p_time; - outstr<1.) + if(write_all || time > 1.) { - outstr< - using TimePoint = std::chrono::time_point< Clock, Duration >; + template + using TimePoint = std::chrono::time_point; using Milliseconds = std::chrono::milliseconds; double start; double end; }; -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/static_assert.hpp b/include/pmacc/static_assert.hpp index 7f279572e0..41a95d0b5e 100644 --- a/include/pmacc/static_assert.hpp +++ b/include/pmacc/static_assert.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Felix Schmitt, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Felix Schmitt, Rene Widera * * This file is part of PMacc. 
* @@ -32,12 +32,12 @@ namespace pmacc { }; - template + template struct GetStaticAssertInfoType { typedef T_Type type; }; -} +} // namespace pmacc /** call BOOST_MPL_ASSERT_MSG and add unique id to message * @param pmacc_cond an integral constant expression @@ -45,46 +45,54 @@ namespace pmacc * @param pmacc_unique_id pre compiler unique id * @param pmacc_typeInfo a type that is shown in error message */ -#if BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA +#if BOOST_LANG_CUDA && BOOST_COMP_CLANG_CUDA || BOOST_COMP_HIP /* device compile with clang: boost static assert can not be used * error is: calling a `__host__` function from `__device__` * Therefore C++11 `static_assert` is used */ -# define PMACC_STATIC_ASSERT_MSG_DO2(pmacc_cond, pmacc_msg, pmacc_unique_id, pmacc_typeInfo) \ - static_assert(pmacc_cond,#pmacc_msg) +# define PMACC_STATIC_ASSERT_MSG_DO2(pmacc_cond, pmacc_msg, pmacc_unique_id, pmacc_typeInfo) \ + static_assert(pmacc_cond, #pmacc_msg) #else -# define PMACC_STATIC_ASSERT_MSG_DO2(pmacc_cond, pmacc_msg, pmacc_unique_id, pmacc_typeInfo) \ - BOOST_MPL_ASSERT_MSG(pmacc_cond,PMACC_JOIN(pmacc_msg,PMACC_JOIN(_________,pmacc_unique_id)),(pmacc_typeInfo)) +# define PMACC_STATIC_ASSERT_MSG_DO2(pmacc_cond, pmacc_msg, pmacc_unique_id, pmacc_typeInfo) \ + BOOST_MPL_ASSERT_MSG( \ + pmacc_cond, \ + PMACC_JOIN(pmacc_msg, PMACC_JOIN(_________, pmacc_unique_id)), \ + (pmacc_typeInfo)) #endif /*! static assert with error message * @param pmacc_cond A condition which return true or false. - * @param pmacc_msg A message which is shown if the condition is false. Msg must a valid c++ variable name (etc. _only_human_make_mistakes) + * @param pmacc_msg A message which is shown if the condition is false. Msg must a valid c++ variable name (etc. + * _only_human_make_mistakes) * @param ... (optional) a type that is shown in error message */ -#define PMACC_STATIC_ASSERT_MSG(pmacc_cond,pmacc_msg,...) 
\ - PMACC_STATIC_ASSERT_MSG_DO2(pmacc_cond,pmacc_msg,__COUNTER__,typename pmacc::GetStaticAssertInfoType<__VA_ARGS__>::type) +#define PMACC_STATIC_ASSERT_MSG(pmacc_cond, pmacc_msg, ...) \ + PMACC_STATIC_ASSERT_MSG_DO2( \ + pmacc_cond, \ + pmacc_msg, \ + __COUNTER__, \ + typename pmacc::GetStaticAssertInfoType<__VA_ARGS__>::type) /*! static assert * @param pmacc_cond A condition which return true or false. */ -#define PMACC_STATIC_ASSERT(pmacc_cond) \ - PMACC_STATIC_ASSERT_MSG(pmacc_cond,STATIC_ASSERTION_FAILURE) +#define PMACC_STATIC_ASSERT(pmacc_cond) PMACC_STATIC_ASSERT_MSG(pmacc_cond, STATIC_ASSERTION_FAILURE, ) /*! static assert wrapper which is easier to use than \see PMACC_STATIC_ASSERT_MSG - * @param pmacc_msg A message which is shown if the condition is false. Msg must a valid c++ variable name (etc. _only_human_make_mistakes) + * @param pmacc_msg A message which is shown if the condition is false. Msg must a valid c++ variable name (etc. + * _only_human_make_mistakes) * @param pmacc_typeInfo a type that is shown in error message * @param ... A condition which return true or false. */ -#define PMACC_CASSERT_MSG_TYPE(pmacc_msg,pmacc_typeInfo,...) \ - PMACC_STATIC_ASSERT_MSG((__VA_ARGS__),pmacc_msg,pmacc_typeInfo) +#define PMACC_CASSERT_MSG_TYPE(pmacc_msg, pmacc_typeInfo, ...) \ + PMACC_STATIC_ASSERT_MSG((__VA_ARGS__), pmacc_msg, pmacc_typeInfo) /*! static assert wrapper which is easier to use than \see PMACC_STATIC_ASSERT_MSG - * @param pmacc_msg A message which is shown if the condition is false. Msg must a valid c++ variable name (etc. _only_human_make_mistakes) + * @param pmacc_msg A message which is shown if the condition is false. Msg must a valid c++ variable name (etc. + * _only_human_make_mistakes) * @param ... A condition which return true or false. */ -#define PMACC_CASSERT_MSG(pmacc_msg,...) \ - PMACC_STATIC_ASSERT_MSG((__VA_ARGS__),pmacc_msg) +#define PMACC_CASSERT_MSG(pmacc_msg, ...) PMACC_STATIC_ASSERT_MSG((__VA_ARGS__), pmacc_msg, ) /*! 
static assert * @param ... A condition which return true or false. @@ -98,17 +106,18 @@ namespace pmacc * @param nmspace The name of the namespace * @param var The variable to look for. */ -#define PMACC_DEF_IN_NAMESPACE_MSG(pmacc_msg,nmspace,var) \ - namespace pmacc_msg { \ - using nmspace::var; \ - namespace fallback \ - { \ - struct var \ - { \ - double d[9999]; \ - char c; \ - }; \ - } \ - using fallback::var; \ - } \ - PMACC_CASSERT_MSG( pmacc_msg, ((sizeof(pmacc_msg::var))!=(sizeof(pmacc_msg::fallback::var))) ); +#define PMACC_DEF_IN_NAMESPACE_MSG(pmacc_msg, nmspace, var) \ + namespace pmacc_msg \ + { \ + using nmspace::var; \ + namespace fallback \ + { \ + struct var \ + { \ + double d[9999]; \ + char c; \ + }; \ + } \ + using fallback::var; \ + } \ + PMACC_CASSERT_MSG(pmacc_msg, ((sizeof(pmacc_msg::var)) != (sizeof(pmacc_msg::fallback::var)))); diff --git a/include/pmacc/test/PMaccFixture.hpp b/include/pmacc/test/PMaccFixture.hpp index 3bbae4c491..a02037ce6b 100644 --- a/include/pmacc/test/PMaccFixture.hpp +++ b/include/pmacc/test/PMaccFixture.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. 
* @@ -28,29 +28,28 @@ namespace pmacc { -namespace test -{ - -/** Fixture that initializes PMacc for a given dimensionality */ -template -struct PMaccFixture -{ - PMaccFixture() + namespace test { - const pmacc::DataSpace devices = pmacc::DataSpace::create(1); - const pmacc::DataSpace periodic = pmacc::DataSpace::create(1); - pmacc::Environment::get().initDevices(devices, periodic); - } - - ~PMaccFixture() - { - /* finalize the PMacc context */ - pmacc::Environment<>::get().finalize(); - } -}; - -using PMaccFixture2D = PMaccFixture< 2 >; -using PMaccFixture3D = PMaccFixture< 3 >; - -} // namespace test + /** Fixture that initializes PMacc for a given dimensionality */ + template + struct PMaccFixture + { + PMaccFixture() + { + const pmacc::DataSpace devices = pmacc::DataSpace::create(1); + const pmacc::DataSpace periodic = pmacc::DataSpace::create(1); + pmacc::Environment::get().initDevices(devices, periodic); + } + + ~PMaccFixture() + { + /* finalize the PMacc context */ + pmacc::Environment<>::get().finalize(); + } + }; + + using PMaccFixture2D = PMaccFixture<2>; + using PMaccFixture3D = PMaccFixture<3>; + + } // namespace test } // namespace pmacc diff --git a/include/pmacc/test/TemplateUT.cpp b/include/pmacc/test/TemplateUT.cpp deleted file mode 100644 index 4156ce4131..0000000000 --- a/include/pmacc/test/TemplateUT.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* Copyright 2015-2020 Erik Zenker - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -// STL -#include /* uint8_t */ - -// BOOST -#include - -// Boost.Test documentation: http://www.boost.org/doc/libs/1_59_0/libs/test/doc/html/index.html - -/******************************************************************************* - * Configuration - ******************************************************************************/ - -// Nothing to configure, but here could be -// placed global variables, typedefs, classes. - -/******************************************************************************* - * Test Suite - ******************************************************************************/ -BOOST_AUTO_TEST_SUITE( template_unit_test ) - - -/*************************************************************************** - * Test Cases - ****************************************************************************/ - -// Normal test case -BOOST_AUTO_TEST_CASE( first ){ - BOOST_CHECK_EQUAL( sizeof(uint8_t), 1u ); - -} - - -BOOST_AUTO_TEST_SUITE_END() diff --git a/include/pmacc/test/main.cpp b/include/pmacc/test/main.cpp deleted file mode 100644 index bb1a505249..0000000000 --- a/include/pmacc/test/main.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2015-2020 Erik Zenker, Alexander Grund - * - * This file is part of PMacc. - * - * PMacc is free software: you can redistribute it and/or modify - * it under the terms of either the GNU General Public License or - * the GNU Lesser General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. 
- * - * PMacc is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License and the GNU Lesser General Public License - * for more details. - * - * You should have received a copy of the GNU General Public License - * and the GNU Lesser General Public License along with PMacc. - * If not, see . - */ - -#define BOOST_TEST_MODULE "PMacc Unit Tests" -#define BOOST_TEST_NO_MAIN -#include - - -int main(int argc, char* argv[], char* envp[]) -{ - int result = boost::unit_test::unit_test_main(&init_unit_test, argc, argv); - - return result; -} diff --git a/include/pmacc/test/memory/HostBufferIntern/copyFrom.hpp b/include/pmacc/test/memory/HostBufferIntern/copyFrom.hpp index cf1f932c53..97ffdfc3b7 100644 --- a/include/pmacc/test/memory/HostBufferIntern/copyFrom.hpp +++ b/include/pmacc/test/memory/HostBufferIntern/copyFrom.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Erik Zenker +/* Copyright 2015-2021 Erik Zenker * * This file is part of PMacc. * @@ -26,70 +26,73 @@ namespace pmacc { -namespace test -{ -namespace memory -{ -namespace HostBufferIntern -{ - -/** - * Checks if data is copied correctly from device to - * host. 
- */ -struct CopyFromTest { - - template - void exec(T_Dim) + namespace test { - using Data = uint8_t ; - using Extents = size_t; - - using ::pmacc::test::memory::getElementsPerDim; - - std::vector nElementsPerDim = getElementsPerDim(); - - for(unsigned i = 0; i < nElementsPerDim.size(); ++i){ - ::pmacc::DataSpace const dataSpace = ::pmacc::DataSpace::create(nElementsPerDim[i]); - ::pmacc::HostBuffer* hostBufferIntern = new ::pmacc::HostBufferIntern(dataSpace); - ::pmacc::DeviceBuffer* deviceBufferIntern = new ::pmacc::DeviceBufferIntern(dataSpace); - - hostBufferIntern->reset(); - - for(size_t i = 0; i < static_cast(dataSpace.productOfComponents()); ++i){ - hostBufferIntern->getPointer()[i] = static_cast(i); - } - - deviceBufferIntern->copyFrom(*hostBufferIntern); - hostBufferIntern->reset(); - hostBufferIntern->copyFrom(*deviceBufferIntern); - - for(size_t i = 0; i < static_cast(dataSpace.productOfComponents()); ++i){ - BOOST_CHECK_EQUAL(hostBufferIntern->getPointer()[i], static_cast(i)); - } - - delete hostBufferIntern; - delete deviceBufferIntern; - - } - - } - - PMACC_NO_NVCC_HDWARNING - template - HDINLINE void operator()(T_Dim dim) - { - exec(dim); - } -}; - -} // namespace HostBufferIntern -} // namespace memory -} // namespace test + namespace memory + { + namespace HostBufferIntern + { + /** + * Checks if data is copied correctly from device to + * host. 
+ */ + struct CopyFromTest + { + template + void exec(T_Dim) + { + using Data = uint8_t; + using Extents = size_t; + + using ::pmacc::test::memory::getElementsPerDim; + + std::vector nElementsPerDim = getElementsPerDim(); + + for(unsigned i = 0; i < nElementsPerDim.size(); ++i) + { + ::pmacc::DataSpace const dataSpace + = ::pmacc::DataSpace::create(nElementsPerDim[i]); + ::pmacc::HostBuffer* hostBufferIntern + = new ::pmacc::HostBufferIntern(dataSpace); + ::pmacc::DeviceBuffer* deviceBufferIntern + = new ::pmacc::DeviceBufferIntern(dataSpace); + + hostBufferIntern->reset(); + + for(size_t i = 0; i < static_cast(dataSpace.productOfComponents()); ++i) + { + hostBufferIntern->getPointer()[i] = static_cast(i); + } + + deviceBufferIntern->copyFrom(*hostBufferIntern); + hostBufferIntern->reset(); + hostBufferIntern->copyFrom(*deviceBufferIntern); + + for(size_t i = 0; i < static_cast(dataSpace.productOfComponents()); ++i) + { + REQUIRE(hostBufferIntern->getPointer()[i] == static_cast(i)); + } + + delete hostBufferIntern; + delete deviceBufferIntern; + } + } + + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Dim dim) + { + exec(dim); + } + }; + + } // namespace HostBufferIntern + } // namespace memory + } // namespace test } // namespace pmacc -BOOST_AUTO_TEST_CASE( copyFrom ) +TEST_CASE("HostBufferIntern::copyFrom", "[copyFrom]") { using namespace pmacc::test::memory::HostBufferIntern; - ::boost::mpl::for_each< Dims >( CopyFromTest() ); + ::boost::mpl::for_each(CopyFromTest()); } diff --git a/include/pmacc/test/memory/HostBufferIntern/reset.hpp b/include/pmacc/test/memory/HostBufferIntern/reset.hpp index 49f9f55715..d4cb235a3d 100644 --- a/include/pmacc/test/memory/HostBufferIntern/reset.hpp +++ b/include/pmacc/test/memory/HostBufferIntern/reset.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Erik Zenker +/* Copyright 2015-2021 Erik Zenker * * This file is part of PMacc. 
* @@ -26,58 +26,57 @@ namespace pmacc { -namespace test -{ -namespace memory -{ -namespace HostBufferIntern -{ - -/** - * Checks if the HostBufferIntern is reseted correctly to zero. - */ -struct ResetTest { - - template - void exec(T_Dim) + namespace test { - using Data = uint8_t ; - using Extents = size_t; - - using ::pmacc::test::memory::getElementsPerDim; - - std::vector nElementsPerDim = getElementsPerDim(); - - for(unsigned i = 0; i < nElementsPerDim.size(); ++i) + namespace memory { - ::pmacc::DataSpace const dataSpace = ::pmacc::DataSpace::create(nElementsPerDim[i]); - ::pmacc::HostBufferIntern hostBufferIntern(dataSpace); - - hostBufferIntern.reset(); - - for(size_t i = 0; i < static_cast(dataSpace.productOfComponents()); ++i){ - BOOST_CHECK_EQUAL( hostBufferIntern.getPointer()[i], 0 ); - } - - } - - } - - PMACC_NO_NVCC_HDWARNING - template - HDINLINE void operator()(T_Dim dim) - { - exec(dim); - } -}; - -} // namespace HostBufferIntern -} // namespace memory -} // namespace test + namespace HostBufferIntern + { + /** + * Checks if the HostBufferIntern is reseted correctly to zero. 
+ */ + struct ResetTest + { + template + void exec(T_Dim) + { + using Data = uint8_t; + using Extents = size_t; + + using ::pmacc::test::memory::getElementsPerDim; + + std::vector nElementsPerDim = getElementsPerDim(); + + for(unsigned i = 0; i < nElementsPerDim.size(); ++i) + { + ::pmacc::DataSpace const dataSpace + = ::pmacc::DataSpace::create(nElementsPerDim[i]); + ::pmacc::HostBufferIntern hostBufferIntern(dataSpace); + + hostBufferIntern.reset(); + + for(size_t i = 0; i < static_cast(dataSpace.productOfComponents()); ++i) + { + REQUIRE(hostBufferIntern.getPointer()[i] == 0); + } + } + } + + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Dim dim) + { + exec(dim); + } + }; + + } // namespace HostBufferIntern + } // namespace memory + } // namespace test } // namespace pmacc -BOOST_AUTO_TEST_CASE( reset ) +TEST_CASE("HostBufferIntern::reset", "[reset]") { using namespace pmacc::test::memory::HostBufferIntern; - ::boost::mpl::for_each< Dims >( ResetTest() ); + ::boost::mpl::for_each(ResetTest()); } diff --git a/include/pmacc/test/memory/HostBufferIntern/setValue.hpp b/include/pmacc/test/memory/HostBufferIntern/setValue.hpp index c2cd58537e..cd72c5cf67 100644 --- a/include/pmacc/test/memory/HostBufferIntern/setValue.hpp +++ b/include/pmacc/test/memory/HostBufferIntern/setValue.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Erik Zenker +/* Copyright 2015-2021 Erik Zenker * * This file is part of PMacc. * @@ -26,62 +26,59 @@ namespace pmacc { -namespace test -{ -namespace memory -{ -namespace HostBufferIntern -{ - -/** - * Checks if the HostBufferIntern is set to a constant value. 
- */ -struct setValueTest -{ - - template - void exec(T_Dim) + namespace test { - using Data = uint8_t ; - using Extents = size_t; - - using ::pmacc::test::memory::getElementsPerDim; - - std::vector nElementsPerDim = getElementsPerDim(); - - for(size_t i = 0; i < nElementsPerDim.size(); ++i) + namespace memory { - ::pmacc::DataSpace const dataSpace = ::pmacc::DataSpace::create(nElementsPerDim[i]); - ::pmacc::HostBufferIntern hostBufferIntern(dataSpace); - - const Data value = 255; - hostBufferIntern.setValue(value); - - auto ptr = hostBufferIntern.getPointer( ); - for(size_t j = 0; j < static_cast(dataSpace.productOfComponents()); ++j) + namespace HostBufferIntern { - BOOST_CHECK_EQUAL( ptr[j], value ); - } - - } - - } - - PMACC_NO_NVCC_HDWARNING - template - HDINLINE void operator()(T_Dim dim) - { - exec(dim); - } -}; - -} // namespace HostBufferIntern -} // namespace memory -} // namespace test + /** + * Checks if the HostBufferIntern is set to a constant value. + */ + struct setValueTest + { + template + void exec(T_Dim) + { + using Data = uint8_t; + using Extents = size_t; + + using ::pmacc::test::memory::getElementsPerDim; + + std::vector nElementsPerDim = getElementsPerDim(); + + for(size_t i = 0; i < nElementsPerDim.size(); ++i) + { + ::pmacc::DataSpace const dataSpace + = ::pmacc::DataSpace::create(nElementsPerDim[i]); + ::pmacc::HostBufferIntern hostBufferIntern(dataSpace); + + const Data value = 255; + hostBufferIntern.setValue(value); + + auto ptr = hostBufferIntern.getPointer(); + for(size_t j = 0; j < static_cast(dataSpace.productOfComponents()); ++j) + { + REQUIRE(ptr[j] == value); + } + } + } + + PMACC_NO_NVCC_HDWARNING + template + HDINLINE void operator()(T_Dim dim) + { + exec(dim); + } + }; + + } // namespace HostBufferIntern + } // namespace memory + } // namespace test } // namespace pmacc -BOOST_AUTO_TEST_CASE( setValue ) +TEST_CASE("HostBufferIntern::setValue", "[setValue]") { using namespace pmacc::test::memory::HostBufferIntern; - 
::boost::mpl::for_each< Dims >( setValueTest() ); + ::boost::mpl::for_each(setValueTest()); } diff --git a/include/pmacc/test/memory/memoryUT.cpp b/include/pmacc/test/memory/memoryUT.cpp index 731ac104e4..a4a814e8c1 100644 --- a/include/pmacc/test/memory/memoryUT.cpp +++ b/include/pmacc/test/memory/memoryUT.cpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Erik Zenker, Alexander Grund +/* Copyright 2015-2021 Erik Zenker, Alexander Grund * * This file is part of PMacc. * @@ -19,15 +19,17 @@ * If not, see . */ -#include "pmacc/test/PMaccFixture.hpp" +#include +#include // STL #include /* uint8_t */ #include /* cout, endl */ #include +#include + // BOOST -#include #include #include #include @@ -48,41 +50,42 @@ namespace pmacc { -namespace test -{ -namespace memory -{ - -/******************************************************************************* - * Configuration - ******************************************************************************/ - -/** - * Defines for which numbers of elements a - * test should be verfied e.g. the size - * of a host or device buffer. - */ -template -std::vector getElementsPerDim(){ - std::vector nElements; - std::vector nElementsPerDim; - - // Elements total - nElements.push_back(1); - nElements.push_back(1 * 1000); - nElements.push_back(1 * 1000 * 1000); - nElements.push_back(1 * 1000 * 1000 * 10); - - // Elements per dimension - for(size_t i = 0; i < nElements.size(); ++i){ - nElementsPerDim.push_back(std::pow(nElements[i], static_cast(1)/static_cast(T_Dim::value))); - - } - return nElementsPerDim; -} - -} // namespace memory -} // namespace test + namespace test + { + namespace memory + { + /******************************************************************************* + * Configuration + ******************************************************************************/ + + /** + * Defines for which numbers of elements a + * test should be verfied e.g. the size + * of a host or device buffer. 
+ */ + template + std::vector getElementsPerDim() + { + std::vector nElements; + std::vector nElementsPerDim; + + // Elements total + nElements.push_back(1); + nElements.push_back(1 * 1000); + nElements.push_back(1 * 1000 * 1000); + nElements.push_back(1 * 1000 * 1000 * 10); + + // Elements per dimension + for(size_t i = 0; i < nElements.size(); ++i) + { + nElementsPerDim.push_back( + std::pow(nElements[i], static_cast(1) / static_cast(T_Dim::value))); + } + return nElementsPerDim; + } + + } // namespace memory + } // namespace test } // namespace pmacc /** @@ -91,23 +94,15 @@ std::vector getElementsPerDim(){ * each dimension setup automatically. For this * purpose boost::mpl::for_each is used. */ -using Dims = ::boost::mpl::list< boost::mpl::int_< DIM1 >, - boost::mpl::int_< DIM2 >, - boost::mpl::int_< DIM3 > >; +using Dims = ::boost::mpl::list, boost::mpl::int_, boost::mpl::int_>; /******************************************************************************* * Test Suites ******************************************************************************/ -using MyPMaccFixture = pmacc::test::PMaccFixture< TEST_DIM >; - -BOOST_GLOBAL_FIXTURE( MyPMaccFixture ); - -BOOST_AUTO_TEST_SUITE( memory ) +using MyPMaccFixture = pmacc::test::PMaccFixture; - BOOST_AUTO_TEST_SUITE( HostBufferIntern ) -# include "HostBufferIntern/copyFrom.hpp" -# include "HostBufferIntern/reset.hpp" -# include "HostBufferIntern/setValue.hpp" - BOOST_AUTO_TEST_SUITE_END() +static MyPMaccFixture fixture; -BOOST_AUTO_TEST_SUITE_END() +#include "HostBufferIntern/copyFrom.hpp" +#include "HostBufferIntern/reset.hpp" +#include "HostBufferIntern/setValue.hpp" diff --git a/include/pmacc/test/particles/IdProvider.hpp b/include/pmacc/test/particles/IdProvider.hpp index d41ad891f5..d42aab1971 100644 --- a/include/pmacc/test/particles/IdProvider.hpp +++ b/include/pmacc/test/particles/IdProvider.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is 
part of PMacc. * @@ -32,163 +32,136 @@ #include #include #include -#include + +#include + #include #include #include -BOOST_AUTO_TEST_SUITE( particles ) - namespace pmacc { -namespace test -{ -namespace particles -{ - namespace bmpl = boost::mpl; - - template< - uint32_t T_numWorkers, - uint32_t T_numIdsPerBlock, - typename T_IdProvider - > - struct GenerateIds + namespace test { - template - HDINLINE void operator()(const T_Acc & acc, T_Box outputbox, uint32_t numThreads, uint32_t numIdsPerThread) const + namespace particles { - using namespace ::pmacc; - using namespace mappings::threads; - - constexpr uint32_t numWorkers = T_numWorkers; - - uint32_t const workerIdx = threadIdx.x; - - uint32_t const blockId = blockIdx.x * T_numIdsPerBlock; - ForEachIdx< - IdxConfig< - T_numIdsPerBlock, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearId, - uint32_t const - ) + namespace bmpl = boost::mpl; + + template + struct GenerateIds + { + template + HDINLINE void operator()( + const T_Acc& acc, + T_Box outputbox, + uint32_t numThreads, + uint32_t numIdsPerThread) const { - uint32_t const localId = blockId + linearId; - if( localId < numThreads ) - { - for( uint32_t i = 0u; i < numIdsPerThread; i++ ) - outputbox( i * numThreads + localId ) = T_IdProvider::getNewId( ); - } + using namespace ::pmacc; + using namespace mappings::threads; + + constexpr uint32_t numWorkers = T_numWorkers; + + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + uint32_t const blockId = cupla::blockIdx(acc).x * T_numIdsPerBlock; + ForEachIdx>{workerIdx}( + [&](uint32_t const linearId, uint32_t const) { + uint32_t const localId = blockId + linearId; + if(localId < numThreads) + { + for(uint32_t i = 0u; i < numIdsPerThread; i++) + outputbox(i * numThreads + localId) = T_IdProvider::getNewId(); + } + }); + } + }; + + /** function checks if a value is in a collection + * + * Use like: REQUIRE(checkDuplicate(col, value, true|false)); + * @param col Container to be searched + * @param 
value Value to search for + * @param shouldFind Whether the value is expected in the collection or not + * @return Error-Value, if the value is not found and shouldFind is true or + * the value is found and shouldFind is false, otherwise a True-Value + */ + template + bool checkDuplicate(const T_Collection& col, const T& value, bool shouldFind) + { + if((std::find(col.begin(), col.end(), value) != col.end()) != shouldFind) + { + bool res(false); + if(shouldFind) + std::cout << "Value not found found: "; + else + std::cout << "Duplicate found: "; + std::cout << value << ". Values=["; + for(typename T_Collection::const_iterator it = col.begin(); it != col.end(); ++it) + std::cout << *it << ","; + std::cout << "]"; + return res; } - ); - - - } - }; -/** - * Boost.Test compatible function that checks if a value is in a collection - * Use like: BOOST_REQUIRE(checkDuplicate(col, value, true|false)); - * @param col Container to be searched - * @param value Value to search for - * @param shouldFind Whether the value is expected in the collection or not - * @return Error-Value, if the value is not found and shouldFind is true or - * the value is found and shouldFind is false, otherwise a True-Value - */ -template -boost::test_tools::predicate_result -checkDuplicate(const T_Collection& col, const T& value, bool shouldFind) -{ - if((std::find(col.begin(), col.end(), value) != col.end()) != shouldFind) - { - boost::test_tools::predicate_result res(false); - if(shouldFind) - res.message() << "Value not found found: "; - else - res.message() << "Duplicate found: "; - res.message() << value << ". 
Values=["; - for(typename T_Collection::const_iterator it = col.begin(); it != col.end(); ++it) - res.message() << *it << ","; - res.message() << "]"; - return res; - } - - return true; -} + return true; + } -template -struct IdProviderTest -{ - void operator()() - { - using namespace ::pmacc; - - constexpr uint32_t numBlocks = 4; - constexpr uint32_t numIdsPerBlock = 64; - constexpr uint32_t numThreads = numBlocks * numIdsPerBlock; - constexpr uint32_t numIdsPerThread = 2; - constexpr uint32_t numIds = numThreads * numIdsPerThread; - - using IdProvider = IdProvider< T_dim >; - IdProvider::init(); - // Check initial state - typename IdProvider::State state = IdProvider::getState(); - BOOST_REQUIRE_EQUAL(state.startId, state.nextId); - BOOST_REQUIRE_EQUAL(state.maxNumProc, 1u); - BOOST_REQUIRE(!IdProvider::isOverflown()); - std::set ids; - BOOST_REQUIRE_EQUAL(IdProvider::getNewIdHost(), state.nextId); - // Generate some IDs using the function - for(int i=0; i idBuf(numIds); - constexpr uint32_t numWorkers = traits::GetNumWorkers< - numIdsPerBlock - >::value; - PMACC_KERNEL( GenerateIds< - numWorkers, - numIdsPerBlock, - IdProvider - >{ })( - numBlocks, - numWorkers - )( - idBuf.getDeviceBuffer().getDataBox(), - numThreads, - numIdsPerThread - ); - idBuf.deviceToHost(); - BOOST_REQUIRE_EQUAL(numIds, ids.size()); - auto hostBox = idBuf.getHostBuffer().getDataBox(); - // Make sure they are the same - for(uint32_t i=0; i + struct IdProviderTest + { + void operator()() + { + using namespace ::pmacc; + + constexpr uint32_t numBlocks = 4; + constexpr uint32_t numIdsPerBlock = 64; + constexpr uint32_t numThreads = numBlocks * numIdsPerBlock; + constexpr uint32_t numIdsPerThread = 2; + constexpr uint32_t numIds = numThreads * numIdsPerThread; + + using IdProvider = IdProvider; + IdProvider::init(); + // Check initial state + typename IdProvider::State state = IdProvider::getState(); + REQUIRE(state.startId == state.nextId); + REQUIRE(state.maxNumProc == 1u); + 
REQUIRE(!IdProvider::isOverflown()); + std::set ids; + REQUIRE(IdProvider::getNewIdHost() == state.nextId); + // Generate some IDs using the function + for(int i = 0; i < numIds; i++) + { + const uint64_t newId = IdProvider::getNewIdHost(); + REQUIRE(checkDuplicate(ids, newId, false)); + ids.insert(newId); + } + // Reset the state + IdProvider::setState(state); + REQUIRE(IdProvider::getNewIdHost() == state.nextId); + // Generate the same IDs on the device + HostDeviceBuffer idBuf(numIds); + constexpr uint32_t numWorkers = traits::GetNumWorkers::value; + PMACC_KERNEL(GenerateIds{}) + (numBlocks, numWorkers)(idBuf.getDeviceBuffer().getDataBox(), numThreads, numIdsPerThread); + idBuf.deviceToHost(); + REQUIRE(numIds == ids.size()); + auto hostBox = idBuf.getHostBuffer().getDataBox(); + // Make sure they are the same + for(uint32_t i = 0; i < numIds; i++) + { + REQUIRE(checkDuplicate(ids, hostBox(i), true)); + } + } + }; -} // namespace particles -} // namespace test + } // namespace particles + } // namespace test } // namespace pmacc -BOOST_AUTO_TEST_CASE(IdProvider) +TEST_CASE("particles::IDProvider", "[IDProvider]") { using namespace pmacc::test::particles; IdProviderTest()(); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/include/pmacc/test/particles/memory/SuperCell.hpp b/include/pmacc/test/particles/memory/SuperCell.hpp index d552424de9..40994ae451 100644 --- a/include/pmacc/test/particles/memory/SuperCell.hpp +++ b/include/pmacc/test/particles/memory/SuperCell.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,90 +26,70 @@ namespace pmacc { -namespace test -{ -namespace particles -{ -namespace memory -{ - -template< typename T_SuperCell > -struct TestNumParticlesLastFrame -{ - struct FrameTypeDummy + namespace test { - using SuperCellSize = T_SuperCell; - }; + namespace particles + { + namespace memory + { + template + struct TestNumParticlesLastFrame + { + struct FrameTypeDummy + { + using SuperCellSize = T_SuperCell; + }; - /** test a combination - * - * @param numParticlesPerCell number of particles within the test supercell - * @param particleLastFrame the assumed result with the given number of particles - * and T_SuperCell - */ - HDINLINE void operator()( - uint32_t numParticlesPerCell, - uint32_t particleLastFrame - ) - { - pmacc::SuperCell< FrameTypeDummy > superCell; - superCell.setNumParticles( numParticlesPerCell ); + /** test a combination + * + * @param numParticlesPerCell number of particles within the test supercell + * @param particleLastFrame the assumed result with the given number of particles + * and T_SuperCell + */ + HINLINE void operator()(uint32_t numParticlesPerCell, uint32_t particleLastFrame) + { + pmacc::SuperCell superCell; + superCell.setNumParticles(numParticlesPerCell); - BOOST_CHECK_EQUAL( - superCell.getSizeLastFrame(), - particleLastFrame - ); - } -}; + REQUIRE(superCell.getSizeLastFrame() == particleLastFrame); + } + }; -} // namespace memory -} // namespace particles -} // namespace test + } // namespace memory + } // namespace particles + } // namespace test } // namespace pmacc /* The supercell test is always performed with a 3 dimensional supercell * because the supercell is agnostic about the number of dimensions. 
*/ -BOOST_AUTO_TEST_CASE( copyFrom ) +TEST_CASE("particles::SuperCell", "[SuperCell]") { using namespace pmacc::test::particles::memory; - TestNumParticlesLastFrame< - pmacc::math::CT::Int< - 8, - 8, - 4 - > - > cell256{}; + TestNumParticlesLastFrame> cell256{}; // no particles in the supercell - cell256( 0u, 0u ); + cell256(0u, 0u); // one full frame - cell256( 256u, 256u ); + cell256(256u, 256u); // two full frames - cell256( 512u, 256u ); + cell256(512u, 256u); // edge cases - cell256( 255u, 255u ); - cell256( 257u, 1u ); - cell256( 1u, 1u ); + cell256(255u, 255u); + cell256(257u, 1u); + cell256(1u, 1u); using namespace pmacc::test::particles::memory; - TestNumParticlesLastFrame< - pmacc::math::CT::Int< - 3, - 3, - 3 - > - > cell27{}; + TestNumParticlesLastFrame> cell27{}; // no particles in the supercell - cell27( 0u, 0u ); + cell27(0u, 0u); // one full frame - cell27( 27u, 27u ); + cell27(27u, 27u); // two full frames - cell27( 54u, 27u ); + cell27(54u, 27u); // edge cases - cell27( 26u, 26u ); - cell27( 28u, 1u ); - cell27( 1u, 1u ); - + cell27(26u, 26u); + cell27(28u, 1u); + cell27(1u, 1u); } diff --git a/include/pmacc/test/particles/particlesUT.cpp b/include/pmacc/test/particles/particlesUT.cpp index 8ce68203a0..bad91a48aa 100644 --- a/include/pmacc/test/particles/particlesUT.cpp +++ b/include/pmacc/test/particles/particlesUT.cpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. * @@ -19,17 +19,18 @@ * If not, see . 
*/ -#include "pmacc/test/PMaccFixture.hpp" +#include +#include -#include +#include #if TEST_DIM == 2 - using pmacc::test::PMaccFixture2D; - BOOST_GLOBAL_FIXTURE( PMaccFixture2D ); +using pmacc::test::PMaccFixture2D; +static PMaccFixture2D fixture; #else - using pmacc::test::PMaccFixture3D; - BOOST_GLOBAL_FIXTURE( PMaccFixture3D ); +using pmacc::test::PMaccFixture3D; +static PMaccFixture3D fixture; #endif #include "IdProvider.hpp" diff --git a/include/pmacc/test/random/2DDistribution.cpp b/include/pmacc/test/random/2DDistribution.cpp index bbee7d68e9..cafe63aac4 100644 --- a/include/pmacc/test/random/2DDistribution.cpp +++ b/include/pmacc/test/random/2DDistribution.cpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Alexander Grund +/* Copyright 2016-2021 Alexander Grund * * This file is part of PMacc. * @@ -19,19 +19,20 @@ * If not, see . */ -#include "pmacc/types.hpp" -#include "pmacc/memory/buffers/HostDeviceBuffer.hpp" -#include "pmacc/random/RNGProvider.hpp" -#include "pmacc/random/distributions/Uniform.hpp" -#include "pmacc/random/methods/AlpakaRand.hpp" -#include "pmacc/dimensions/DataSpace.hpp" -#include "pmacc/assert.hpp" -#include "pmacc/mappings/threads/ForEachIdx.hpp" -#include "pmacc/mappings/threads/IdxConfig.hpp" -#include "pmacc/traits/GetNumWorkers.hpp" -#include "pmacc/dataManagement/ISimulationData.hpp" -#include "pmacc/Environment.hpp" -#include "pmacc/eventSystem/tasks/ITask.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -41,295 +42,261 @@ namespace pmacc { -namespace test -{ -namespace random -{ + namespace test + { + namespace random + { + using Space2D = pmacc::DataSpace; + using Space3D = pmacc::DataSpace; -using Space2D = pmacc::DataSpace< DIM2 >; -using Space3D = pmacc::DataSpace< DIM3 >; + template + struct RandomFiller + { + template + DINLINE void operator()( + T_Acc const& acc, + T_DataBox box, + Space2D const boxSize, + 
T_Random const rand, + uint32_t const numSamples) const + { + using namespace pmacc::mappings::threads; -template< - uint32_t T_numWorkers, - uint32_t T_blockSize -> -struct RandomFiller -{ - template< - typename T_DataBox, - typename T_Random, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_DataBox box, - Space2D const boxSize, - T_Random const rand, - uint32_t const numSamples - ) const - { - using namespace pmacc::mappings::threads; + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + using SupercellDomCfg = IdxConfig; - constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; + // each virtual worker initialize one rng state + ForEachIdx forEachCell(workerIdx); - using SupercellDomCfg = IdxConfig< - T_blockSize, - numWorkers - >; + forEachCell([&](uint32_t const linearIdx, uint32_t const) { + uint32_t const linearTid = cupla::blockIdx(acc).x * T_blockSize + linearIdx; - // each virtual worker initialize one rng state - ForEachIdx< SupercellDomCfg > forEachCell( workerIdx ); + if(linearTid >= boxSize.productOfComponents()) + return; + + Space2D const ownIdx = pmacc::DataSpaceOperations::map(boxSize, linearTid); + // each virtual worker needs an own instance of rand + T_Random vWorkerRand = rand; + vWorkerRand.init(ownIdx); + for(uint32_t i = 0u; i < numSamples; i++) + { + Space2D idx = vWorkerRand(acc, boxSize); + cupla::atomicAdd(acc, &box(idx), 1u, ::alpaka::hierarchy::Blocks{}); + } + }); + } + }; - forEachCell( - [&]( - uint32_t const linearIdx, - uint32_t const - ) + template + struct GetRandomIdx { - uint32_t const linearTid = blockIdx.x * T_blockSize + linearIdx; - - if( linearTid >= boxSize.productOfComponents() ) - return; - - Space2D const ownIdx = pmacc::DataSpaceOperations< Space2D::dim >::map( - boxSize, - linearTid - ); - // each virtual worker needs an own instance of rand - T_Random vWorkerRand = rand; - vWorkerRand.init( ownIdx ); - for( 
uint32_t i = 0u; i < numSamples; i++ ) + typedef pmacc::random::distributions::Uniform Distribution; + typedef typename T_RNGProvider::template GetRandomType::type Random; + + HINLINE GetRandomIdx() : rand(T_RNGProvider::template createRandom()) { - Space2D idx = vWorkerRand( - acc, - boxSize - ); - atomicAdd(&box(idx), 1u, ::alpaka::hierarchy::Blocks{}); + } + + /** initialize the random generator + * + * @warning: it is not allowed to call this method twice on an instance + */ + DINLINE void init(Space2D globalCellIdx) + { + rand.init(globalCellIdx); + } + + template + DINLINE Space2D operator()(T_Acc const& acc, Space2D size) + { + using pmacc::math::float2int_rd; + return Space2D(float2int_rd(rand(acc) * size.x()), float2int_rd(rand(acc) * size.y())); + } + + private: + PMACC_ALIGN8(rand, Random); + }; + + /** Write in PGM grayscale file format (easy to read/interpret) */ + template + void writePGM(const std::string& filePath, T_Buffer& buffer) + { + const Space2D size = buffer.getDataSpace(); + uint32_t maxVal = 0; + for(int y = 0; y < size.y(); y++) + { + for(int x = 0; x < size.x(); x++) + { + uint32_t val = buffer.getDataBox()(Space2D(x, y)); + if(val > maxVal) + maxVal = val; + } + } + + // Standard format is single byte per value which limits the range to 0-255 + // An extension allows 2 bytes so 0-65536) + if(maxVal > std::numeric_limits::max()) + maxVal = std::numeric_limits::max(); + const bool isTwoByteFormat = maxVal > std::numeric_limits::max(); + + std::ofstream outFile(filePath.c_str()); + // TAG + outFile << "P5\n"; + // Size and maximum value (at most 65536 which is 2 bytes per value) + outFile << size.x() << " " << size.y() << " " << maxVal << "\n"; + for(int y = 0; y < size.y(); y++) + { + for(int x = 0; x < size.x(); x++) + { + uint32_t val = buffer.getDataBox()(Space2D(x, y)); + // Clip value + if(val > maxVal) + val = maxVal; + // Write first byte (higher order bits) if file is in 2 byte format + if(isTwoByteFormat) + outFile << uint8_t(val 
>> 8); + // Write remaining bytze + outFile << uint8_t(val); + } } } - ); - } -}; -template -struct GetRandomIdx -{ - typedef pmacc::random::distributions::Uniform Distribution; - typedef typename T_RNGProvider::template GetRandomType::type Random; - - HINLINE GetRandomIdx(): rand(T_RNGProvider::template createRandom()) - {} - - /** initialize the random generator - * - * @warning: it is not allowed to call this method twice on an instance - */ - DINLINE void - init(Space2D globalCellIdx) - { - rand.init(globalCellIdx); - } - - template< typename T_Acc > - DINLINE Space2D - operator()( - T_Acc const & acc, - Space2D size - ) - { - using pmacc::algorithms::math::float2int_rd; - return Space2D( - float2int_rd( rand( acc ) * size.x() ), - float2int_rd( rand( acc ) * size.y() ) - ); - } -private: - PMACC_ALIGN8(rand, Random); -}; - -/** Write in PGM grayscale file format (easy to read/interpret) */ -template -void writePGM(const std::string& filePath, T_Buffer& buffer) -{ - const Space2D size = buffer.getDataSpace(); - uint32_t maxVal = 0; - for(int y=0; y maxVal) - maxVal = val; - } - } - - // Standard format is single byte per value which limits the range to 0-255 - // An extension allows 2 bytes so 0-65536) - if(maxVal > std::numeric_limits::max()) - maxVal = std::numeric_limits::max(); - const bool isTwoByteFormat = maxVal > std::numeric_limits::max(); - - std::ofstream outFile(filePath.c_str()); - // TAG - outFile << "P5\n"; - // Size and maximum value (at most 65536 which is 2 bytes per value) - outFile << size.x() << " " << size.y() << " " << maxVal << "\n"; - for(int y=0; y maxVal) - val = maxVal; - // Write first byte (higher order bits) if file is in 2 byte format - if(isTwoByteFormat) - outFile << uint8_t(val >> 8); - // Write remaining bytze - outFile << uint8_t(val); - } - } -} + template + void generateRandomNumbers( + const Space2D& rngSize, + uint32_t numSamples, + T_DeviceBuffer& buffer, + const T_Random& rand) + { + cuplaEvent_t start, stop; + 
CUDA_CHECK(cuplaEventCreate(&start)); + CUDA_CHECK(cuplaEventCreate(&stop)); + + constexpr uint32_t blockSize = 256; + + constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers::value; + + uint32_t gridSize = (rngSize.productOfComponents() + blockSize - 1u) / blockSize; + + CUDA_CHECK(cuplaEventRecord( + start, + /* we need to pass a stream to avoid that we record the event in + * an empty or wrong stream + */ + pmacc::Environment<>::get() + .TransactionManager() + .getEventStream(pmacc::ITask::TASK_DEVICE) + ->getCudaStream())); + PMACC_KERNEL(RandomFiller{}) + (gridSize, numWorkers)(buffer.getDataBox(), buffer.getDataSpace(), rand, numSamples); + + CUDA_CHECK(cuplaEventRecord( + stop, + /* we need to pass a stream to avoid that we record the event in + * an empty or wrong stream + */ + pmacc::Environment<>::get() + .TransactionManager() + .getEventStream(pmacc::ITask::TASK_DEVICE) + ->getCudaStream())); + CUDA_CHECK(cuplaEventSynchronize(stop)); + float milliseconds = 0; + CUDA_CHECK(cuplaEventElapsedTime(&milliseconds, start, stop)); + std::cout << "Done in " << milliseconds << "ms" << std::endl; + CUDA_CHECK(cuplaEventDestroy(start)); + CUDA_CHECK(cuplaEventDestroy(stop)); + } -template -void generateRandomNumbers(const Space2D& rngSize, uint32_t numSamples, T_DeviceBuffer& buffer, const T_Random& rand) -{ - cudaEvent_t start, stop; - CUDA_CHECK(cudaEventCreate(&start)); - CUDA_CHECK(cudaEventCreate(&stop)); - - constexpr uint32_t blockSize = 256; - - constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers< - blockSize - >::value; - - uint32_t gridSize = ( rngSize.productOfComponents() + blockSize - 1u ) / blockSize; - - CUDA_CHECK(cudaEventRecord( - start, - /* we need to pass a stream to avoid that we record the event in - * an empty or wrong stream - */ - pmacc::Environment<>::get( ).TransactionManager( ). 
- getEventStream( pmacc::ITask::TASK_CUDA )->getCudaStream() - )); - PMACC_KERNEL( - RandomFiller< - numWorkers, - blockSize - >{} - )( - gridSize, - numWorkers - )( - buffer.getDataBox(), - buffer.getDataSpace(), - rand, - numSamples - ); - - CUDA_CHECK(cudaEventRecord( - stop, - /* we need to pass a stream to avoid that we record the event in - * an empty or wrong stream - */ - pmacc::Environment<>::get( ).TransactionManager( ). - getEventStream( pmacc::ITask::TASK_CUDA )->getCudaStream() - )); - CUDA_CHECK(cudaEventSynchronize(stop)); - float milliseconds = 0; - CUDA_CHECK(cudaEventElapsedTime(&milliseconds, start, stop)); - std::cout << "Done in " << milliseconds << "ms" << std::endl; - CUDA_CHECK(cudaEventDestroy(start)); - CUDA_CHECK(cudaEventDestroy(stop)); -} + template + void runTest(uint32_t numSamples) + { + typedef pmacc::random::RNGProvider<2, T_Method> RNGProvider; + + const std::string rngName = RNGProvider::RNGMethod::getName(); + std::cout << std::endl + << "Running test for " << rngName << " with " << numSamples << " samples per cell" + << std::endl; + // Size of the detector + const Space2D size(256, 256); + // Size of the rng provider (= number of states used) + const Space2D rngSize(256, 256); + + pmacc::HostDeviceBuffer detector(size); + auto rngProvider = new RNGProvider(rngSize); + + pmacc::Environment<>::get().DataConnector().share( + std::shared_ptr(rngProvider)); + rngProvider->init(0x42133742); + + generateRandomNumbers(rngSize, numSamples, detector.getDeviceBuffer(), GetRandomIdx()); + + detector.deviceToHost(); + auto box = detector.getHostBuffer().getDataBox(); + // Write data to file + std::ofstream dataFile((rngName + "_data.txt").c_str()); + for(int y = 0; y < size.y(); y++) + { + for(int x = 0; x < size.x(); x++) + dataFile << box(Space2D(x, y)) << ","; + } + writePGM(rngName + "_img.pgm", detector.getHostBuffer()); -template -void runTest(uint32_t numSamples) -{ - typedef pmacc::random::RNGProvider<2, T_Method> RNGProvider; - - 
const std::string rngName = RNGProvider::RNGMethod::getName(); - std::cout << std::endl << "Running test for " << rngName - << " with " << numSamples << " samples per cell" - << std::endl; - // Size of the detector - const Space2D size(256, 256); - // Size of the rng provider (= number of states used) - const Space2D rngSize(256, 256); - - pmacc::HostDeviceBuffer detector(size); - auto rngProvider = new RNGProvider(rngSize); - - pmacc::Environment<>::get().DataConnector().share( std::shared_ptr< pmacc::ISimulationData >( rngProvider ) ); - rngProvider->init(0x42133742); - - generateRandomNumbers(rngSize, numSamples, detector.getDeviceBuffer(), GetRandomIdx()); - - detector.deviceToHost(); - auto box = detector.getHostBuffer().getDataBox(); - // Write data to file - std::ofstream dataFile((rngName + "_data.txt").c_str()); - for(int y=0; y(-1); - for(int y=0; y maxVal) - maxVal = val; - if(val < minVal) - minVal = val; - totalNumSamples += val; - mean += pmacc::math::linearize(size.shrink<1>(1), idx) * static_cast(val); - } - } - PMACC_ASSERT(totalNumSamples == uint64_t(rngSize.productOfComponents()) * uint64_t(numSamples)); - // Expected value: (n-1)/2 - double Ex = (size.productOfComponents() - 1) / 2.; - // Variance: (n^2 - 1) / 12 - double var = (pmacc::algorithms::math::pow(size.productOfComponents(), 2) - 1.) 
/ 12.; - // Mean value - mean /= totalNumSamples; - double errSq = 0; - // Calc standard derivation - for(int y=0; y(pmacc::math::linearize(size.shrink<1>(1), idx) - mean, 2); - } - } - double stdDev = sqrt(errSq/(totalNumSamples - 1)); - - uint64_t avg = totalNumSamples/size.productOfComponents(); - std::cout << " Samples: " << totalNumSamples << std::endl; - std::cout << " Min: " << minVal << std::endl; - std::cout << " Max: " << maxVal << std::endl; - std::cout << " Avg/cell: " << avg << std::endl; - std::cout << " E(x): " << Ex << std::endl; - std::cout << " mean: " << mean << std::endl; - std::cout << " dev(x): " << sqrt(var) << std::endl; - std::cout << " std. dev: " << stdDev << std::endl; -} + uint64_t totalNumSamples = 0; + double mean = 0; + uint32_t maxVal = 0; + uint32_t minVal = static_cast(-1); + for(int y = 0; y < size.y(); y++) + { + for(int x = 0; x < size.x(); x++) + { + Space2D idx(x, y); + uint32_t val = box(idx); + if(val > maxVal) + maxVal = val; + if(val < minVal) + minVal = val; + totalNumSamples += val; + mean += pmacc::math::linearize(size.shrink<1>(1), idx) * static_cast(val); + } + } + PMACC_ASSERT(totalNumSamples == uint64_t(rngSize.productOfComponents()) * uint64_t(numSamples)); + // Expected value: (n-1)/2 + double Ex = (size.productOfComponents() - 1) / 2.; + // Variance: (n^2 - 1) / 12 + double var = (cupla::pow(static_cast(size.productOfComponents()), 2.0) - 1.) 
/ 12.; + // Mean value + mean /= totalNumSamples; + double errSq = 0; + // Calc standard derivation + for(int y = 0; y < size.y(); y++) + { + for(int x = 0; x < size.x(); x++) + { + Space2D idx(x, y); + uint32_t val = box(idx); + errSq += val + * cupla::pow( + static_cast(pmacc::math::linearize(size.shrink<1>(1), idx) - mean), + 2.0); + } + } + double stdDev = sqrt(errSq / (totalNumSamples - 1)); + + uint64_t avg = totalNumSamples / size.productOfComponents(); + std::cout << " Samples: " << totalNumSamples << std::endl; + std::cout << " Min: " << minVal << std::endl; + std::cout << " Max: " << maxVal << std::endl; + std::cout << " Avg/cell: " << avg << std::endl; + std::cout << " E(x): " << Ex << std::endl; + std::cout << " mean: " << mean << std::endl; + std::cout << " dev(x): " << sqrt(var) << std::endl; + std::cout << " std. dev: " << stdDev << std::endl; + } -} // namespace random -} // namespace test + } // namespace random + } // namespace test } // namespace pmacc int main(int argc, char** argv) @@ -341,7 +308,7 @@ int main(int argc, char** argv) const uint32_t numSamples = (argc > 1) ? atoi(argv[1]) : 100; - runTest< random::methods::AlpakaRand< cupla::Acc> >(numSamples); + runTest>(numSamples); /* finalize the pmacc context */ Environment<>::get().finalize(); diff --git a/include/pmacc/test/random/CMakeLists.txt b/include/pmacc/test/random/CMakeLists.txt index 0a137c59be..9003f1eea8 100644 --- a/include/pmacc/test/random/CMakeLists.txt +++ b/include/pmacc/test/random/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2016-2020 Alexander Grund +# Copyright 2016-2021 Alexander Grund # # This file is part of PMacc. # @@ -19,7 +19,7 @@ # If not, see . 
# -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) project("TestRandomGenerators") set(CMAKE_PREFIX_PATH ${CMAKE_PREFIX_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/../..") diff --git a/include/pmacc/traits/GetCTName.hpp b/include/pmacc/traits/GetCTName.hpp index 74c87e606f..13f3ccaeb9 100644 --- a/include/pmacc/traits/GetCTName.hpp +++ b/include/pmacc/traits/GetCTName.hpp @@ -1,4 +1,4 @@ -/* Copyright 2018-2020 Rene Widera +/* Copyright 2018-2021 Rene Widera * * This file is part of PMacc. * @@ -26,24 +26,23 @@ namespace pmacc { -namespace traits -{ - - /** Return the compile time name - * - * @tparam T_Type type of the object where the name is queried - * @return ::type name of the object as pmacc::meta::String, - * empty string is returned if the trait is not specified for - * T_Type - */ - template< typename T_Type > - struct GetCTName + namespace traits { - using type = pmacc::meta::String< >; - }; + /** Return the compile time name + * + * @tparam T_Type type of the object where the name is queried + * @return ::type name of the object as pmacc::meta::String, + * empty string is returned if the trait is not specified for + * T_Type + */ + template + struct GetCTName + { + using type = pmacc::meta::String<>; + }; - template< typename T_Type > - using GetCTName_t = typename GetCTName< T_Type >::type; + template + using GetCTName_t = typename GetCTName::type; -} // namespace traits + } // namespace traits } // namespace pmacc diff --git a/include/pmacc/traits/GetComponentsType.hpp b/include/pmacc/traits/GetComponentsType.hpp index 7ce698b51f..488633290c 100644 --- a/include/pmacc/traits/GetComponentsType.hpp +++ b/include/pmacc/traits/GetComponentsType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -25,27 +25,25 @@ namespace pmacc { - -namespace traits -{ - /** Get component type of an object - * - * \tparam T_Type any type - * \return \p ::type get result type - * If T_Type is fundamental c++ type, the identity is returned - * - * Attention: do not defines this trait for structs with different attributes inside - */ - template::value > - struct GetComponentsType; - - template - struct GetComponentsType + namespace traits { - typedef T_Type type; - }; - -} //namespace traits - -}// namespace pmacc - + /** Get component type of an object + * + * \tparam T_Type any type + * \return \p ::type get result type + * If T_Type is fundamental c++ type, the identity is returned + * + * Attention: do not defines this trait for structs with different attributes inside + */ + template::value> + struct GetComponentsType; + + template + struct GetComponentsType + { + typedef T_Type type; + }; + + } // namespace traits + +} // namespace pmacc diff --git a/include/pmacc/traits/GetFlagType.hpp b/include/pmacc/traits/GetFlagType.hpp index 3d1b0f578b..b728d9f9ed 100644 --- a/include/pmacc/traits/GetFlagType.hpp +++ b/include/pmacc/traits/GetFlagType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. 
* @@ -24,20 +24,19 @@ namespace pmacc { -namespace traits -{ - -/** Get Flag of an Object - * - * @tparam T_Object any object (class or typename) - * @tparam T_Key a class which is used as identifier - * - * @treturn ::type - */ -template -struct GetFlagType; + namespace traits + { + /** Get Flag of an Object + * + * @tparam T_Object any object (class or typename) + * @tparam T_Key a class which is used as identifier + * + * @treturn ::type + */ + template + struct GetFlagType; -}//namespace traits + } // namespace traits -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/traits/GetInitializedInstance.hpp b/include/pmacc/traits/GetInitializedInstance.hpp index a66bf02ddb..201671aec9 100644 --- a/include/pmacc/traits/GetInitializedInstance.hpp +++ b/include/pmacc/traits/GetInitializedInstance.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PMacc. * @@ -26,27 +26,26 @@ namespace pmacc { -namespace traits -{ - -/** Return an initialized instance. Expects a single parameter. - * - * The main reason to use this is for templated types where it's unknown - * if they are fundamental or vector-like. - * - * \tparam T_Type type of object - */ -template -struct GetInitializedInstance -{ - typedef T_Type Type; - - template - HDINLINE Type operator()(const ValueType& value) const + namespace traits { - return Type(value); - } -}; + /** Return an initialized instance. Expects a single parameter. + * + * The main reason to use this is for templated types where it's unknown + * if they are fundamental or vector-like. 
+ * + * \tparam T_Type type of object + */ + template + struct GetInitializedInstance + { + typedef T_Type Type; + + template + HDINLINE Type operator()(const ValueType& value) const + { + return Type(value); + } + }; -} // traits -} // PMacc + } // namespace traits +} // namespace pmacc diff --git a/include/pmacc/traits/GetNComponents.hpp b/include/pmacc/traits/GetNComponents.hpp index 3ced6165ff..7a42d36fb8 100644 --- a/include/pmacc/traits/GetNComponents.hpp +++ b/include/pmacc/traits/GetNComponents.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -25,38 +25,39 @@ namespace pmacc { - -namespace traits -{ -/** C - * - * \tparam T_Type any type - * \return \p ::value as public with number of components (uint32_t) - */ -template::value> -struct GetNComponents -{ - /* The compiler is allowed to evaluate an expression that does not depend on a template parameter - * even if the class is never instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. - * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html - * - * A workaround is to add a template dependency to the expression. - * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. 
- */ - PMACC_CASSERT_MSG_TYPE( __GetNComponents_is_not_defined_for_this_type, T_Type, false && ( sizeof(T_Type) != 0 ) ); - static constexpr uint32_t value = 0; -}; - -/** return value=1 for al fundamental c++ types - */ -template -struct GetNComponents -{ - static constexpr uint32_t value=1; -}; - -} //namespace traits - -}// namespace Pmacc - + namespace traits + { + /** C + * + * \tparam T_Type any type + * \return \p ::value as public with number of components (uint32_t) + */ + template::value> + struct GetNComponents + { + /* The compiler is allowed to evaluate an expression that does not depend on a template parameter + * even if the class is never instantiated. In that case static assert is always + * evaluated (e.g. with clang), this results in an error if the condition is false. + * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + */ + PMACC_CASSERT_MSG_TYPE( + __GetNComponents_is_not_defined_for_this_type, + T_Type, + false && (sizeof(T_Type) != 0)); + static constexpr uint32_t value = 0; + }; + + /** return value=1 for al fundamental c++ types + */ + template + struct GetNComponents + { + static constexpr uint32_t value = 1; + }; + + } // namespace traits + +} // namespace pmacc diff --git a/include/pmacc/traits/GetNumWorkers.hpp b/include/pmacc/traits/GetNumWorkers.hpp index 10a3ab19b7..71284ce1ed 100644 --- a/include/pmacc/traits/GetNumWorkers.hpp +++ b/include/pmacc/traits/GetNumWorkers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Rene Widera +/* Copyright 2017-2021 Rene Widera * * This file is part of PMacc. 
* @@ -27,63 +27,60 @@ namespace pmacc { -namespace traits -{ - /** Get number of workers - * - * the number of workers for a kernel depending on the used accelerator - * - * @tparam T_maxWorkers the maximum number of workers - * @tparam T_Acc the accelerator type - * @return @p ::value number of workers - */ - template< - uint32_t T_maxWorkers, - typename T_Acc = cupla::AccThreadSeq - > - struct GetNumWorkers + namespace traits { - static constexpr uint32_t value = T_maxWorkers; - }; + /** Get number of workers + * + * the number of workers for a kernel depending on the used accelerator + * + * @tparam T_maxWorkers the maximum number of workers + * @tparam T_Acc the accelerator type + * @return @p ::value number of workers + */ + template + struct GetNumWorkers + { + static constexpr uint32_t value = T_maxWorkers; + }; -#if( ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED == 1 ) - template< - uint32_t T_maxWorkers, - typename ... T_Args - > - struct GetNumWorkers< - T_maxWorkers, - alpaka::acc::AccCpuOmp2Blocks< T_Args... > - > - { - static constexpr uint32_t value = 1u; - }; +#if(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED == 1) + template + struct GetNumWorkers> + { + static constexpr uint32_t value = 1u; + }; #endif -#if( ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED == 1 ) - template< - uint32_t T_maxWorkers, - typename ... T_Args - > - struct GetNumWorkers< - T_maxWorkers, - alpaka::acc::AccCpuSerial< T_Args... > - > - { - static constexpr uint32_t value = 1u; - }; +#if(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED == 1) + template + struct GetNumWorkers> + { + static constexpr uint32_t value = 1u; + }; #endif -#if( ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED == 1 ) - template< - uint32_t T_maxWorkers, - typename ... T_Args - > - struct GetNumWorkers< - T_maxWorkers, - alpaka::acc::AccCpuTbbBlocks< T_Args... 
> - > - { - static constexpr uint32_t value = 1u; - }; +#if(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED == 1) + template + struct GetNumWorkers> + { + static constexpr uint32_t value = 1u; + }; +#endif +#if(ALPAKA_ACC_ANY_BT_OMP5_ENABLED == 1) && defined ALPAKA_OFFLOAD_MAX_BLOCK_SIZE && ALPAKA_OFFLOAD_MAX_BLOCK_SIZE > 0 + template + struct GetNumWorkers> + { + static constexpr uint32_t value = ALPAKA_OFFLOAD_MAX_BLOCK_SIZE; + }; +#endif +#if(ALPAKA_ACC_ANY_BT_OACC_ENABLED == 1) + template + struct GetNumWorkers> + { +# ifdef ALPAKA_OFFLOAD_MAX_BLOCK_SIZE + static constexpr uint32_t value = ALPAKA_OFFLOAD_MAX_BLOCK_SIZE; +# else + static constexpr uint32_t value = 1; +# endif + }; #endif -} // namespace traits + } // namespace traits } // namespace pmacc diff --git a/include/pmacc/traits/GetStringProperties.hpp b/include/pmacc/traits/GetStringProperties.hpp index f9b5aa586a..125f97bd65 100644 --- a/include/pmacc/traits/GetStringProperties.hpp +++ b/include/pmacc/traits/GetStringProperties.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. * @@ -27,108 +27,108 @@ namespace pmacc { -namespace traits -{ - - /** a property with sub properties - * - * This class inherit from `std::map`. - * If the `operator[]` is used to access a not existing key an empty StringProperty - * with the given key is inserted (default behavior of `std::map`) - */ - struct StringProperty : public std::map< std::string, StringProperty > + namespace traits { - typedef std::map< std::string, StringProperty > StringPropertyMap; - - //! empty constructor - StringProperty( - ) - {} - - /** constructor + /** a property with sub properties * - * creates a property with one key value + * This class inherit from `std::map`. 
+ * If the `operator[]` is used to access a not existing key an empty StringProperty + * with the given key is inserted (default behavior of `std::map`) * - * \param key name of the key - * \param propertyValue value of the property + * Key naming convention: + * "name" for name, openPMD-compatible when possible + * "param" for additional parameters, corresdponding to openPMD + * ...Parameters attribute */ - StringProperty( - const std::string& key, - const std::string& propertyValue - ) : value(propertyValue) + struct StringProperty : public std::map { - (*this)[key] = propertyValue; - } + typedef std::map StringPropertyMap; - /** overwrite the value from a property - * - * \param propertyValue new value - * \return the property itself + //! empty constructor + StringProperty() + { + } + + /** constructor + * + * creates a property with one key value + * + * \param key name of the key + * \param propertyValue value of the property + */ + StringProperty(const std::string& key, const std::string& propertyValue) : value(propertyValue) + { + (*this)[key] = propertyValue; + } + + /** overwrite the value from a property + * + * \param propertyValue new value + * \return the property itself + */ + StringProperty& operator=(const std::string& propertyValue) + { + value = propertyValue; + return *this; + } + + //! stores a property value + std::string value; + }; + + /** stream operator for a StringProperty */ - StringProperty& operator=( const std::string& propertyValue ) + HINLINE std::ostream& operator<<(std::ostream& out, const StringProperty& property) { - value = propertyValue; - return *this; + out << property.value; + return out; } - //! 
stores a property value - std::string value; - }; + /** Get a property tree of an object + * + * specialize this struct including the static method `StringProperty get()` + * to define a property for an object without the method `getStringProperties()` + * + * \tparam T_Type any type + * \return \p T_Type::getStringProperties() if trait `GetStringProperties<>` is not specialized + */ + template + struct StringProperties + { + static StringProperty get() + { + return T_Type::getStringProperties(); + } + }; - /** stream operator for a StringProperty - */ - HINLINE std::ostream& operator<<( std::ostream& out, const StringProperty& property ) - { - out << property.value; - return out; - } - /** Get a property tree of an object - * - * specialize this struct including the static method `StringProperty get()` - * to define a property for an object without the method `getStringProperties()` - * - * \tparam T_Type any type - * \return \p T_Type::getStringProperties() if trait `GetStringProperties<>` is not specialized - */ - template< typename T_Type > - struct StringProperties - { - static StringProperty get() + /** get the properties of an object + * + * The struct `StringProperties<>` needs to be specialized to change the result + * of this trait for a user defined type. + * If there is no user defined specialization available this trait inherits from + * the result of `::getStringProperties()` from the queried type. + */ + template + struct GetStringProperties : public StringProperty { - return T_Type::getStringProperties(); - } - }; - + GetStringProperties() : StringProperty(StringProperties::get()) + { + } + }; - /** get the properties of an object - * - * The struct `StringProperties<>` needs to be specialized to change the result - * of this trait for a user defined type. - * If there is no user defined specialization available this trait inherits from - * the result of `::getStringProperties()` from the queried type. 
- */ - template< typename T_Type > - struct GetStringProperties : public StringProperty - { - GetStringProperties() : StringProperty( StringProperties< T_Type >::get() ) + /** get the properties of an object instance + * + * same as `GetStringProperties<>` but accepts an instance instead a type + * + * \param an instance that shall be queried + * \return StringProperty of the given instance + */ + template + HINLINE StringProperty getStringProperties(const T_Type&) { + return GetStringProperties()(); } - }; - - /** get the properties of an object instance - * - * same as `GetStringProperties<>` but accepts an instance instead a type - * - * \param an instance that shall be queried - * \return StringProperty of the given instance - */ - template< typename T_Type > - HINLINE StringProperty - getStringProperties( const T_Type& ) - { - return GetStringProperties()(); - }; -} // namespace traits + } // namespace traits } // namespace pmacc diff --git a/include/pmacc/traits/GetUniqueTypeId.hpp b/include/pmacc/traits/GetUniqueTypeId.hpp index a963c7ca1e..6ff75859f6 100644 --- a/include/pmacc/traits/GetUniqueTypeId.hpp +++ b/include/pmacc/traits/GetUniqueTypeId.hpp @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Sergei Bastrakov +/* Copyright 2015-2021 Rene Widera, Sergei Bastrakov * * This file is part of PMacc. * @@ -30,98 +30,92 @@ namespace pmacc { -namespace traits -{ - -/** Get next available type id - * - * Warning: is not thread-safe. - */ -inline uint64_t getNextId( ); - -namespace detail -{ - -/** Global counter for type ids - */ -inline uint64_t & counter() -{ - static uint64_t value = 0; - return value; -} - -/** Unique id for a given type - * - * @tparam T_Type type - */ -template -struct TypeId -{ - static const uint64_t id; -}; - -/** These id values are generated during the startup for all types that cause - * instantiation of GetUniqueTypeId::uid(). 
- * - * The order of calls to GetUniqueTypeId::uid() does not affect the id - * generation, which guarantees the ids are matching for all processes even when - * the run-time access is not. - */ -template -const uint64_t TypeId::id = getNextId( ); - -} //namespace detail - -/** Get next available type id - * - * Warning: is not thread-safe. - */ -uint64_t getNextId( ) -{ - return ++detail::counter( ); -} - -/** Get a unique id of a type - * - * - get a unique id of a type at runtime - * - the id of a type is equal on each instance of a process - * - * @tparam T_Type any object (class or typename) - * @tparam T_ResultType result type - */ -template -struct GetUniqueTypeId -{ - typedef T_ResultType ResultType; - typedef T_Type Type; - - /** create unique id - * - * @param maxValue largest allowed id - */ - static const ResultType uid(uint64_t maxValue = boost::numeric::bounds::highest()) + namespace traits { + /** Get next available type id + * + * Warning: is not thread-safe. + */ + inline uint64_t getNextId(); - const uint64_t id = detail::TypeId::id; - - /* if `id` is out of range than throw an error */ - if (id > maxValue) + namespace detail { - std::stringstream sId; - sId << id; - std::stringstream sMax; - sMax << maxValue; - throw std::runtime_error("generated id is out of range [ id = " + - sId.str() + - std::string(", largest allowed id = ") + - sMax.str() + - std::string(" ]")); + /** Global counter for type ids + */ + inline uint64_t& counter() + { + static uint64_t value = 0; + return value; + } + + /** Unique id for a given type + * + * @tparam T_Type type + */ + template + struct TypeId + { + static const uint64_t id; + }; + + /** These id values are generated during the startup for all types that cause + * instantiation of GetUniqueTypeId::uid(). + * + * The order of calls to GetUniqueTypeId::uid() does not affect the id + * generation, which guarantees the ids are matching for all processes even when + * the run-time access is not. 
+ */ + template + const uint64_t TypeId::id = getNextId(); + + } // namespace detail + + /** Get next available type id + * + * Warning: is not thread-safe. + */ + uint64_t getNextId() + { + return ++detail::counter(); } - return static_cast (id); - } - -}; -}//namespace traits - -}//namespace pmacc + /** Get a unique id of a type + * + * - get a unique id of a type at runtime + * - the id of a type is equal on each instance of a process + * + * @tparam T_Type any object (class or typename) + * @tparam T_ResultType result type + */ + template + struct GetUniqueTypeId + { + typedef T_ResultType ResultType; + typedef T_Type Type; + + /** create unique id + * + * @param maxValue largest allowed id + */ + static const ResultType uid(uint64_t maxValue = boost::numeric::bounds::highest()) + { + const uint64_t id = detail::TypeId::id; + + /* if `id` is out of range than throw an error */ + if(id > maxValue) + { + std::stringstream sId; + sId << id; + std::stringstream sMax; + sMax << maxValue; + throw std::runtime_error( + "generated id is out of range [ id = " + sId.str() + std::string(", largest allowed id = ") + + sMax.str() + std::string(" ]")); + } + return static_cast(id); + } + }; + + } // namespace traits + +} // namespace pmacc diff --git a/include/pmacc/traits/GetValueType.hpp b/include/pmacc/traits/GetValueType.hpp index 1076c3b8fc..f2312848df 100644 --- a/include/pmacc/traits/GetValueType.hpp +++ b/include/pmacc/traits/GetValueType.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -30,7 +30,7 @@ namespace pmacc { typedef typename T::ValueType ValueType; }; - } -} + } // namespace traits +} // namespace pmacc #include "GetValueType.tpp" diff --git a/include/pmacc/traits/GetValueType.tpp b/include/pmacc/traits/GetValueType.tpp index b2bd9d4bc3..211643618f 100644 --- a/include/pmacc/traits/GetValueType.tpp +++ b/include/pmacc/traits/GetValueType.tpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -30,8 +30,5 @@ namespace pmacc { typedef Type ValueType; }; - } -} - - - + } // namespace traits +} // namespace pmacc diff --git a/include/pmacc/traits/HasFlag.hpp b/include/pmacc/traits/HasFlag.hpp index fed45d0caf..4fdf1b2f26 100644 --- a/include/pmacc/traits/HasFlag.hpp +++ b/include/pmacc/traits/HasFlag.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -24,26 +24,25 @@ namespace pmacc { -namespace traits -{ - -/** Checks if a Objects has an flag - * - * @tparam T_Object any object (class or typename) - * @tparam T_Key a class which is used as identifier - * - * This struct must define - * ::type (boost::mpl::bool_<>) - */ -template -struct HasFlag; + namespace traits + { + /** Checks if a Objects has an flag + * + * @tparam T_Object any object (class or typename) + * @tparam T_Key a class which is used as identifier + * + * This struct must define + * ::type (boost::mpl::bool_<>) + */ + template + struct HasFlag; -template -bool hasFlag(const T_Object& obj,const T_Key& key) -{ - return HasFlag::type::value; -} + template + bool hasFlag(const T_Object& obj, const T_Key& key) + { + return HasFlag::type::value; + } -}//namespace traits + } // namespace traits -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/traits/HasIdentifier.hpp b/include/pmacc/traits/HasIdentifier.hpp index 4ad2a6b206..9ea941266c 100644 --- a/include/pmacc/traits/HasIdentifier.hpp +++ 
b/include/pmacc/traits/HasIdentifier.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -26,41 +26,39 @@ namespace pmacc { -namespace traits -{ + namespace traits + { + /** Checks if a Objects has an identifier + * + * @tparam T_Object any object (class or typename) + * @tparam T_Key a class which is used as identifier + * + * This struct must define + * ::type (boost::mpl::bool_<>) + */ + template + struct HasIdentifier + { + /* The compiler is allowed to evaluate an expression that does not depend on a template parameter + * even if the class is never instantiated. In that case static assert is always + * evaluated (e.g. with clang), this results in an error if the condition is false. + * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html + * + * A workaround is to add a template dependency to the expression. + * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. + */ + PMACC_CASSERT_MSG_TYPE( + ___HasIdentifier_is_not_specialized_for_T_Object, + T_Object, + false && (sizeof(T_Object) != 0)); + }; -/** Checks if a Objects has an identifier - * - * @tparam T_Object any object (class or typename) - * @tparam T_Key a class which is used as identifier - * - * This struct must define - * ::type (boost::mpl::bool_<>) - */ -template -struct HasIdentifier -{ - /* The compiler is allowed to evaluate an expression that does not depend on a template parameter - * even if the class is never instantiated. In that case static assert is always - * evaluated (e.g. with clang), this results in an error if the condition is false. - * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html - * - * A workaround is to add a template dependency to the expression. - * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation. 
- */ - PMACC_CASSERT_MSG_TYPE( - ___HasIdentifier_is_not_specialized_for_T_Object, - T_Object, - false && ( sizeof(T_Object) != 0 ) - ); -}; - -template -bool hasIdentifier(const T_Object& obj,const T_Key& key) -{ - return HasIdentifier::type::value; -} + template + bool hasIdentifier(const T_Object& obj, const T_Key& key) + { + return HasIdentifier::type::value; + } -}//namespace traits + } // namespace traits -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/traits/HasIdentifiers.hpp b/include/pmacc/traits/HasIdentifiers.hpp index 8569b7cd6a..1bdb182902 100644 --- a/include/pmacc/traits/HasIdentifiers.hpp +++ b/include/pmacc/traits/HasIdentifiers.hpp @@ -1,4 +1,4 @@ -/* Copyright 2017-2020 Axel Huebl +/* Copyright 2017-2021 Axel Huebl * * This file is part of PMacc. * @@ -29,58 +29,33 @@ namespace pmacc { -namespace traits -{ - - /** Checks if an object has all specified identifiers - * - * Individual identifiers checks are logically connected via - * boost::mpl::and_ . - * - * @tparam T_Object any object (class or typename) - * @tparam T_SeqKeys a sequence of identifiers - * - * This struct must define - * ::type (boost::mpl::bool_<>) - */ - template< - typename T_Object, - typename T_SeqKeys - > - struct HasIdentifiers - { - using SeqHasIdentifiers = typename bmpl::transform< - T_SeqKeys, - HasIdentifier< - T_Object, - bmpl::_1 - > - >::type; - - using type = typename bmpl::accumulate< - SeqHasIdentifiers, - bmpl::bool_< true >, - bmpl::and_< - bmpl::_1, - bmpl::_2 - > - >::type; - }; - - template< - typename T_Object, - typename T_SeqKeys - > - bool hasIdentifiers( - T_Object const &, - T_SeqKeys const & - ) + namespace traits { - return HasIdentifiers< - T_Object, - T_SeqKeys - >::type::value; - } - -} // namespace traits + /** Checks if an object has all specified identifiers + * + * Individual identifiers checks are logically connected via + * boost::mpl::and_ . 
+ * + * @tparam T_Object any object (class or typename) + * @tparam T_SeqKeys a sequence of identifiers + * + * This struct must define + * ::type (boost::mpl::bool_<>) + */ + template + struct HasIdentifiers + { + using SeqHasIdentifiers = typename bmpl::transform>::type; + + using type = + typename bmpl::accumulate, bmpl::and_>::type; + }; + + template + bool hasIdentifiers(T_Object const&, T_SeqKeys const&) + { + return HasIdentifiers::type::value; + } + + } // namespace traits } // namespace pmacc diff --git a/include/pmacc/traits/IsBaseTemplateOf.hpp b/include/pmacc/traits/IsBaseTemplateOf.hpp new file mode 100644 index 0000000000..f8f565aad9 --- /dev/null +++ b/include/pmacc/traits/IsBaseTemplateOf.hpp @@ -0,0 +1,62 @@ +/* Copyright 2020-2021 Sergei Bastrakov + * + * This file is part of PMacc. + * + * PMacc is free software: you can redistribute it and/or modify + * it under the terms of either the GNU General Public License or + * the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PMacc is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License and the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * and the GNU Lesser General Public License along with PMacc. + * If not, see . + */ + +#pragma once + +#include + + +namespace pmacc +{ + namespace traits + { + /** Check if a type inherits the given class template (with any arguments) + * + * This is basically a version of std::is_base_of but for class template as base. 
+ * Based on Stack Overflow post: + * source: https://stackoverflow.com/a/34672753 + * author: rmawatson + * date: Aug 23 '18 + * + * @tparam T_Base base template (itself, without arguments) + * @tparam T_Derived derived type to check + * @treturn ::type std::true_type or std::false_type + */ + template class T_Base, typename T_Derived> + struct IsBaseTemplateOf + { + template + static constexpr std::true_type test(const T_Base*); + static constexpr std::false_type test(...); + using type = decltype(test(std::declval())); + }; + + /** Helper alias for IsBaseTemplateOf<...>::type + * + * @tparam T_Base base template (itself, without arguments) + * @tparam T_Derived derived type to check + * @treturn std::true_type or std::false_type + */ + template class T_Base, typename T_Derived> + using IsBaseTemplateOf_t = typename IsBaseTemplateOf::type; + + } // namespace traits +} // namespace pmacc diff --git a/include/pmacc/traits/Limits.hpp b/include/pmacc/traits/Limits.hpp index 679f32b45b..e6952c5173 100644 --- a/include/pmacc/traits/Limits.hpp +++ b/include/pmacc/traits/Limits.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. 
* @@ -26,28 +26,28 @@ namespace pmacc { -namespace traits -{ -namespace limits -{ -/** get maximum finite value - * - * @tparam T_Type any type - * @result ::value - */ -template -struct Max; + namespace traits + { + namespace limits + { + /** get maximum finite value + * + * @tparam T_Type any type + * @result ::value + */ + template + struct Max; -/** get minimum finite value - * - * @tparam T_Type any type - * @result ::value - */ -template -struct Min; + /** get minimum finite value + * + * @tparam T_Type any type + * @result ::value + */ + template + struct Min; -} //namespace limits -} //namespace traits -} //namespace pmacc + } // namespace limits + } // namespace traits +} // namespace pmacc #include "pmacc/traits/Limits.tpp" diff --git a/include/pmacc/traits/Limits.tpp b/include/pmacc/traits/Limits.tpp index 06117e2638..2cd165ae86 100644 --- a/include/pmacc/traits/Limits.tpp +++ b/include/pmacc/traits/Limits.tpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. 
* @@ -28,29 +28,28 @@ namespace pmacc { -namespace traits -{ -namespace limits -{ - -template<> -struct Max -{ - static constexpr int value=INT_MAX; -}; - -template<> -struct Max -{ - static constexpr uint32_t value=static_cast(-1); -}; - -template<> -struct Max -{ - static constexpr uint64_t value=static_cast(-1); -}; - -} //namespace limits -} //namespace traits -} //namespace pmacc + namespace traits + { + namespace limits + { + template<> + struct Max + { + static constexpr int value = INT_MAX; + }; + + template<> + struct Max + { + static constexpr uint32_t value = static_cast(-1); + }; + + template<> + struct Max + { + static constexpr uint64_t value = static_cast(-1); + }; + + } // namespace limits + } // namespace traits +} // namespace pmacc diff --git a/include/pmacc/traits/NumberOfExchanges.hpp b/include/pmacc/traits/NumberOfExchanges.hpp index 4fef38085d..3fd4dcd21d 100644 --- a/include/pmacc/traits/NumberOfExchanges.hpp +++ b/include/pmacc/traits/NumberOfExchanges.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. 
* @@ -25,37 +25,35 @@ namespace pmacc { - -namespace traits -{ -/** Get number of possible exchanges - * - * \tparam T_dim dimension of the simulation - * \return \p ::value number of possible exchanges - * (is number neighbors + myself) - */ -template -struct NumberOfExchanges; - -template<> -struct NumberOfExchanges -{ - static constexpr uint32_t value = LEFT + RIGHT; -}; - -template<> -struct NumberOfExchanges -{ - static constexpr uint32_t value = TOP + BOTTOM; -}; - -template<> -struct NumberOfExchanges -{ - static constexpr uint32_t value = BACK + FRONT; -}; - -} //namespace traits - -}// namespace pmacc - + namespace traits + { + /** Get number of possible exchanges + * + * \tparam T_dim dimension of the simulation + * \return \p ::value number of possible exchanges + * (is number neighbors + myself) + */ + template + struct NumberOfExchanges; + + template<> + struct NumberOfExchanges + { + static constexpr uint32_t value = LEFT + RIGHT; + }; + + template<> + struct NumberOfExchanges + { + static constexpr uint32_t value = TOP + BOTTOM; + }; + + template<> + struct NumberOfExchanges + { + static constexpr uint32_t value = BACK + FRONT; + }; + + } // namespace traits + +} // namespace pmacc diff --git a/include/pmacc/traits/Resolve.hpp b/include/pmacc/traits/Resolve.hpp index de984fb3f2..8fc9b19d32 100644 --- a/include/pmacc/traits/Resolve.hpp +++ b/include/pmacc/traits/Resolve.hpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera +/* Copyright 2014-2021 Rene Widera * * This file is part of PMacc. * @@ -24,25 +24,24 @@ namespace pmacc { -namespace traits -{ - -/** Get resolved type - * - * Explicitly resolve the type of a synonym type, e.g., resolve the type of an PMacc alias. - * A synonym type is wrapper type (class) around an other type. - * If this trait is not defined for the given type the result is the identity of the given type. 
- * - * @tparam T_Object any object (class or typename) - * - * @treturn ::type - */ -template -struct Resolve -{ - typedef T_Object type; -}; + namespace traits + { + /** Get resolved type + * + * Explicitly resolve the type of a synonym type, e.g., resolve the type of an PMacc alias. + * A synonym type is wrapper type (class) around an other type. + * If this trait is not defined for the given type the result is the identity of the given type. + * + * @tparam T_Object any object (class or typename) + * + * @treturn ::type + */ + template + struct Resolve + { + typedef T_Object type; + }; -}//namespace traits + } // namespace traits -}//namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/type/Area.hpp b/include/pmacc/type/Area.hpp index 57def5afe1..03aa3c6d38 100644 --- a/include/pmacc/type/Area.hpp +++ b/include/pmacc/type/Area.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -26,22 +26,21 @@ namespace pmacc { -namespace type -{ - - /*! area which is calculated - * - * CORE is the inner area of a grid - * BORDER is the border of a grid (my own border, not the neighbor part) - */ - enum AreaType + namespace type { - CORE = 1u, - BORDER = 2u, - GUARD = 4u - }; + /*! 
area which is calculated + * + * CORE is the inner area of a grid + * BORDER is the border of a grid (my own border, not the neighbor part) + */ + enum AreaType + { + CORE = 1u, + BORDER = 2u, + GUARD = 4u + }; -} // namespace type + } // namespace type // for backward compatibility pull all definitions into the pmacc namespace using namespace type; diff --git a/include/pmacc/type/Exchange.hpp b/include/pmacc/type/Exchange.hpp index 6b27a2ed4f..c9e57ee0a8 100644 --- a/include/pmacc/type/Exchange.hpp +++ b/include/pmacc/type/Exchange.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -28,58 +28,70 @@ namespace pmacc { -namespace type -{ - - /** - * Bitmask which describes the direction of communication. - * - * Bitmasks may be combined logically, e.g. LEFT+TOP = TOPLEFT. - * It is not possible to combine complementary masks (e.g. FRONT and BACK), - * as a bitmask always defines one direction of communication (send or receive). - * - * Axis index relation: - * right & left are in X - * bottom & top are in Y - * back & front are in Z - */ - enum ExchangeType + namespace type { - RIGHT = 1u, - LEFT = 2u, - BOTTOM = 3u, - TOP = 6u, - BACK = 9u, - FRONT = 18u // 3er-System - }; + /** + * Bitmask which describes the direction of communication. + * + * Bitmasks may be combined logically, e.g. LEFT+TOP = TOPLEFT. + * It is not possible to combine complementary masks (e.g. FRONT and BACK), + * as a bitmask always defines one direction of communication (send or receive). 
+ * + * Axis index relation: + * right & left are in X + * bottom & top are in Y + * back & front are in Z + */ + enum ExchangeType + { + RIGHT = 1u, + LEFT = 2u, + BOTTOM = 3u, + TOP = 6u, + BACK = 9u, + FRONT = 18u // 3er-System + }; - struct ExchangeTypeNames - { - std::string operator[]( const uint32_t exchange ) const + struct ExchangeTypeNames { - if( exchange >= 27 ) - return std::string("unknown exchange type: ") + std::to_string(exchange); + std::string operator[](const uint32_t exchange) const + { + if(exchange >= 27) + return std::string("unknown exchange type: ") + std::to_string(exchange); - const char* names[27] = { - "none", - "right", "left", "bottom", - "right-bottom", "left-bottom", - "top", - "right-top", "left-top", - "back", - "right-back", "left-back", - "bottom-back", "right-bottom-back", "left-bottom-back", - "top-back", "right-top-back", "left-top-back", - "front", - "right-front", "left-front", - "bottom-front", "right-bottom-front", "left-bottom-front", - "top-front", "right-top-front", "left-top-front" - }; - return names[exchange]; - } - }; + const char* names[27] + = {"none", + "right", + "left", + "bottom", + "right-bottom", + "left-bottom", + "top", + "right-top", + "left-top", + "back", + "right-back", + "left-back", + "bottom-back", + "right-bottom-back", + "left-bottom-back", + "top-back", + "right-top-back", + "left-top-back", + "front", + "right-front", + "left-front", + "bottom-front", + "right-bottom-front", + "left-bottom-front", + "top-front", + "right-top-front", + "left-top-front"}; + return names[exchange]; + } + }; -} // namespace type + } // namespace type // for backward compatibility pull all definitions into the pmacc namespace using namespace type; diff --git a/include/pmacc/type/Integral.hpp b/include/pmacc/type/Integral.hpp index 85533edc8f..8c5c039756 100644 --- a/include/pmacc/type/Integral.hpp +++ b/include/pmacc/type/Integral.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, 
+/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -28,14 +28,13 @@ namespace pmacc { -namespace type -{ - - using id_t = uint64_t; - using uint64_cu = unsigned long long int; - using int64_cu = long long int; + namespace type + { + using id_t = uint64_t; + using uint64_cu = unsigned long long int; + using int64_cu = long long int; -} // namespace type + } // namespace type // for backward compatibility pull all definitions into the pmacc namespace using namespace type; diff --git a/include/pmacc/types.hpp b/include/pmacc/types.hpp index 03d05ed716..7dc8a16344 100644 --- a/include/pmacc/types.hpp +++ b/include/pmacc/types.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Heiko Burau, Rene Widera, * Wolfgang Hoenig, Benjamin Worpitz, * Alexander Grund * @@ -30,34 +30,18 @@ #include #ifndef PMACC_CUDA_ENABLED -# define PMACC_CUDA_ENABLED ALPAKA_ACC_GPU_CUDA_ENABLED +# define PMACC_CUDA_ENABLED ALPAKA_ACC_GPU_CUDA_ENABLED #endif -#if( PMACC_CUDA_ENABLED == 1 ) +#if(BOOST_LANG_CUDA || BOOST_COMP_HIP) /* include mallocMC before cupla renaming is activated, else we need the variable acc * to call atomic cuda functions */ -# include +# include #endif -#include - -#if( PMACC_CUDA_ENABLED == 1 ) -/** @todo please remove this workaround - * This workaround allows to use native CUDA on the CUDA device without - * passing the variable `acc` to each function. This is only needed during the - * porting phase to allow the full feature set of the plain PMacc and PIConGPU - * CUDA version if the accelerator is CUDA. 
- */ -# undef blockIdx -# undef __syncthreads -# undef threadIdx -# undef gridDim -# undef blockDim -# undef uint3 - -#endif +#include #include "pmacc/debug/PMaccVerbose.hpp" #include "pmacc/ppFunctions.hpp" @@ -81,8 +65,7 @@ namespace pmacc { + namespace bmpl = boost::mpl; + namespace bfs = boost::filesystem; -namespace bmpl = boost::mpl; -namespace bfs = boost::filesystem; - -} //namespace pmacc +} // namespace pmacc diff --git a/include/pmacc/verify.hpp b/include/pmacc/verify.hpp index d221aaf91b..c3f59e7e43 100644 --- a/include/pmacc/verify.hpp +++ b/include/pmacc/verify.hpp @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Rene Widera +/* Copyright 2016-2021 Rene Widera * * This file is part of PMacc. * @@ -30,8 +30,7 @@ * * @param expr expression to be evaluated */ -#define PMACC_VERIFY( expr ) \ - ( !!(expr) ) ? ( (void) 0 ) : pmacc::abortWithError( #expr, __FILE__, __LINE__ ) +#define PMACC_VERIFY(expr) (!!(expr)) ? ((void) 0) : pmacc::abortWithError(#expr, __FILE__, __LINE__) /** verify expression with message * @@ -41,5 +40,4 @@ * @param msg output message (of type `std::string`) which is printed if the * expression is evaluated to false */ -#define PMACC_VERIFY_MSG( expr, msg ) \ - ( !!(expr) ) ? ( (void) 0 ) : pmacc::abortWithError( #expr, __FILE__, __LINE__, msg ) +#define PMACC_VERIFY_MSG(expr, msg) (!!(expr)) ? ((void) 0) : pmacc::abortWithError(#expr, __FILE__, __LINE__, msg) diff --git a/lib/python/picongpu/input/parameters.py b/lib/python/picongpu/input/parameters.py index fd554c5d52..8dc174dd66 100755 --- a/lib/python/picongpu/input/parameters.py +++ b/lib/python/picongpu/input/parameters.py @@ -1,7 +1,7 @@ """ This file is part of PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke, Jeffrey Kelling License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/data/XrayScatteringData.py b/lib/python/picongpu/plugins/data/XrayScatteringData.py new file mode 100644 index 0000000000..087d2a4ae8 --- /dev/null +++ b/lib/python/picongpu/plugins/data/XrayScatteringData.py @@ -0,0 +1,101 @@ +""" +This file is part of the PIConGPU. + +Copyright 2017-2021 PIConGPU contributors +Authors: Pawel Ordyna +License: GPLv3+ +""" +from .base_reader import DataReader + +from os import path +import numpy as np +import openpmd_api as api + + +class XrayScatteringData(DataReader): + """ Data reader for the xrayScattering plugin. """ + + def __init__(self, run_directory, species, file_extension='bp', + file_name_base='Output'): + """ + Parameters + ---------- + run_directory : string + path to the run directory of PIConGPU + (the path before ``simOutput/``) + species : string + Species for which the plugin output should be loaded. It's the + string defined in `speciesDefinition.param`. + file_extension : string + file extension of the xrayScattering output file. + Default is "bp". + file_name_base : string + String name set in the xrayScattering command line parameter + fileName. Default is "Output". + The full file name is + + :: + `_xrayScattering.` + """ + + super().__init__(run_directory) + + self.full_file_name = (species + "_xrayScattering" + file_name_base + + "." + file_extension) + + self.full_path = path.join(self.run_directory, + "simOutput/xrayScatteringOutput") + self.full_path = path.join(self.full_path, self.full_file_name) + # openPMD series + self.series = api.Series(self.full_path, api.Access_Type.read_only) + self.total_simulation_cells = self.series.get_attribute( + "totalSimulationCells") + + def get_data_path(self, **kwargs): + """ + Returns + ------- + A string with the path to the underlying data file. 
+ """ + return self.full_path + + def get_iterations(self, **kwargs): + """ + Returns + ------- + An array with unsigned integers of iterations for which + data is available. + """ + return np.array(list(self.series.iterations)) + + def _get_for_iteration(self, iteration, **kwargs): + """ Get the data for a given iteration in PIC units. + + Call `get_unit` method to get the conversion factor (to SI). + + Returns + ------- + The complex scattering amplitude in PIC units. + """ + + i = self.series.iterations[iteration] + amplitude = i.meshes['amplitude'] + mrc_real, mrc_imag = amplitude['x'], amplitude['y'] + real = mrc_real.load_chunk() + imag = mrc_imag.load_chunk() + self.series.flush() + if mrc_imag.dtype.type is np.float32: + dtype = np.complex64 + elif mrc_imag.dtype.type is np.float64: + dtype = np.complex128 + else: + raise TypeError + result = (real + 1j * imag) * self.total_simulation_cells + return result.astype(dtype) + + def get_unit(self): + """ Get the amplitude unit. """ + i = self.series.iterations[self.get_iterations()[0]] + amplitude = i.meshes['amplitude'] + mrc_real = amplitude['x'] + return mrc_real.unit_SI diff --git a/lib/python/picongpu/plugins/data/__init__.py b/lib/python/picongpu/plugins/data/__init__.py index 3c85f5f986..d9d5d006c4 100644 --- a/lib/python/picongpu/plugins/data/__init__.py +++ b/lib/python/picongpu/plugins/data/__init__.py @@ -5,6 +5,7 @@ from .sliceFieldReader import FieldSliceData from .emittance import EmittanceData from .transitionradiation import TransitionRadiationData +from .XrayScatteringData import XrayScatteringData __all__ = [ "EnergyHistogramData", @@ -14,4 +15,5 @@ "FieldSliceData", "EmittanceData", "TransitionRadiationData", + "XrayScatteringData" ] diff --git a/lib/python/picongpu/plugins/data/base_reader.py b/lib/python/picongpu/plugins/data/base_reader.py index 4d869e1189..39c8a19bf4 100644 --- a/lib/python/picongpu/plugins/data/base_reader.py +++ b/lib/python/picongpu/plugins/data/base_reader.py @@ -1,7 
+1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ @@ -38,7 +38,7 @@ def get_dt(self): """ return self.find_time.get_dt() - def get_times(self, **kwargs): + def get_times(self, *args, **kwargs): """ Returns ------- @@ -46,10 +46,10 @@ def get_times(self, **kwargs): data is available """ - iterations = np.array(self.get_iterations(**kwargs)) + iterations = np.array(self.get_iterations(*args, **kwargs)) return self.find_time.get_time(iterations) - def get_data_path(self, **kwargs): + def get_data_path(self, *args, **kwargs): """ Returns ------- @@ -57,7 +57,7 @@ def get_data_path(self, **kwargs): """ raise NotImplementedError - def get_iterations(self, **kwargs): + def get_iterations(self, *args, **kwargs): """ Returns ------- @@ -66,7 +66,7 @@ def get_iterations(self, **kwargs): """ raise NotImplementedError - def get(self, **kwargs): + def get(self, *args, **kwargs): """ Parameters ---------- @@ -74,6 +74,10 @@ def get(self, **kwargs): If both are given, the 'time' argument is converted to an iteration and data for the iteration matching the time is returned. + For other valid args and kwargs, please look at the + documentation of the '_get_for_iteration' methods + of the derived classes since the parameters are passed + on to that function. time: float or np.array of float or None. If None, data for all available times is returned. @@ -103,15 +107,15 @@ def get(self, **kwargs): time = kwargs.pop('time') if time is None: # use all times that are available, i.e. 
all iterations - iteration = self.get_iterations(**kwargs) + iteration = self.get_iterations(*args, **kwargs) else: iteration = self.find_time.get_iteration( time, method='closest') # print("got 'time'=", time, ", converted to iter", iteration) - return self._get_for_iteration(iteration, **kwargs) + return self._get_for_iteration(iteration, *args, **kwargs) - def _get_for_iteration(self, iteration, **kwargs): + def _get_for_iteration(self, iteration, *args, **kwargs): """ Get the data for a given iteration. diff --git a/lib/python/picongpu/plugins/data/emittance.py b/lib/python/picongpu/plugins/data/emittance.py index 830b556a74..2ab0778a42 100644 --- a/lib/python/picongpu/plugins/data/emittance.py +++ b/lib/python/picongpu/plugins/data/emittance.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sophie Rudat, Axel Huebl License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/data/energy_histogram.py b/lib/python/picongpu/plugins/data/energy_histogram.py index ac6e29baff..0ea0d23265 100644 --- a/lib/python/picongpu/plugins/data/energy_histogram.py +++ b/lib/python/picongpu/plugins/data/energy_histogram.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Axel Huebl License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/data/phase_space.py b/lib/python/picongpu/plugins/data/phase_space.py index b8f6334835..5bad6fcf15 100644 --- a/lib/python/picongpu/plugins/data/phase_space.py +++ b/lib/python/picongpu/plugins/data/phase_space.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Axel Huebl License: GPLv3+ """ @@ -10,9 +10,7 @@ import collections import numpy as np import os -import glob -import re -import h5py as h5 +import openpmd_api as io class PhaseSpaceMeta(object): @@ -71,10 +69,9 @@ def __init__(self, run_directory): super().__init__(run_directory) self.data_file_prefix = "PhaseSpace_{0}_{1}_{2}_{3}" - self.data_file_suffix = ".h5" self.data_hdf5_path = "/data/{0}/{1}" - def get_data_path(self, ps, species, species_filter="all", iteration=None): + def get_data_path(self, ps, species, species_filter="all", file_ext="h5"): """ Return the path to the underlying data file. @@ -89,19 +86,16 @@ def get_data_path(self, ps, species, species_filter="all", iteration=None): species_filter: string name of the particle species filter, default is 'all' (defined in ``particleFilters.param``) - iteration : (unsigned) int or list of int [unitless] - The iteration at which to read the data. - If 'None', a regular expression string matching - all iterations will be returned. + file_ext: string + filename extension for openPMD backend + default is 'h5' for the HDF5 backend Returns ------- - A string with a file path and a string with a in-file HDF5 path if - iteration is a single value or a list of length one. - If iteration is a list of length > 1, a list of paths is returned. - If iteration is None, only the first string is returned and contains a - regex-* for the position iteration. + A string with a the full openPMD file path pattern for loading from + a file-based iteration layout. """ + # @todo different file extensions? if species is None: raise ValueError('The species parameter can not be None!') if species_filter is None: @@ -123,45 +117,17 @@ def get_data_path(self, ps, species, species_filter="all", iteration=None): 'Did the simulation already run?' 
.format(self.run_directory)) - if iteration is not None: - if not isinstance(iteration, collections.Iterable): - iteration = [iteration] - - ret = [] - for it in iteration: - data_file_name = self.data_file_prefix.format( - species, - species_filter, - ps, - str(it)) + self.data_file_suffix - data_file_path = os.path.join(output_dir, data_file_name) - - if not os.path.isfile(data_file_path): - raise IOError('The file {} does not exist.\n' - 'Did the simulation already run?' - .format(data_file_path)) - - data_hdf5_name = self.data_hdf5_path.format( - it, - ps) - - ret.append((data_file_path, data_hdf5_name)) - if len(iteration) == 1: - return ret[0] - else: - return ret - else: - iteration_str = "*" - - data_file_name = self.data_file_prefix.format( - species, - species_filter, - ps, - iteration_str - ) + self.data_file_suffix - return os.path.join(output_dir, data_file_name) - - def get_iterations(self, ps, species, species_filter='all'): + iteration_str = "%T" + data_file_name = self.data_file_prefix.format( + species, + species_filter, + ps, + iteration_str + ) + '.' + file_ext + return os.path.join(output_dir, data_file_name) + + def get_iterations(self, ps, species, species_filter='all', + file_ext="h5"): """ Return an array of iterations with available data. @@ -176,32 +142,25 @@ def get_iterations(self, ps, species, species_filter='all'): species_filter: string name of the particle species filter, default is 'all' (defined in ``particleFilters.param``) + file_ext: string + filename extension for openPMD backend + default is 'h5' for the HDF5 backend Returns ------- An array with unsigned integers. 
""" # get the regular expression matching all available files - data_file_path = self.get_data_path(ps, species, species_filter) - - matching_files = glob.glob(data_file_path) - re_it = re.compile(data_file_path.replace("*", "([0-9]+)")) - - iterations = np.array( - sorted( - map( - lambda file_path: - np.uint64(re_it.match(file_path).group(1)), - matching_files - ) - ), - dtype=np.uint64 - ) + data_file_path = self.get_data_path(ps, species, species_filter, + file_ext=file_ext) + + series = io.Series(data_file_path, io.Access.read_only) + iterations = [key for key, _ in series.iterations.items()] return iterations def _get_for_iteration(self, iteration, ps, species, species_filter='all', - **kwargs): + file_ext="h5", **kwargs): """ Get a phase space histogram. @@ -219,6 +178,9 @@ def _get_for_iteration(self, iteration, ps, species, species_filter='all', species_filter: string name of the particle species filter, default is 'all' (defined in ``particleFilters.param``) + file_ext: string + filename extension for openPMD backend + default is 'h5' for the HDF5 backend Returns ------- @@ -231,8 +193,11 @@ def _get_for_iteration(self, iteration, ps, species, species_filter='all', containing ps and ps_meta for each requested iteration. If a single iteration is requested, return the tuple (ps, ps_meta). 
""" - available_iterations = self.get_iterations( - ps, species, species_filter) + + data_file_path = self.get_data_path(ps, species, species_filter, + file_ext=file_ext) + series = io.Series(data_file_path, io.Access.read_only) + available_iterations = [key for key, _ in series.iterations.items()] if iteration is not None: if not isinstance(iteration, collections.Iterable): @@ -247,28 +212,25 @@ def _get_for_iteration(self, iteration, ps, species, species_filter='all', iteration = available_iterations ret = [] - for it in iteration: - data_file_path, data_hdf5_name = self.get_data_path( - ps, - species, - species_filter, - it) - - f = h5.File(data_file_path, 'r') - ps_data = f[data_hdf5_name] + for index in iteration: + it = series.iterations[index] + dataset_name = "{}_{}_{}".format(species, species_filter, ps) + mesh = it.meshes[dataset_name] + ps_data = mesh[io.Mesh_Record_Component.SCALAR] # all in SI - dV = ps_data.attrs['dV'] * ps_data.attrs['dr_unit']**3 - unitSI = ps_data.attrs['sim_unit'] - p_range = ps_data.attrs['p_unit'] * \ - np.array([ps_data.attrs['p_min'], ps_data.attrs['p_max']]) - - mv_start = ps_data.attrs['movingWindowOffset'] - mv_end = mv_start + ps_data.attrs['movingWindowSize'] + dV = mesh.get_attribute('dV') * mesh.get_attribute('dr')**3 + unitSI = mesh.get_attribute('sim_unit') + p_range = mesh.get_attribute('p_unit') * \ + np.array( + [mesh.get_attribute('p_min'), mesh.get_attribute('p_max')]) + + mv_start = mesh.get_attribute('movingWindowOffset') + mv_end = mv_start + mesh.get_attribute('movingWindowSize') # 2D histogram: 0 (r_i); 1 (p_i) - spatial_offset = ps_data.attrs['_global_start'][1] + spatial_offset = mesh.get_attribute('_global_start')[0] - dr = ps_data.attrs['dr'] * ps_data.attrs['dr_unit'] + dr = mesh.get_attribute('dr') * mesh.get_attribute('dr_unit') r_range_cells = np.array([mv_start, mv_end]) + spatial_offset r_range = r_range_cells * dr @@ -278,7 +240,7 @@ def _get_for_iteration(self, iteration, ps, species, 
species_filter='all', # cut out the current window & scale by unitSI ps_cut = ps_data[mv_start:mv_end, :] * unitSI - f.close() + it.close() ps_meta = PhaseSpaceMeta( species, species_filter, ps, ps_cut.shape, extent, dV) diff --git a/lib/python/picongpu/plugins/data/png.py b/lib/python/picongpu/plugins/data/png.py index 79e41ef17f..f391f5e972 100644 --- a/lib/python/picongpu/plugins/data/png.py +++ b/lib/python/picongpu/plugins/data/png.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ @@ -9,8 +9,9 @@ import numpy as np import os -from scipy import misc import collections +from imageio import imread + SPECIES_LONG_NAMES = { 'e': 'Electrons' @@ -196,7 +197,7 @@ def _get_for_iteration(self, iteration, species, species_filter='all', # iteration is None, so we use all available data iteration = available_iterations - imgs = [misc.imread( + imgs = [imread( self.get_data_path(species, species_filter, axis, slice_point, it)) for it in iteration] diff --git a/lib/python/picongpu/plugins/data/radiation.py b/lib/python/picongpu/plugins/data/radiation.py index 988e300f62..639a7f1dbd 100644 --- a/lib/python/picongpu/plugins/data/radiation.py +++ b/lib/python/picongpu/plugins/data/radiation.py @@ -1,4 +1,4 @@ -# Copyright 2016-2020 Richard Pausch +# Copyright 2016-2021 Richard Pausch # # This file is part of PIConGPU. 
# diff --git a/lib/python/picongpu/plugins/data/requirements.txt b/lib/python/picongpu/plugins/data/requirements.txt index 515602f5a6..305e86a761 100644 --- a/lib/python/picongpu/plugins/data/requirements.txt +++ b/lib/python/picongpu/plugins/data/requirements.txt @@ -2,4 +2,5 @@ numpy pandas>=0.21.0 h5py pillow -scipy +imageio +openPMD-api>=0.10.3gt diff --git a/lib/python/picongpu/plugins/data/sliceFieldReader.py b/lib/python/picongpu/plugins/data/sliceFieldReader.py index 68b526f85b..da658a504d 100644 --- a/lib/python/picongpu/plugins/data/sliceFieldReader.py +++ b/lib/python/picongpu/plugins/data/sliceFieldReader.py @@ -1,4 +1,4 @@ -# Copyright 2014-2020 Richard Pausch, Klaus Steiniger +# Copyright 2014-2021 Richard Pausch, Klaus Steiniger # # This file is part of PIConGPU. # diff --git a/lib/python/picongpu/plugins/data/transitionradiation.py b/lib/python/picongpu/plugins/data/transitionradiation.py index b77b6b0c7f..db88d9e28d 100644 --- a/lib/python/picongpu/plugins/data/transitionradiation.py +++ b/lib/python/picongpu/plugins/data/transitionradiation.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Axel Huebl, Finn-Ole Carstens License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/jupyter_widgets/base_widget.py b/lib/python/picongpu/plugins/jupyter_widgets/base_widget.py index 903035db01..75c9fd2e0e 100644 --- a/lib/python/picongpu/plugins/jupyter_widgets/base_widget.py +++ b/lib/python/picongpu/plugins/jupyter_widgets/base_widget.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ @@ -166,6 +166,14 @@ def _show_run_dir_options_in_dropdown(self): self._handle_run_dir_selection_callback, names='value') # set the UI self.sim_drop.options = sim_options + # don't select a value yet but leave it to the user. 
+ # this needs to be handled differently + # for single and multi selection + if isinstance(self.sim_drop, widgets.Dropdown): + self.sim_drop.value = None + else: + # we assume widgets.SelectMultiple instance here + self.sim_drop.value = () # re-enable the callback functions self.sim_drop.observe( self._handle_run_dir_selection_callback, names='value') @@ -187,6 +195,20 @@ def set_run_dir_options(self, run_dir_options): # set the options in the dropdown self._show_run_dir_options_in_dropdown() + # clear the ax (this is done by the current plot_mpl instance) + self._clean_ax() + + # create a fresh plot_mpl object since the old + # one had some run directories which are outdated now + plot_mpl_class = type(self.plot_mpl) + self.plot_mpl = plot_mpl_class( + run_directories=None, + ax=self.ax) + + # the user has not yet chosen any simulation + # so we have no option about which times are available + self.sim_time_slider.options = ('',) + def _init_fig_and_ax(self, fig, **kwargs): """ Creates the figure and the ax as members. @@ -381,10 +403,6 @@ def visualize(self, **kwargs): if time is None or time == "": return - # print("{} called visualize for time {} and run_dirs {}".format( - # type(self), time, - # [reader.run_directory for reader in self.plot_mpl.data_reader])) - vis_params = self._get_widget_args() try: self.plot_mpl.visualize(time=time, @@ -396,13 +414,16 @@ def visualize(self, **kwargs): # since interactive mode should be turned off, we have # to update the figure explicitely try: - self.fig.canvas.draw() - self.fig.canvas.flush_events() + self.update_plot() except ValueError as e: warn("{}: drawing the plot failed! 
Reason: {}".format( type(self), e)) # raise e + def update_plot(self): + self.fig.canvas.draw() + self.fig.canvas.flush_events() + def _make_drop_val_compatible(self, val): """ Depending on the type of self.sim_drop we have to @@ -482,3 +503,5 @@ def _use_options_from_other(self, other): @capture_output def _clean_ax(self): self.plot_mpl._clean_ax() + # refresh the figure since we are not in interactive mode + self.update_plot() diff --git a/lib/python/picongpu/plugins/jupyter_widgets/energy_histogram_widget.py b/lib/python/picongpu/plugins/jupyter_widgets/energy_histogram_widget.py index 73e3581e84..2749733666 100644 --- a/lib/python/picongpu/plugins/jupyter_widgets/energy_histogram_widget.py +++ b/lib/python/picongpu/plugins/jupyter_widgets/energy_histogram_widget.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/jupyter_widgets/phase_space_widget.py b/lib/python/picongpu/plugins/jupyter_widgets/phase_space_widget.py index 457406fd29..e8081acbfd 100644 --- a/lib/python/picongpu/plugins/jupyter_widgets/phase_space_widget.py +++ b/lib/python/picongpu/plugins/jupyter_widgets/phase_space_widget.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/jupyter_widgets/png_widget.py b/lib/python/picongpu/plugins/jupyter_widgets/png_widget.py index 79ec875114..9d8f0787ac 100644 --- a/lib/python/picongpu/plugins/jupyter_widgets/png_widget.py +++ b/lib/python/picongpu/plugins/jupyter_widgets/png_widget.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/jupyter_widgets/utils.py b/lib/python/picongpu/plugins/jupyter_widgets/utils.py index df2a6ade42..2137b61aa5 100644 --- a/lib/python/picongpu/plugins/jupyter_widgets/utils.py +++ b/lib/python/picongpu/plugins/jupyter_widgets/utils.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/plot_mpl/base_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/base_visualizer.py index 184e5392aa..70efd494e4 100644 --- a/lib/python/picongpu/plugins/plot_mpl/base_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/base_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/plot_mpl/emittance_evolution_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/emittance_evolution_visualizer.py index 5ba8cc7227..f96eacf29a 100644 --- a/lib/python/picongpu/plugins/plot_mpl/emittance_evolution_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/emittance_evolution_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sophie Rudat, Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/plot_mpl/energy_histogram_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/energy_histogram_visualizer.py index 80a3941493..7c105e1270 100644 --- a/lib/python/picongpu/plugins/plot_mpl/energy_histogram_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/energy_histogram_visualizer.py @@ -1,14 +1,16 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ +import numpy as np from picongpu.plugins.data import EnergyHistogramData from picongpu.plugins.plot_mpl.base_visualizer import Visualizer as\ BaseVisualizer +from warnings import warn class Visualizer(BaseVisualizer): @@ -44,6 +46,12 @@ def _create_plt_obj(self, idx): counts, bins, iteration, dt = self.data[idx] label = self.sim_labels[idx] + + if np.all(counts == 0.): + warn("All counts were 0 for {}. ".format(label) + + "No log-plot can be created!") + return + self.plt_obj[idx] = self.ax.semilogy( bins, counts, nonposy='clip', label=label, color=self.colors[idx])[0] @@ -53,6 +61,13 @@ def _update_plt_obj(self, idx): Implementation of base class function. """ counts, bins, iteration, dt = self.data[idx] + label = self.sim_labels[idx] + + if np.all(counts == 0.): + warn("All counts were 0 for {}. 
".format(label) + + "Log-plot will not be updated!") + return + self.plt_obj[idx].set_data(bins, counts) def visualize(self, **kwargs): diff --git a/lib/python/picongpu/plugins/plot_mpl/energy_waterfall_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/energy_waterfall_visualizer.py index 51ccbf1076..9627bb6ab7 100644 --- a/lib/python/picongpu/plugins/plot_mpl/energy_waterfall_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/energy_waterfall_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sophie Rudat, Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/plot_mpl/phase_space_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/phase_space_visualizer.py index 344d209d1c..4c5cc163c1 100644 --- a/lib/python/picongpu/plugins/plot_mpl/phase_space_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/phase_space_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ @@ -191,6 +191,9 @@ def visualize(self, **kwargs): ps : string phase space selection in order: spatial, momentum component, e.g. 'ypy' or 'ypx' + file_ext: string + filename extension for openPMD backend + default is 'h5' for the HDF5 backend """ super().visualize(**kwargs) diff --git a/lib/python/picongpu/plugins/plot_mpl/png_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/png_visualizer.py index a410970f18..bae668838d 100644 --- a/lib/python/picongpu/plugins/plot_mpl/png_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/png_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/plot_mpl/slice_emittance_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/slice_emittance_visualizer.py index 6cf06586f6..635109bf16 100644 --- a/lib/python/picongpu/plugins/plot_mpl/slice_emittance_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/slice_emittance_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sophie Rudat, Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/plugins/plot_mpl/slice_emittance_waterfall_visualizer.py b/lib/python/picongpu/plugins/plot_mpl/slice_emittance_waterfall_visualizer.py index 911f491320..84d58f48ee 100644 --- a/lib/python/picongpu/plugins/plot_mpl/slice_emittance_waterfall_visualizer.py +++ b/lib/python/picongpu/plugins/plot_mpl/slice_emittance_waterfall_visualizer.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sophie Rudat, Sebastian Starke License: GPLv3+ """ diff --git a/lib/python/picongpu/utils/field_ionization.py b/lib/python/picongpu/utils/field_ionization.py index 54c9d2a286..e966c194d8 100755 --- a/lib/python/picongpu/utils/field_ionization.py +++ b/lib/python/picongpu/utils/field_ionization.py @@ -1,7 +1,7 @@ """Field ionization models implemented in PIConGPU. This file is part of the PIConGPU. -Copyright 2019-2020 PIConGPU contributors +Copyright 2019-2021 PIConGPU contributors Authors: Marco Garten License: GPLv3+ """ diff --git a/lib/python/picongpu/utils/find_time.py b/lib/python/picongpu/utils/find_time.py index 042ffa8b24..6a46d58059 100644 --- a/lib/python/picongpu/utils/find_time.py +++ b/lib/python/picongpu/utils/find_time.py @@ -1,7 +1,7 @@ """ This file is part of the PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Axel Huebl License: GPLv3+ """ diff --git a/lib/python/picongpu/utils/memory_calculator.py b/lib/python/picongpu/utils/memory_calculator.py index 3e97e4f554..e647d7b6c5 100644 --- a/lib/python/picongpu/utils/memory_calculator.py +++ b/lib/python/picongpu/utils/memory_calculator.py @@ -6,7 +6,7 @@ It is supposed to give an estimate for the memory requirement of a PIConGPU simulation per device. -Copyright 2018-2020 PIConGPU contributors +Copyright 2018-2021 PIConGPU contributors Authors: Marco Garten, Sergei Bastrakov License: GPLv3+ """ diff --git a/lib/python/picongpu/utils/param_parser.py b/lib/python/picongpu/utils/param_parser.py index f0d972e371..77dbb70bc0 100644 --- a/lib/python/picongpu/utils/param_parser.py +++ b/lib/python/picongpu/utils/param_parser.py @@ -2,7 +2,7 @@ """ This file is part of the PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke License: GPLv3+ """ diff --git a/libraryDependencies.png b/libraryDependencies.png index 2f04308fc6..7904d86e71 100644 Binary files a/libraryDependencies.png and b/libraryDependencies.png differ diff --git a/share/ci/bash.profile b/share/ci/bash.profile new file mode 100755 index 0000000000..80443b74b5 --- /dev/null +++ b/share/ci/bash.profile @@ -0,0 +1,50 @@ +#!/bin/bash + +# setup dependencies for PIConGPU for CMake and runtime usage + +set -e +set -o pipefail + +if [ -d "/opt/pngwriter" ] ; then + export PNGWRITER_ROOT=/opt/pngwriter/0.7.0 +else + # pngwriter is currently install to the / instead of /opt + export PNGWRITER_ROOT=/pngwriter/0.7.0 +fi +export CMAKE_PREFIX_PATH=$PNGWRITER_ROOT:$CMAKE_PREFIX_PATH +export LD_LIBRARY_PATH=$PNGWRITER_ROOT/lib:$LD_LIBRARY_PATH + +export HDF5_ROOT=/opt/hdf5/1.8.20/ +export LD_LIBRARY_PATH=$HDF5_ROOT/lib:$LD_LIBRARY_PATH + +export SPLASH_ROOT=/opt/libsplash/1.7.0 +export 
CMAKE_PREFIX_PATH=$SPLASH_ROOT:$CMAKE_PREFIX_PATH +export LD_LIBRARY_PATH=$SPLASH_ROOT/lib:$LD_LIBRARY_PATH + +export ADIOS1_ROOT=/opt/adios/1.13.1 +export CMAKE_PREFIX_PATH=$ADIOS1_ROOT:$CMAKE_PREFIX_PATH +export PATH=$ADIOS1_ROOT/bin:$PATH +export LD_LIBRARY_PATH=$ADIOS1_ROOT/lib:$LD_LIBRARY_PATH + +export ADIOS2_ROOT=/opt/adios/2.6.0 +export CMAKE_PREFIX_PATH=$ADIOS2_ROOT:$CMAKE_PREFIX_PATH +export PATH=$ADIOS2_ROOT/bin:$PATH +export LD_LIBRARY_PATH=$ADIOS2_ROOT/lib:$LD_LIBRARY_PATH + +if [ -z "$DISABLE_ISAAC" ] ; then + export ICET_ROOT=/opt/icet/2.9.0 + export CMAKE_PREFIX_PATH=$ICET_ROOT/lib:$CMAKE_PREFIX_PATH + export LD_LIBRARY_PATH=$ICET_ROOT/lib:$LD_LIBRARY_PATH + + export JANSSON_ROOT=/opt/jansson/2.9.0/ + export CMAKE_PREFIX_PATH=$JANSSON_ROOT/lib/cmake:$CMAKE_PREFIX_PATH + export LD_LIBRARY_PATH=$JANSSON_ROOT/lib:$LD_LIBRARY_PATH + + export ISAAC_ROOT=/opt/isaac/1.6.0-dev + export CMAKE_PREFIX_PATH=$ISAAC_ROOT:$CMAKE_PREFIX_PATH + export LD_LIBRARY_PATH=$ISAAC_ROOT/lib:$LD_LIBRARY_PATH +fi + +export OPENPMD_ROOT=/opt/openPMD-api/0.12.0-dev +export CMAKE_PREFIX_PATH=$OPENPMD_ROOT:$CMAKE_PREFIX_PATH +export LD_LIBRARY_PATH=$OPENPMD_ROOT/lib:$LD_LIBRARY_PATH diff --git a/share/ci/check_cpp_code_style.sh b/share/ci/check_cpp_code_style.sh new file mode 100755 index 0000000000..d86e802172 --- /dev/null +++ b/share/ci/check_cpp_code_style.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set -e +set -o pipefail + +cd $CI_PROJECT_DIR + +# check code style with clang format +find include/ share/picongpu/ share/pmacc -iname "*.def" \ + -o -iname "*.h" -o -iname "*.cpp" -o -iname "*.cu" \ + -o -iname "*.hpp" -o -iname "*.tpp" -o -iname "*.kernel" \ + -o -iname "*.loader" -o -iname "*.param" -o -iname "*.unitless" \ + | xargs clang-format-11 --dry-run --Werror + +############################################################################# +# Conformance with Alpaka: Do not write __global__ CUDA kernels directly # 
+############################################################################# +test/hasCudaGlobalKeyword include/pmacc +test/hasCudaGlobalKeyword share/pmacc/examples +test/hasCudaGlobalKeyword include/picongpu +test/hasCudaGlobalKeyword share/picongpu/examples + +############################################################################# +# Disallow end-of-line (EOL) white spaces # +############################################################################# +test/hasEOLwhiteSpace + +############################################################################# +# Disallow TABs, use white spaces # +############################################################################# +test/hasTabs + +############################################################################# +# Disallow non-ASCII in source files and scripts # +############################################################################# +test/hasNonASCII + +############################################################################# +# Disallow spaces before pre-compiler macros # +############################################################################# +test/hasSpaceBeforePrecompiler + +############################################################################# +# Enforce angle brackets <...> for includes of external library files # +############################################################################# +test/hasExtLibIncludeBrackets include boost +test/hasExtLibIncludeBrackets include alpaka +test/hasExtLibIncludeBrackets include cupla +test/hasExtLibIncludeBrackets include splash +test/hasExtLibIncludeBrackets include mallocMC +test/hasExtLibIncludeBrackets include/picongpu pmacc +test/hasExtLibIncludeBrackets share/picongpu/examples pmacc +test/hasExtLibIncludeBrackets share/picongpu/examples boost +test/hasExtLibIncludeBrackets share/picongpu/examples alpaka +test/hasExtLibIncludeBrackets share/picongpu/examples cupla +test/hasExtLibIncludeBrackets share/picongpu/examples splash 
+test/hasExtLibIncludeBrackets share/picongpu/examples mallocMC +test/hasExtLibIncludeBrackets share/pmacc/examples pmacc diff --git a/share/ci/compiler_clang.yml b/share/ci/compiler_clang.yml new file mode 100644 index 0000000000..da3754a0b1 --- /dev/null +++ b/share/ci/compiler_clang.yml @@ -0,0 +1,17 @@ +################################################################################ +# [clang++-X] : X = {4.0, 5.0, 6.0, 7, 8, 9, 10, 11} + +.base_clang: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-clang-pic:1.2 + variables: + GIT_SUBMODULE_STRATEGY: normal + script: + - apt update + - apt install -y curl libjpeg-dev + - $CI_PROJECT_DIR/share/ci/git_merge.sh + - $CI_PROJECT_DIR/share/ci/bash.profile + - $CI_PROJECT_DIR/share/ci/run_pmacc_tests.sh + - $CI_PROJECT_DIR/share/ci/run_picongpu_tests.sh + # x86_64 tag is used to get a multi-core CPU for the tests + tags: + - x86_64 diff --git a/share/ci/compiler_clang_cuda.yml b/share/ci/compiler_clang_cuda.yml new file mode 100644 index 0000000000..58caf4477d --- /dev/null +++ b/share/ci/compiler_clang_cuda.yml @@ -0,0 +1,30 @@ +################################################################################ +# [clang++-X] : X = {4.0, 5.0, 6.0, 7, 8, 9, 10, 11} +# cuda9.2Clang is not supporting clang-7 + +.base_cuda_clang: + variables: + GIT_SUBMODULE_STRATEGY: normal + PIC_CMAKE_ARGS: "-DALPAKA_CUDA_COMPILER=clang" + script: + - apt update + - apt install -y curl libjpeg-dev + - $CI_PROJECT_DIR/share/ci/git_merge.sh + - $CI_PROJECT_DIR/share/ci/bash.profile + - $CI_PROJECT_DIR/share/ci/run_pmacc_tests.sh + - $CI_PROJECT_DIR/share/ci/run_picongpu_tests.sh + tags: + - cuda + - x86_64 + +.base_clangCuda_cuda_9.2: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda92-clangpic:1.2 + extends: .base_cuda_clang + +.base_clangCuda_cuda_10.0: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda100-clangpic:1.2 + extends: .base_cuda_clang + 
+.base_clangCuda_cuda_10.1: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda101-clangpic:1.2 + extends: .base_cuda_clang diff --git a/share/ci/compiler_gcc.yml b/share/ci/compiler_gcc.yml new file mode 100644 index 0000000000..ad24aa7b8e --- /dev/null +++ b/share/ci/compiler_gcc.yml @@ -0,0 +1,17 @@ +################################################################################ +# [g++-X] : X = {5, 6, 7, 8, 9 ,10} + +.base_gcc: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-gcc-pic:1.2 + variables: + GIT_SUBMODULE_STRATEGY: normal + script: + - apt update + - apt install -y curl libjpeg-dev + - $CI_PROJECT_DIR/share/ci/git_merge.sh + - $CI_PROJECT_DIR/share/ci/bash.profile + - $CI_PROJECT_DIR/share/ci/run_pmacc_tests.sh + - $CI_PROJECT_DIR/share/ci/run_picongpu_tests.sh + # x86_64 tag is used to get a multi-core CPU for the tests + tags: + - x86_64 diff --git a/share/ci/compiler_hipcc.yml b/share/ci/compiler_hipcc.yml new file mode 100644 index 0000000000..ba720aa4d3 --- /dev/null +++ b/share/ci/compiler_hipcc.yml @@ -0,0 +1,28 @@ +################################################################################ +# [clang-X] : X = {12} +# clang compiler is located under /opt/rocm/llvm/bin + +.base_hipcc: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-rocm4.0-pic:1.2 + variables: + GIT_SUBMODULE_STRATEGY: normal + PIC_CMAKE_ARGS: "-DALPAKA_HIP_ARCH=900 -DCMAKE_MODULE_PATH=/opt/rocm/hip/cmake" + # use VEGA64 GPU + HIP_VISIBLE_DEVICES: "2" + # ISAAC is not working with HIP + DISABLE_ISAAC: "yes" + script: + - export PATH="$PATH:/opt/rocm/llvm/bin/" + # rocm 4.0 container is missing a binary/symlink named `clang++-12` + - ln -s /opt/rocm/llvm/bin/clang++ /opt/rocm/llvm/bin/clang++-12 + - rocm-smi + - hipcc --version + - apt update + - apt install -y curl libjpeg-dev + - $CI_PROJECT_DIR/share/ci/git_merge.sh + - $CI_PROJECT_DIR/share/ci/bash.profile + - 
$CI_PROJECT_DIR/share/ci/run_pmacc_tests.sh + - $CI_PROJECT_DIR/share/ci/run_picongpu_tests.sh + tags: + - amd + - rocm diff --git a/share/ci/compiler_nvcc_cuda.yml b/share/ci/compiler_nvcc_cuda.yml new file mode 100644 index 0000000000..9af605df63 --- /dev/null +++ b/share/ci/compiler_nvcc_cuda.yml @@ -0,0 +1,47 @@ +################################################################################ +# [g++-X] : X = {5, 6, 7, 8, 9, 10} + +.base_nvcc: + variables: + GIT_SUBMODULE_STRATEGY: normal + before_script: + - nvidia-smi + - nvcc --version + script: + - apt update + - apt install -y curl libjpeg-dev + - $CI_PROJECT_DIR/share/ci/git_merge.sh + - $CI_PROJECT_DIR/share/ci/bash.profile + - $CI_PROJECT_DIR/share/ci/run_pmacc_tests.sh + - $CI_PROJECT_DIR/share/ci/run_picongpu_tests.sh + tags: + - cuda + - x86_64 + +.base_nvcc_cuda_9.2: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda92-gccpic:1.2 + extends: .base_nvcc + +.base_nvcc_cuda_10.0: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda100-gccpic:1.2 + extends: .base_nvcc + +.base_nvcc_cuda_10.1: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda101-gccpic:1.2 + extends: .base_nvcc + +.base_nvcc_cuda_10.2: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda102-gccpic:1.2 + extends: .base_nvcc + +.base_nvcc_cuda_11.0: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda110-gccpic:1.2 + extends: .base_nvcc + +.base_nvcc_cuda_11.1: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda111-gccpic:1.2 + extends: .base_nvcc + +.base_nvcc_cuda_11.2: + image: registry.gitlab.com/hzdr/crp/alpaka-group-container/alpaka-ci-cuda112-gccpic:1.2 + extends: .base_nvcc diff --git a/share/ci/generate_reduced_matrix.sh b/share/ci/generate_reduced_matrix.sh new file mode 100755 index 0000000000..b43a7b8e4e --- /dev/null +++ b/share/ci/generate_reduced_matrix.sh @@ -0,0 +1,42 @@ 
+#!/bin/bash + +set -e +set -o pipefail + +# generate a reduced matrix with ci jobs based on the list (space separated) provided by the environment variable PIC_INPUTS + +export PATH=$CI_PROJECT_DIR/share/ci:$PATH +export picongpu_DIR=$CI_PROJECT_DIR + +cd $picongpu_DIR/share/picongpu/ + +echo "include:" +echo " - local: '/share/ci/compiler_clang.yml'" +echo " - local: '/share/ci/compiler_gcc.yml'" +echo " - local: '/share/ci/compiler_nvcc_cuda.yml'" +echo " - local: '/share/ci/compiler_clang_cuda.yml'" +echo " - local: '/share/ci/compiler_hipcc.yml'" +echo "" + +# handle CI actions +has_label=$($CI_PROJECT_DIR/share/ci/pr_has_label.sh "CI:no-compile" && echo "0" || echo "1") +if [ "$has_label" == "0" ] ; then + echo "skip-compile:" + echo " script:" + echo " - echo \"CI action - 'CI:no-compile' -> skip compile/runtime tests\"" + exit 0 +fi + +folders=() +for CASE in ${PIC_INPUTS}; do + if [ "$CASE" == "examples" ] || [ "$CASE" == "tests" ] || [ "$CASE" == "benchmarks" ] ; then + all_cases=$(find ${CASE}/* -maxdepth 0 -type d) + else + all_cases=$(find $CASE -maxdepth 0 -type d) + fi + for test_case_folder in $all_cases ; do + folders+=($test_case_folder) + done +done + +echo "${folders[@]}" | tr " " "\n" | n_wise_generator.py $@ diff --git a/share/ci/git_merge.sh b/share/ci/git_merge.sh new file mode 100755 index 0000000000..09fc014725 --- /dev/null +++ b/share/ci/git_merge.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +set -e +set -o pipefail + +# merge the PR to the latest version of the destination branch + +cd $CI_PROJECT_DIR + +is_pr=$(echo "$CI_BUILD_REF_NAME" | grep -q "^pr-" && echo 0 || echo 1) +# merge only pull requests +if [ $is_pr -eq 0 ] ; then + github_group_repo="ComputationalRadiationPhysics/picongpu" + + pr_id=$(echo "$CI_BUILD_REF_NAME" | cut -d"/" -f1 | cut -d"-" -f2) + # used a token without any rights from psychocoderHPC to avoid API query limitations + curl_data=$(curl -u psychocoderHPC:$GITHUB_TOKEN -X GET 
https://api.github.com/repos/${github_group_repo}/pulls/${pr_id} 2>/dev/null) + echo "--- curl data ---" + echo "$curl_data" + echo "-----------------" + # get the destination branch + destination_branch=$(echo "$curl_data" | python3 -c 'import json,sys;obj=json.loads(sys.stdin.read());print(obj["base"]["ref"])') + destination_sha=$(echo "$curl_data" | python3 -c 'import json,sys;obj=json.loads(sys.stdin.read());print(obj["base"]["sha"])') + echo "destination_branch=${destination_branch}" + echo "destination_sha=${destination_sha}" + + mainline_exists=$(git remote -v | cut -f1 | grep mainline -q && echo 0 || echo 1) + # avoid adding the remote repository twice if gitlab already cached this operation + if [ $mainline_exists -ne 0 ] ; then + git remote add mainline https://github.com/${github_group_repo}.git + else + # if the PR was set to a different branch before + git remote set-url mainline https://github.com/${github_group_repo}.git + fi + git fetch mainline + + # required by git to be able to use `git rebase` + git config --global user.email "CI-BOT" + git config --global user.name "CI-BOT@hzdr.d" + + # make a copy of the pull request branch + git checkout -b pr_to_merge + # switch to the destination hash + git checkout -b destination_branch ${destination_sha} + # merge pull request to the destination + git merge --no-edit pr_to_merge +fi diff --git a/share/ci/n_wise_generator.py b/share/ci/n_wise_generator.py new file mode 100755 index 0000000000..07a7be119f --- /dev/null +++ b/share/ci/n_wise_generator.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 + +# generate a reduced test matrix based on the N-wise testing model +# https://en.wikipedia.org/wiki/All-pairs_testing + +from allpairspy import AllPairs +import argparse +import sys + + +parser = argparse.ArgumentParser(description='Generate tesing pairs') +parser.add_argument('-n', dest='n_pairs', default=1, action="store", + help='number of tuple elements') +parser.add_argument('--compact', dest='compact', 
action="store_true", + help='print compact form of the test matrix') +args = parser.parse_args() +n_pairs = int(args.n_pairs) + +examples = [] +for i in sys.stdin: + examples.append(i.rstrip()) + + +def get_version(tuple): + if len(tuple) >= 2: + return float(tuple[1]) + return 0 + + +# lookup table with compiler name and the required base container suffix +image_dict = { + "g++": ".base_gcc", + "g++_nvcc": ".base_nvcc", + "clang++_nvcc": ".base_clangCuda", + "clang++": ".base_clang", + "clang++_clangCuda": ".base_clangCuda", + "clang++_hipcc": ".base_hipcc" +} + + +def get_base_image(compiler, backend): + lookup_name = compiler[0] + if len(compiler) == 3: + lookup_name += "_" + compiler[2] + img_name = image_dict[lookup_name] + if backend[0] == "cuda": + img_name += "_" + backend[0] + "_" + str(backend[1]) + + return img_name + + +# filter invalid combinations +# +# filter based on the compatibility overview +# https://gist.github.com/ax3l/9489132 +def is_valid_combination(row): + n = len(row) + + if n >= 2: + v_compiler = get_version(row[0]) + + is_clang_cuda = True if len(row[0]) == 3 and \ + row[0][2] == "clangCuda" else False + is_clang = True if row[0][0] == "clang++" or is_clang_cuda else False + + is_gnu = True if row[0][0] == "g++" else False + + is_nvcc = True if len(row[0]) == 3 and row[0][2] == "nvcc" else False + is_cuda = True if row[1][0] == "cuda" else False + v_cuda = get_version(row[1]) + + # hipcc + is_hipcc = True if len(row[0]) == 3 and row[0][2] == "hipcc" else False + is_hip = True if row[1][0] == "hip" else False + + # CI nvcc image is not shipped with clang++ + # clang_cuda images can currently not be used because + # the base image is setting -DALPAKA_CUDA_COMPILER=clang + if is_nvcc and is_clang: + return False + + # hipcc is only valid in one combination + if is_hip and is_hipcc and is_clang and v_compiler == 12: + return True + elif is_hip or is_hipcc: + return False + + # clang 12 is currently only shipped with the HIP container + if 
is_clang and v_compiler == 12: + return False + + # docker images for clang cuda do not support clang++-7 + # together with cuda-9.2 + if is_clang_cuda and v_compiler == 7 and v_cuda == 9.2: + return False + + # CUDA compiler requires backend `cuda` + if (is_nvcc or is_clang_cuda) and not is_cuda: + return False + + # cpu only compiler can not handle the backend `cuda` + if (not is_nvcc and not is_clang_cuda) and is_cuda: + return False + + # clang cuda compatibility + if is_clang_cuda: + if not is_cuda: + return False + if v_cuda == 9.2 and v_compiler >= 7: + return True + if v_cuda == 10.0 and v_compiler >= 8: + return True + if v_cuda == 10.1 and v_compiler >= 9: + return True + + return False + + # nvcc compatibility + if is_cuda and is_nvcc: + if is_gnu: + # g++-5.5 is not compatible with CUDA + # https://github.com/tensorflow/tensorflow/issues/10220 + if v_compiler == 5: + return False + if v_cuda <= 10.1 and v_compiler <= 7: + return True + if v_cuda == 10.2 and v_compiler <= 8: + return True + if v_cuda == 11.0 and v_compiler <= 9: + return True + if v_cuda >= 11.1 and v_compiler <= 10: + return True + + if is_clang: + if v_cuda == 9.2 and v_compiler <= 5: + return True + if 10.0 <= v_cuda and v_cuda <= 10.2 and v_compiler <= 8: + return True + if v_cuda == 11.0 and v_compiler <= 9: + return True + if v_cuda >= 11.1 and v_compiler <= 10: + return True + + return False + + return True + + +# compiler list +# tuple with two components (compiler name, version) +clang_compiers = [("clang++", 5.0), ("clang++", 6.0), ("clang++", 7), + ("clang++", 8), ("clang++", 9), ("clang++", 10), + ("clang++", 11), ("clang++", 12)] +gnu_compilers = [("g++", 5), ("g++", 6), ("g++", 7), ("g++", 8), + ("g++", 9), ("g++", 10)] +compilers = [ + clang_compiers, + gnu_compilers +] + +# generate clang cuda compiler list +# add third component with the device compiler name +cuda_clang_compilers = [] +for i in clang_compiers: + cuda_clang_compilers.append(i + ("clangCuda", )) 
+compilers.append(cuda_clang_compilers) + +# nvcc compiler +cuda_nvcc_compilers = [] +for i in clang_compiers: + cuda_nvcc_compilers.append(i + ("nvcc", )) +for i in gnu_compilers: + cuda_nvcc_compilers.append(i + ("nvcc", )) +compilers.append(cuda_nvcc_compilers) + +# hipcc compiler +hip_clang_compilers = [] +for i in clang_compiers: + hip_clang_compilers.append(i + ("hipcc", )) +compilers.append(hip_clang_compilers) + +# PIConGPU backend list +# tuple with two components (backend name, version) +# version is only required for the cuda backend +backends = [("cuda", 9.2), + ("cuda", 10.0), ("cuda", 10.1), ("cuda", 10.2), + ("cuda", 11.0), ("cuda", 11.1), ("cuda", 11.2), + ("omp2b", ), ("serial", ), + ("hip", )] +boost_libs = ["1.65.1", "1.66.0", "1.67.0", "1.68.0", "1.69.0", + "1.70.0", "1.71.0", "1.72.0", "1.73.0", "1.74.0"] + +rounds = 1 +# activate looping over the compiler categories to minimize the test matrix +# a small test matrix for each compiler e.g. clang, nvcc, g++, clang, +# clangCuda is created +if n_pairs == 1: + rounds = len(compilers) + +for i in range(rounds): + used_compilers = [] + if n_pairs == 1: + used_compilers = compilers[i] + else: + for c in compilers: + used_compilers += c + + parameters = [ + used_compilers, + backends, + boost_libs, + examples + ] + + for i, pairs in enumerate( + AllPairs(parameters, + filter_func=is_valid_combination, n=n_pairs)): + if args.compact: + print("{:2d}: {}".format(i, pairs)) + else: + compiler = pairs[0][0] + "-" + str(pairs[0][1]) + backend = pairs[1][0] + boost_version = pairs[2] + folder = pairs[3] + v_cuda = get_version(pairs[1]) + v_cuda_str = "" if v_cuda == 0 else str(v_cuda) + job_name = compiler + "_" + backend + v_cuda_str + "_boost" + \ + boost_version + "_" + folder.replace("/", ".") + print(job_name + ":") + print(" variables:") + print(" PIC_TEST_CASE_FOLDER: '" + folder + "'") + print(" PIC_BACKEND: '" + backend + "'") + print(" BOOST_VERSION: '" + boost_version + "'") + print(" CXX_VERSION: 
'" + compiler + "'") + print(" before_script:") + print(" - apt-get update -qq") + print(" - apt-get install -y -qq libopenmpi-dev " + "openmpi-bin openssh-server") + print(" extends: " + get_base_image(pairs[0], pairs[1])) + print("") diff --git a/share/ci/pr_has_label.sh b/share/ci/pr_has_label.sh new file mode 100755 index 0000000000..fb63e07200 --- /dev/null +++ b/share/ci/pr_has_label.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -e +set -o pipefail + +github_group_repo="ComputationalRadiationPhysics/picongpu" + +pr_id=$(echo "$CI_BUILD_REF_NAME" | cut -d"/" -f1 | cut -d"-" -f2) +# used a token without any rights from psychocoderHPC to avoid API query limitations +curl_data=$(curl -u psychocoderHPC:$GITHUB_TOKEN -X GET https://api.github.com/repos/${github_group_repo}/pulls/${pr_id} 2>/dev/null) +# extract the names of all labels attached to the pull request +all_labels=$(echo "$curl_data" | python3 -c 'import json,sys;obj=json.loads(sys.stdin.read());x = obj["labels"];labels = list(i["name"] for i in x); print(labels)') +echo "search for label: '$1'" >&2 +echo "labels: '${all_labels}'" >&2 +label_found=$(echo "$all_labels" | grep -q "$1" && echo 0 || echo 1) + +exit $label_found diff --git a/share/ci/run_picongpu_tests.sh b/share/ci/run_picongpu_tests.sh new file mode 100755 index 0000000000..49634567fb --- /dev/null +++ b/share/ci/run_picongpu_tests.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +set -e +set -o pipefail + +# the default build type is Release +# if necessary, you can rerun the pipeline with another build type-> https://docs.gitlab.com/ee/ci/pipelines.html#manually-executing-pipelines +# to change the build type, you must set the environment variable PIC_BUILD_TYPE +if [[ ! 
-v PIC_BUILD_TYPE ]] ; then + PIC_BUILD_TYPE=Release ; +fi + +################################################### +# cmake config builder +################################################### + +PIC_CONST_ARGS="" +# to save compile time reduce the isaac functor chain length to one +PIC_CONST_ARGS="${PIC_CONST_ARGS} -DISAAC_MAX_FUNCTORS=1 -DCMAKE_BUILD_TYPE=${PIC_BUILD_TYPE}" +CMAKE_ARGS="${PIC_CONST_ARGS} ${PIC_CMAKE_ARGS} -DCMAKE_CXX_COMPILER=${CXX_VERSION} -DBOOST_ROOT=/opt/boost/${BOOST_VERSION}" + +# workaround for clang cuda +# HDF5 from the apt sources is pulling -D_FORTIFY_SOURCE=2 into the compile flags +# this workaround is creating a warning about the double definition of _FORTIFY_SOURCE +# +# Workaround will be removed after the test containers are shipped with a self compiled HDF5 +if [[ $CXX_VERSION =~ ^clang && $PIC_BACKEND =~ ^cuda ]] ; then + CMAKE_ARGS="$CMAKE_ARGS -DCMAKE_CXX_FLAGS=-D_FORTIFY_SOURCE=0" +fi + +################################################### +# build and run tests +################################################### + +# use one build directory for all build configurations +cd $HOME +mkdir buildCI +cd buildCI + +export picongpu_DIR=$CI_PROJECT_DIR +export PATH=$picongpu_DIR/bin:$PATH + +# adjust number of parallel builds to avoid out of memory errors +# PIC_BUILD_REQUIRED_MEM_BYTES is a configured variable in the CI web interface +PIC_PARALLEL_BUILDS=$(($CI_RAM_BYTES_TOTAL/$PIC_BUILD_REQUIRED_MEM_BYTES)) + +# limit to number of available cores +if [ $PIC_PARALLEL_BUILDS -gt $CI_CPUS ] ; then + PIC_PARALLEL_BUILDS=$CI_CPUS +fi + +# CI_MAX_PARALLELISM is a configured variable in the CI web interface +if [ $PIC_PARALLEL_BUILDS -gt $CI_MAX_PARALLELISM ] ; then + PIC_PARALLEL_BUILDS=$CI_MAX_PARALLELISM +fi +echo -e "\033[0;32m///////////////////////////////////////////////////" +echo "PIC_BUILD_REQUIRED_MEM_BYTES-> ${PIC_BUILD_REQUIRED_MEM_BYTES}" +echo "CI_RAM_BYTES_TOTAL -> ${CI_RAM_BYTES_TOTAL}" +echo "CI_CPUS -> ${CI_CPUS}" +echo 
"CI_MAX_PARALLELISM -> ${CI_MAX_PARALLELISM}" +echo "number of processor threads -> $(nproc)" +echo "number of parallel builds -> $PIC_PARALLEL_BUILDS" +echo "cmake version -> $(cmake --version | head -n 1)" +echo "build directory -> $(pwd)" +echo "CMAKE_ARGS -> ${CMAKE_ARGS}" +echo "accelerator -> ${PIC_BACKEND}" +echo "input set -> ${PIC_TEST_CASE_FOLDER}" +echo -e "/////////////////////////////////////////////////// \033[0m \n\n" + +if [ "$PIC_TEST_CASE_FOLDER" == "examples/" ] || [ "$PIC_TEST_CASE_FOLDER" == "tests/" ] || [ "$PIC_TEST_CASE_FOLDER" == "benchmarks/" ] ; then + extended_compile_options="-l" +fi + +# test compiling (stdout and stderr both go to pic_compile.log, error_code is exactly "0" or "1") +error_code=$(pic-compile -q -c"$CMAKE_ARGS" $extended_compile_options -j $PIC_PARALLEL_BUILDS ${picongpu_DIR}/share/picongpu/$PIC_TEST_CASE_FOLDER . > pic_compile.log 2>&1 && echo "0" || echo "1") +cat pic_compile.log +for test_case in $(ls -w1 ./build) ; do + if [ -f "build/$test_case/returnCode" ] ; then + returnCode=$(cat "build/$test_case/returnCode") + if [ "$returnCode" != "0" ] ; then + echo -e "\033[0;31m compile FAILED - $test_case \033[0m" + cat "build/$test_case/compile.log" + else + echo -e "\033[0;32m compile PASSED - $test_case \033[0m" + fi + else + echo -e "\033[0;33m compile NOT tested - $test_case \033[0m" + fi +done +if [ "$error_code" != "0" ] ; then + exit 1 +fi +# runtime test (call --help) +for test_case_folder in $(ls params/*/* -d -w1) ; do + export LD_LIBRARY_PATH=/opt/boost/${BOOST_VERSION}/lib:$LD_LIBRARY_PATH + echo -e "\033[0;33m runtime test- $(basename $test_case_folder) \033[0m" + ${test_case_folder}/bin/picongpu --help +done diff --git a/share/ci/run_pmacc_tests.sh b/share/ci/run_pmacc_tests.sh new file mode 100755 index 0000000000..a08b1acddd --- /dev/null +++ b/share/ci/run_pmacc_tests.sh @@ -0,0 +1,111 @@ +#!/bin/bash + +set -e +set -o pipefail + +# the default build type is Release +# if necessary, you can rerun the pipeline with another build type-> 
https://docs.gitlab.com/ee/ci/pipelines.html#manually-executing-pipelines +# to change the build type, you must set the environment variable PMACC_BUILD_TYPE +if [[ ! -v PMACC_BUILD_TYPE ]] ; then + PMACC_BUILD_TYPE=Release; +fi + +################################################### +# cmake config builder +################################################### + +PMACC_CONST_ARGS="" +# pass the selected build type on to CMake +PMACC_CONST_ARGS="${PMACC_CONST_ARGS} -DCMAKE_BUILD_TYPE=${PMACC_BUILD_TYPE}" +CMAKE_ARGS="${PMACC_CONST_ARGS} ${PIC_CMAKE_ARGS} -DCMAKE_CXX_COMPILER=${CXX_VERSION} -DBOOST_ROOT=/opt/boost/${BOOST_VERSION}" +# allow root user to execute MPI +CMAKE_ARGS="$CMAKE_ARGS -DUSE_MPI_AS_ROOT_USER=ON" + +################################################### +# translate PIConGPU backend names into CMake Flags +################################################### + +get_backend_flags() +{ + backend_cfg=(${1//:/ }) + num_options="${#backend_cfg[@]}" + if [ $num_options -gt 2 ] ; then + echo "-b|--backend must be contain 'backend:arch' or 'backend'" >&2 + exit 1 + fi + if [ "${backend_cfg[0]}" == "cuda" ] ; then + result+=" -DALPAKA_ACC_GPU_CUDA_ENABLE=ON -DALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON" + if [ $num_options -eq 2 ] ; then + result+=" -DALPAKA_CUDA_ARCH=\"${backend_cfg[1]}\"" + fi + elif [ "${backend_cfg[0]}" == "omp2b" ] ; then + result+=" -DALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=ON" + if [ $num_options -eq 2 ] ; then + result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" + fi + elif [ "${backend_cfg[0]}" == "serial" ] ; then + result+=" -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=ON" + if [ $num_options -eq 2 ] ; then + result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" + fi + elif [ "${backend_cfg[0]}" == "tbb" ] ; then + result+=" -DALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=ON" + if [ $num_options -eq 2 ] ; then + result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" + fi + elif [ "${backend_cfg[0]}" == "threads" ] ; then + result+=" 
-DALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=ON" + if [ $num_options -eq 2 ] ; then + result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" + fi + elif [ "${backend_cfg[0]}" == "hip" ] ; then + result+=" -DALPAKA_ACC_GPU_HIP_ENABLE=ON -DALPAKA_ACC_GPU_HIP_ONLY_MODE=ON" + if [ $num_options -eq 2 ] ; then + result+=" -DPMACC_CPU_ARCH=\"${backend_cfg[1]}\"" + fi + else + echo "unsupported backend given '$1'" >&2 + exit 1 + fi + + echo "$result" + exit 0 +} + +################################################### +# build and run tests +################################################### + +# use one build directory for all build configurations +cd $HOME +mkdir buildPMaccCI +cd buildPMaccCI + +export code_DIR=$CI_PROJECT_DIR + +PMACC_PARALLEL_BUILDS=$(nproc) +# limit to $CI_MAX_PARALLELISM parallel builds to avoid out of memory errors +# CI_MAX_PARALLELISM is a configured variable in the CI web interface +if [ $PMACC_PARALLEL_BUILDS -gt $CI_MAX_PARALLELISM ] ; then + PMACC_PARALLEL_BUILDS=$CI_MAX_PARALLELISM +fi +alpaka_backend=$(get_backend_flags ${PIC_BACKEND}) +CMAKE_ARGS="$CMAKE_ARGS $alpaka_backend" + +echo -e "\033[0;32m///////////////////////////////////////////////////" +echo "number of processor threads -> $(nproc)" +echo "number of parallel builds -> $PMACC_PARALLEL_BUILDS" +echo "cmake version -> $(cmake --version | head -n 1)" +echo "build directory -> $(pwd)" +echo "CMAKE_ARGS -> ${CMAKE_ARGS}" +echo "accelerator -> ${PIC_BACKEND}" +echo -e "/////////////////////////////////////////////////// \033[0m \n\n" + +# disable warning if infiniband is not used +export OMPI_MCA_btl_base_warn_component_unused=0 +export LD_LIBRARY_PATH=/opt/boost/${BOOST_VERSION}/lib:$LD_LIBRARY_PATH + +cmake $CMAKE_ARGS $code_DIR/include/pmacc +make -j $PMACC_PARALLEL_BUILDS + +ctest -V diff --git a/share/paraview/hypnos.pvsc b/share/paraview/hypnos.pvsc deleted file mode 100644 index b1352fa9e0..0000000000 --- a/share/paraview/hypnos.pvsc +++ /dev/null @@ -1,98 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - diff 
--git a/share/picongpu/benchmarks/SPEC/etc/picongpu/1.cfg b/share/picongpu/benchmarks/SPEC/etc/picongpu/1.cfg new file mode 100644 index 0000000000..96d50f11f9 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/etc/picongpu/1.cfg @@ -0,0 +1,72 @@ +# Copyright 2013-2021 Rene Widera, Axel Huebl +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +## +## This configuration file is used by PIConGPU's TBG tool to create a +## batch script for PIConGPU runs. 
For a detailed description of PIConGPU +## configuration files including all available variables, see +## +## docs/TBG_macros.cfg +## + + +################################# +## Section: Required Variables ## +################################# + +TBG_wallTime="02:00:00" + +TBG_devices_x=1 +TBG_devices_y=1 +TBG_devices_z=1 + +TBG_gridSize="128 128 128" +TBG_steps="1000" + +TBG_periodic="--periodic 1 1 1" + + +################################# +## Section: Optional Variables ## +################################# + +TBG_plugins=" --p_macroParticlesCount.period 100 \ + --e_macroParticlesCount.period 100 \ + --fields_energy.period 100 \ + --e_energy.period 100 --e_energy.filter all \ + --p_energy.period 100 --p_energy.filter all" + + +################################# +## Section: Program Parameters ## +################################# + +TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" + +TBG_programParams="-d !TBG_deviceDist \ + -g !TBG_gridSize \ + -s !TBG_steps \ + !TBG_periodic \ + !TBG_plugins \ + --versionOnce" + +# TOTAL number of devices +TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" + +"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/density.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/density.param new file mode 100644 index 0000000000..5ab7ed52c3 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/density.param @@ -0,0 +1,46 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, + * Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/particles/densityProfiles/profiles.def" + + +namespace picongpu +{ + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. + * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1.e25; + } // namespace SI + + namespace densityProfiles + { + /* definition of homogenous density profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/fileOutput.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/fileOutput.param new file mode 100644 index 0000000000..3b81ce69e2 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/fileOutput.param @@ -0,0 +1,50 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, + * Benjamin Worpitz, Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include + +/* some forward declarations we need */ +#include "picongpu/fields/Fields.def" +#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def" + +#include + + +namespace picongpu +{ + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t<>; + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + using FileOutputFields = MakeSeq_t<>; + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = MakeSeq_t<>; + +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/grid.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/grid.param new file mode 100644 index 0000000000..df502296f5 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/grid.param @@ -0,0 +1,81 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + + +#pragma once + +namespace picongpu +{ + namespace SI + { + /** Duration of one timestep + * unit: seconds */ + constexpr float_64 DELTA_T_SI = 3.0e-17; + + /** equals X + * unit: meter */ + constexpr float_64 CELL_WIDTH_SI = 1.8e-8; + /** equals Y + * unit: meter */ + constexpr float_64 CELL_HEIGHT_SI = 1.8e-8; + /** equals Z + * unit: meter */ + constexpr float_64 CELL_DEPTH_SI = 1.8e-8; + + /** Note on units in reduced dimensions + * + * In 2D3V simulations, the CELL_DEPTH_SI (Z) cell length + * is still used for normalization of densities, etc. + * + * A 2D3V simulation in a cartesian PIC simulation such as + * ours only changes the degrees of freedom in motion for + * (macro) particles and all (field) information in z + * travels instantaneous, making the 2D3V simulation + * behave like the interaction of infinite "wire particles" + * in fields with perfect symmetry in Z. + */ + } // namespace SI + + //! Defines the size of the absorbing zone (in cells) + constexpr uint32_t ABSORBER_CELLS[3][2] = { + {0, 0}, /*x direction [negative,positive]*/ + {0, 0}, /*y direction [negative,positive]*/ + {0, 0} /*z direction [negative,positive]*/ + }; // unit: number of cells + + //! Define the strength of the absorber for any direction + constexpr float_X ABSORBER_STRENGTH[3][2] = { + {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ + {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none + + /** When to move the co-moving window. + * An initial pseudo particle, flying with the speed of light, + * is fired at the begin of the simulation. + * When it reaches movePoint % of the absolute(*) simulation area, + * the co-moving window starts to move with the speed of light. 
+ * + * (*) Note: beware, that there is one "hidden" row of gpus at the y-front, + * when you use the co-moving window + * 0.75 means only 75% of simulation area is used for real simulation + */ + constexpr float_64 movePoint = 0.90; + +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/isaac.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/isaac.param new file mode 100644 index 0000000000..135d13420b --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/isaac.param @@ -0,0 +1,64 @@ +/* Copyright 2016-2021 Alexander Matthes + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Definition which native fields and density fields of particles will be + * visualizable with ISAAC. ISAAC is an in-situ visualization library with which + * the PIC simulation can be observed while it is running avoiding the time + * consuming writing and reading of simulation data for the classical post + * processing of data. + * + * ISAAC can directly visualize natives fields like the E or B field, but + * density fields of particles need to be calculated from PIConGPU on the fly + * which slightly increases the runtime and the memory consumption. Every + * particle density field will reduce the amount of memory left for PIConGPUs + * particles and fields. 
+ * + * To get best performance, ISAAC defines an exponential amount of different + * visualization kernels for every combination of (at runtime) activated + * fields. So furthermore a lot of fields will increase the compilation time. + * + */ + +#pragma once + +namespace picongpu +{ + namespace isaacP + { + /** Intermediate list of native particle species of PIConGPU which shall be + * visualized. */ + using Particle_Seq = MakeSeq_t<>; + + /** Intermediate list of native fields of PIConGPU which shall be + * visualized. */ + using Native_Seq = MakeSeq_t<>; + + /** Intermediate list of particle species, from which density fields + * shall be created at runtime to visualize them. */ + using Density_Seq = MakeSeq_t<>; + + /** Compile time sequence of all fields which shall be visualized. Basically + * the join of Native_Seq and Density_Seq. */ + using Fields_Seq = MakeSeq_t<>; + + + } // namespace isaacP +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/memory.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/memory.param new file mode 100644 index 0000000000..17fdfd2aa9 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/memory.param @@ -0,0 +1,115 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +/** @file + * + * Define low-level memory settings for compute devices. + * + * Settings for memory layout for supercells and particle frame-lists, + * data exchanges in multi-device domain-decomposition and reserved + * fields for temporarily derived quantities are defined here. + */ + +#pragma once + +#include +#include + +namespace picongpu +{ + /* We have to hold back 400MiB for gpu-internal operations: + * - random number generator + * - reduces + * - ... + */ + constexpr size_t reservedGpuMemorySize = 400 * 1024 * 1024; + + /* short namespace*/ + namespace mCT = pmacc::math::CT; + /** size of a superCell + * + * volume of a superCell must be <= 1024 + */ + using SuperCellSize = typename mCT::shrinkTo, simDim>::type; + + /** define the object for mapping superCells to cells*/ + using MappingDesc = MappingDescription; + + /** define the size of the core, border and guard area + * + * PIConGPU uses spatial domain-decomposition for parallelization + * over multiple devices with non-shared memory architecture. + * The global spatial domain is organized per device in three + * sections: the GUARD area contains copies of neighboring + * devices (also known as "halo"/"ghost"). + * The BORDER area is the outermost layer of cells of a device, + * equally to what neighboring devices see as GUARD area. + * The CORE area is the innermost area of a device. In union with + * the BORDER area it defines the "active" spatial domain on a device. + * + * GuardSize is defined in units of SuperCellSize per dimension. + */ + using GuardSize = typename mCT::shrinkTo, simDim>::type; + + /** bytes reserved for species exchange buffer + * + * This is the default configuration for species exchanges buffer sizes. + * The default exchange buffer sizes can be changed per species by adding + * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg + * to its flag list. 
+ */ + struct DefaultExchangeMemCfg + { + // memory used for a direction + static constexpr uint32_t BYTES_EXCHANGE_X = 1 * 1024 * 1024; // 1 MiB + static constexpr uint32_t BYTES_EXCHANGE_Y = 1 * 1024 * 1024; // 1 MiB + static constexpr uint32_t BYTES_EXCHANGE_Z = 6 * 1024 * 1024; // 6 MiB + static constexpr uint32_t BYTES_EDGES = 512 * 1024; // 512 kiB + static constexpr uint32_t BYTES_CORNER = 256 * 1024; // 256 kiB + + /** Reference local domain size + * + * The size of the local domain for which the exchange sizes `BYTES_*` are configured for. + * The required size of each exchange will be calculated at runtime based on the local domain size and the + * reference size. The exchange size will be scaled only up and not down. Zero means that there is no reference + * domain size, exchanges will not be scaled. + */ + using REF_LOCAL_DOM_SIZE = mCT::Int<128, 128, 128>; + /** Scaling rate per direction. + * + * 1.0 means it scales linear with the ratio between the local domain size at runtime and the reference local + * domain size. + */ + const std::array DIR_SCALING_FACTOR = {0.5, 0.5, 1.0}; + }; + + /** number of scalar fields that are reserved as temporary fields */ + constexpr uint32_t fieldTmpNumSlots = 1; + + /** can `FieldTmp` gather neighbor information + * + * If `true` it is possible to call the method `asyncCommunicationGather()` + * to copy data from the border of neighboring GPU into the local guard. + * This is also known as building up a "ghost" or "halo" region in domain + * decomposition and only necessary for specific algorithms that extend + * the basic PIC cycle, e.g. with dependence on derived density or energy fields. 
+ */ + constexpr bool fieldTmpSupportGatherCommunication = false; + +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/particle.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/particle.param new file mode 100644 index 0000000000..d778bd4dc7 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/particle.param @@ -0,0 +1,101 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, + * Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/particles/startPosition/functors.def" +#include "picongpu/particles/manipulators/manipulators.def" + +#include +#include + + +namespace picongpu +{ + namespace particles + { + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * unit: none + */ + constexpr float_X MIN_WEIGHTING = 1.0; + + namespace manipulators + { + CONST_VECTOR(float_X, 3, DriftParamElectrons_direction, 0.0, 0.0, 1.0); + struct DriftParamElectrons + { + /** Initial particle drift velocity + * unit: none + */ + static constexpr float_64 gamma = 5.0; + const DriftParamElectrons_direction_t direction; + }; + using AssignZDriftElectrons = unary::Drift; + + CONST_VECTOR(float_X, 3, DriftParamPositrons_direction, 0.0, 0.0, -1.0); + struct DriftParamPositrons + { + /** Initial particle drift velocity + * unit: none + */ + static constexpr float_64 gamma = 5.0; + const DriftParamPositrons_direction_t direction; + }; + // definition of SetDrift start + using AssignZDriftPositrons = unary::Drift; + + } // namespace manipulators + + namespace startPosition + { + struct QuietParamElectrons + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = mCT::shrinkTo, simDim>::type; + }; + + // definition of quiet particle start + using QuietElectrons = QuietImpl; + + struct QuietParamPositrons + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = mCT::shrinkTo, simDim>::type; + }; + + // definition of quiet particle start + using QuietPositrons = QuietImpl; + + } // namespace startPosition + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. 
+ */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL + = mCT::volume::type::value; + + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/species.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/species.param new file mode 100644 index 0000000000..f6b68bad57 --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/species.param @@ -0,0 +1,106 @@ +/* Copyright 2014-2021 Rene Widera, Richard Pausch, Annegret Roeszler, Klaus Steiniger + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Particle shape, field to particle interpolation, current solver, and particle pusher + * can be declared here for usage in `speciesDefinition.param`. + * + * @see + * **MODELS / Hierarchy of Charge Assignment Schemes** + * in the online documentation for information on particle shapes. 
+ * + * + * \attention + * The higher order shape names are redefined with release 0.6.0 in order to provide a consistent naming: + * * PQS is the name of the 3rd order assignment function (instead of PCS) + * * PCS is the name of the 4th order assignment function (instead of P4S) + * * P4S does not exist anymore + */ + +#pragma once + +#include "picongpu/particles/shapes.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" +#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" +#include "picongpu/particles/flylite/NonLTE.def" +#include "picongpu/fields/currentDeposition/Solver.def" + + +namespace picongpu +{ + /** select macroparticle shape + * + * **WARNING** the shape names are redefined and diverge from PIConGPU versions before 0.6.0. + * + * - particles::shapes::CIC : Assignment function is a piecewise linear spline + * - particles::shapes::TSC : Assignment function is a piecewise quadratic spline + * - particles::shapes::PQS : Assignment function is a piecewise cubic spline + * - particles::shapes::PCS : Assignment function is a piecewise quartic spline + */ + using UsedParticleShape = particles::shapes::PQS; + + /** select interpolation method to be used for interpolation of grid-based field values to particle positions + */ + using UsedField2Particle = FieldToParticleInterpolation; + + /*! 
select current solver method + * - currentSolver::Esirkepov< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * - currentSolver::VillaBune< SHAPE, STRATEGY > : particle shapes - CIC (1st order) only + * - currentSolver::EmZ< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * + * For development purposes: + * - currentSolver::EsirkepovNative< SHAPE, STRATEGY > : generic version of currentSolverEsirkepov + * without optimization (~4x slower and needs more shared memory) + * + * STRATEGY (optional): + * - currentSolver::strategy::StridedCachedSupercells + * - currentSolver::strategy::StridedCachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::CachedSupercells + * - currentSolver::strategy::CachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::NonCachedSupercells + * - currentSolver::strategy::NonCachedSupercellsScaled with N >= 1 + */ + using UsedParticleCurrentSolver = currentSolver::EmZ; + + /** particle pusher configuration + * + * Defining a pusher is optional for particles + * + * - particles::pusher::HigueraCary : Higuera & Cary's relativistic pusher preserving both volume and ExB velocity + * - particles::pusher::Vay : Vay's relativistic pusher preserving ExB velocity + * - particles::pusher::Boris : Boris' relativistic pusher preserving volume + * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher + * with classical radiation reaction + * - particles::pusher::Composite : composite of two given pushers, + * switches between using one (or none) of those + * + * For diagnostics & modeling: ------------------------------------------------ + * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field + * - particles::pusher::Free : free propagation, ignore fields + * (= free stream model) + * - particles::pusher::Photon : propagate with c in direction of normalized mom. 
+ * - particles::pusher::Probe : Probe particles that interpolate E & B + * For development purposes: -------------------------------------------------- + * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) + */ + using UsedParticlePusher = particles::pusher::Boris; + +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/speciesDefinition.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/speciesDefinition.param new file mode 100644 index 0000000000..ff84524b3d --- /dev/null +++ b/share/picongpu/benchmarks/SPEC/include/picongpu/param/speciesDefinition.param @@ -0,0 +1,86 @@ +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/Particles.hpp" + +#include +#include +#include +#include +#include + + +namespace picongpu +{ + /*########################### define particle attributes #####################*/ + + /** describe attributes of a particle */ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*--------------------------- positrons -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioPositrons, 1.0); + value_identifier(float_X, ChargeRatioPositrons, -1.0); + + /* ratio relative to BASE_DENSITY */ + value_identifier(float_X, DensityRatioPositrons, 1.0); + + using ParticleFlagsPositrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio>; + + /* define species positrons */ + using PIC_Positrons = Particles; + + /*########################### end species ####################################*/ + + using VectorAllSpecies = MakeSeq_t; + +} // namespace picongpu diff --git a/share/picongpu/benchmarks/SPEC/include/picongpu/param/speciesInitialization.param b/share/picongpu/benchmarks/SPEC/include/picongpu/param/speciesInitialization.param new file mode 100644 index 0000000000..7763ca1214 --- /dev/null +++
b/share/picongpu/benchmarks/SPEC/include/picongpu/param/speciesInitialization.param @@ -0,0 +1,49 @@ +/* Copyright 2015-2021 Rene Widera, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Initialize particles inside particle species. This is the final step in + * setting up particles (defined in `speciesDefinition.param`) via density + * profiles (defined in `density.param`). One can then further derive particles + * from one species to another and manipulate attributes with "manipulators" + * and "filters" (defined in `particle.param` and `particleFilters.param`). + */ + +#pragma once + +#include "picongpu/particles/InitFunctors.hpp" + + +namespace picongpu +{ + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + CreateDensity, + Manipulate, + Manipulate>; + + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/dockerfiles/README.rst b/share/picongpu/dockerfiles/README.rst index 7d2a93047d..128c294ebc 100644 --- a/share/picongpu/dockerfiles/README.rst +++ b/share/picongpu/dockerfiles/README.rst @@ -25,7 +25,7 @@ This exposes the ISAAC port to connect via the webclient to. .. 
code:: bash docker pull ax3l/picongpu - docker run --runtime=nvidia -p 2459:2459 -t ax3l/picongpu:0.5.0 lwfa_live + docker run --runtime=nvidia -p 2459:2459 -t ax3l/picongpu:0.6.0-dev lwfa_live # open firefox and isaac client or @@ -56,12 +56,12 @@ You can also push the result to dockerhub and singularity-hub (you need an accou cd ubuntu-1604 # docker image - docker build -t ax3l/picongpu:0.5.0 + docker build -t ax3l/picongpu:0.6.0-dev . # optional: push to dockerhub (needed for singularity bootstrap) docker login - docker push ax3l/picongpu:0.5.0 + docker push ax3l/picongpu:0.6.0-dev # optional: mark as latest release - docker tag ax3l/picongpu:0.5.0 ax3l/picongpu:latest + docker tag ax3l/picongpu:0.6.0-dev ax3l/picongpu:latest docker push ax3l/picongpu:latest # singularity image @@ -69,7 +69,7 @@ You can also push the result to dockerhub and singularity-hub (you need an accou sudo singularity bootstrap picongpu.img Singularity # optional: push to a singularity registry # setup your $HOME/.sregistry first - sregistry push picongpu.img --name ax3l/picongpu --tag 0.5.0 + sregistry push picongpu.img --name ax3l/picongpu --tag 0.6.0-dev Recipes ------- diff --git a/share/picongpu/dockerfiles/ubuntu-1604/Singularity b/share/picongpu/dockerfiles/ubuntu-1604/Singularity deleted file mode 100644 index 3578c8a996..0000000000 --- a/share/picongpu/dockerfiles/ubuntu-1604/Singularity +++ /dev/null @@ -1,11 +0,0 @@ -Bootstrap: docker -From: ax3l/picongpu:0.5.0 - - -%labels -Maintainer "Axel Huebl , Rene Widera " -Version 0.5.0 - - -%runscript -exec /bin/bash -l diff --git a/share/picongpu/dockerfiles/ubuntu-1604/compilers.yaml b/share/picongpu/dockerfiles/ubuntu-1604/compilers.yaml deleted file mode 100644 index 696f5b9a99..0000000000 --- a/share/picongpu/dockerfiles/ubuntu-1604/compilers.yaml +++ /dev/null @@ -1,14 +0,0 @@ -compilers: -- compiler: - environment: {} - extra_rpaths: [] - flags: {} - modules: [] - operating_system: ubuntu16.04 - paths: - cc: /usr/bin/gcc-5 - 
cxx: /usr/bin/g++-5 - f77: /usr/bin/gfortran-5 - fc: /usr/bin/gfortran-5 - spec: gcc@5.4.0 - target: x86_64 diff --git a/share/picongpu/dockerfiles/ubuntu-1604/packages.yaml b/share/picongpu/dockerfiles/ubuntu-1604/packages.yaml deleted file mode 100644 index c7e994ba12..0000000000 --- a/share/picongpu/dockerfiles/ubuntu-1604/packages.yaml +++ /dev/null @@ -1,25 +0,0 @@ -packages: - cuda: - paths: - cuda@9.2.148%gcc@5.4.0 arch=linux-ubuntu16-x86_64: /usr/local/cuda - buildable: False - pkg-config: - paths: - pkg-config@0.29.1%gcc@5.4.0 arch=linux-ubuntu16-x86_64: /usr - buildable: False - python: - paths: - python@2.7.12%gcc@5.4.0 arch=linux-ubuntu16-x86_64: /usr - buildable: False - openmpi: - version: [3.1.3] - variants: +cuda fabrics=libfabric - hwloc: - variants: +cuda - # install issue with gettext - # https://github.com/spack/spack/issues/11551 - flex: - version: [2.6.3] - all: - providers: - mpi: [openmpi] diff --git a/share/picongpu/dockerfiles/ubuntu-1604/Dockerfile b/share/picongpu/dockerfiles/ubuntu-2004/Dockerfile similarity index 89% rename from share/picongpu/dockerfiles/ubuntu-1604/Dockerfile rename to share/picongpu/dockerfiles/ubuntu-2004/Dockerfile index f772a07452..47d5c56927 100644 --- a/share/picongpu/dockerfiles/ubuntu-1604/Dockerfile +++ b/share/picongpu/dockerfiles/ubuntu-2004/Dockerfile @@ -1,12 +1,14 @@ -FROM nvidia/cuda:9.2-base -LABEL maintainer="Axel Huebl , Rene Widera " +FROM nvidia/cuda:11.2.0-base-ubuntu20.04 +MAINTAINER Axel Huebl +LABEL authors="Axel Huebl, René Widera" # docker and image environment ENV DEBIAN_FRONTEND=noninteractive \ FORCE_UNSAFE_CONFIGURE=1 \ SPACK_ROOT=/usr/local \ SPACK_EXTRA_REPO=/usr/local/share/spack-repo \ - PIC_PACKAGE='picongpu@0.5.0+isaac backend=cuda' + PIC_PACKAGE='picongpu@develop+isaac backend=cuda target=x86_64' \ + CUDA_PKG_VERSION="11-2" # install minimal spack dependencies # - adds gfortran for spack's openmpi package @@ -24,11 +26,9 @@ RUN apt-get update && \ coreutils \ 
cuda-cupti-$CUDA_PKG_VERSION \ cuda-command-line-tools-$CUDA_PKG_VERSION \ - cuda-core-$CUDA_PKG_VERSION \ cuda-cudart-dev-$CUDA_PKG_VERSION \ - cuda-curand-dev-$CUDA_PKG_VERSION \ + libcurand-dev-$CUDA_PKG_VERSION \ cuda-minimal-build-$CUDA_PKG_VERSION \ - cuda-misc-headers-$CUDA_PKG_VERSION \ cuda-nvml-dev-$CUDA_PKG_VERSION \ curl \ environment-modules \ @@ -55,6 +55,7 @@ RUN curl -s -L https://github.com/spack/spack/archive/develop.tar.gz \ curl -s -L https://api.github.com/repos/ComputationalRadiationPhysics/spack-repo/tarball \ | tar xzC $SPACK_EXTRA_REPO --strip 1 && \ spack repo add --scope=system $SPACK_EXTRA_REPO +RUN spack install --only dependencies $PIC_PACKAGE RUN spack install $PIC_PACKAGE && \ spack clean -a @@ -81,7 +82,7 @@ RUN /bin/echo -e '#!/bin/bash -l\n' \ RUN /bin/bash -l -c ' \ pic-create $PICSRC/share/picongpu/examples/LaserWakefield /opt/picInputs/lwfa && \ cd /opt/picInputs/lwfa && \ - pic-build -b "cuda:30;35;37;50;60;70" -c"-DCUDAMEMTEST_ENABLE=OFF" && \ + pic-build -b "cuda:35;37;50;60;70;80" -c"-DCUDAMEMTEST_ENABLE=OFF" && \ rm -rf .build && \ chmod a+x /opt/picInputs/*/bin/* && \ chmod a+r -R /opt/picInputs/* && \ @@ -90,7 +91,7 @@ RUN /bin/bash -l -c ' \ RUN /bin/bash -l -c ' \ pic-create $PICSRC/share/picongpu/examples/KelvinHelmholtz /opt/picInputs/khi && \ cd /opt/picInputs/khi && \ - pic-build -b "cuda:30;35;37;50;60;70" -c"-DCUDAMEMTEST_ENABLE=OFF" && \ + pic-build -b "cuda:35;37;50;60;70;80" -c"-DCUDAMEMTEST_ENABLE=OFF" && \ rm -rf .build && \ chmod a+x /opt/picInputs/*/bin/* && \ chmod a+r -R /opt/picInputs/* && \ @@ -99,7 +100,7 @@ RUN /bin/bash -l -c ' \ RUN /bin/bash -l -c ' \ pic-create $PICSRC/share/picongpu/examples/FoilLCT /opt/picInputs/foil && \ cd /opt/picInputs/foil && \ - pic-build -b "cuda:30;35;37;50;60;70" -c"-DCUDAMEMTEST_ENABLE=OFF" && \ + pic-build -b "cuda:35;37;50;60;70;80" -c"-DCUDAMEMTEST_ENABLE=OFF" && \ rm -rf .build && \ chmod a+x /opt/picInputs/*/bin/* && \ chmod a+r -R /opt/picInputs/* && \ diff 
--git a/share/picongpu/dockerfiles/ubuntu-2004/Singularity b/share/picongpu/dockerfiles/ubuntu-2004/Singularity new file mode 100644 index 0000000000..7080868e12 --- /dev/null +++ b/share/picongpu/dockerfiles/ubuntu-2004/Singularity @@ -0,0 +1,11 @@ +Bootstrap: docker +From: ax3l/picongpu:0.6.0-dev + + +%labels +Maintainer "Axel Huebl , Rene Widera " +Version 0.6.0-dev + + +%runscript +exec /bin/bash -l diff --git a/share/picongpu/dockerfiles/ubuntu-2004/compilers.yaml b/share/picongpu/dockerfiles/ubuntu-2004/compilers.yaml new file mode 100644 index 0000000000..e28ec37f97 --- /dev/null +++ b/share/picongpu/dockerfiles/ubuntu-2004/compilers.yaml @@ -0,0 +1,14 @@ +compilers: +- compiler: + environment: {} + extra_rpaths: [] + flags: {} + modules: [] + operating_system: ubuntu20.04 + paths: + cc: /usr/bin/gcc-9 + cxx: /usr/bin/g++-9 + f77: /usr/bin/gfortran-9 + fc: /usr/bin/gfortran-9 + spec: gcc@9.3.0 + target: x86_64 diff --git a/share/picongpu/dockerfiles/ubuntu-1604/modules.yaml b/share/picongpu/dockerfiles/ubuntu-2004/modules.yaml similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/modules.yaml rename to share/picongpu/dockerfiles/ubuntu-2004/modules.yaml diff --git a/share/picongpu/dockerfiles/ubuntu-2004/packages.yaml b/share/picongpu/dockerfiles/ubuntu-2004/packages.yaml new file mode 100644 index 0000000000..ab4e9dbae5 --- /dev/null +++ b/share/picongpu/dockerfiles/ubuntu-2004/packages.yaml @@ -0,0 +1,28 @@ +packages: + cuda: + buildable: false + externals: + - prefix: /usr/local/cuda + spec: cuda@11.2%gcc@9.3.0 arch=linux-ubuntu20.04-x86_64 + pkg-config: + buildable: false + externals: + - prefix: /usr + spec: pkg-config@0.29.1%gcc@9.3.0 arch=linux-ubuntu20.04-x86_64 + python: + buildable: false + externals: + - prefix: /usr + spec: python@2.7.18%gcc@9.3.0 arch=linux-ubuntu20.04-x86_64 + openmpi: + version: [4.1.0] + variants: +cuda fabrics=auto + hwloc: + variants: +cuda + # install issue with gettext + # 
https://github.com/spack/spack/issues/11551 + flex: + version: [2.6.3] + all: + providers: + mpi: [openmpi] diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_foil_4.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_foil_4.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_foil_4.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_foil_4.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_foil_8.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_foil_8.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_foil_8.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_foil_8.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_khi_1.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_khi_1.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_khi_1.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_khi_1.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_khi_4.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_khi_4.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_khi_4.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_khi_4.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_khi_8.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_khi_8.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_khi_8.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_khi_8.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_lwfa.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_lwfa.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_lwfa.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_lwfa.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_4.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_4.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_4.sh rename to 
share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_4.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_8.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_8.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_8.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_8.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_live.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_live.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_live.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_live.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_live_4.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_live_4.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_live_4.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_live_4.sh diff --git a/share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_live_8.sh b/share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_live_8.sh similarity index 100% rename from share/picongpu/dockerfiles/ubuntu-1604/start_lwfa_live_8.sh rename to share/picongpu/dockerfiles/ubuntu-2004/start_lwfa_live_8.sh diff --git a/share/picongpu/examples/Bremsstrahlung/bin/plot_energy_histogram.py b/share/picongpu/examples/Bremsstrahlung/bin/plot_energy_histogram.py index 8adfa7d46a..2faf025d9c 100644 --- a/share/picongpu/examples/Bremsstrahlung/bin/plot_energy_histogram.py +++ b/share/picongpu/examples/Bremsstrahlung/bin/plot_energy_histogram.py @@ -21,7 +21,7 @@ There will be 5 datasets for the 5 different output iterations. The plots will also not contain the outliers. 
-Copyright 2017-2020 Marco Garten, Axel Huebl +Copyright 2017-2021 Marco Garten, Axel Huebl Authors: Axel Huebl License: GPLv3+ """ diff --git a/share/picongpu/examples/Bremsstrahlung/bin/plot_particle_calorimeter.py b/share/picongpu/examples/Bremsstrahlung/bin/plot_particle_calorimeter.py index 69bc1b1ff1..fbe928f2b5 100644 --- a/share/picongpu/examples/Bremsstrahlung/bin/plot_particle_calorimeter.py +++ b/share/picongpu/examples/Bremsstrahlung/bin/plot_particle_calorimeter.py @@ -21,7 +21,7 @@ There will be 5 datasets for the 5 different output iterations. The plots will also not contain the outliers. -Copyright 2017-2020 Marco Garten, Axel Huebl +Copyright 2017-2021 Marco Garten, Axel Huebl Authors: Axel Huebl License: GPLv3+ """ diff --git a/share/picongpu/examples/Bremsstrahlung/etc/picongpu/1.cfg b/share/picongpu/examples/Bremsstrahlung/etc/picongpu/1.cfg index 6adb667207..aef299a5bb 100644 --- a/share/picongpu/examples/Bremsstrahlung/etc/picongpu/1.cfg +++ b/share/picongpu/examples/Bremsstrahlung/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Heiko Burau, Richard Pausch, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Heiko Burau, Richard Pausch, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. 
# @@ -52,11 +52,16 @@ TBG_ph_calorimeter="--ph_calorimeter.period 1000 --ph_calorimeter.openingYaw 360 TBG_ph_energyHistogram="--ph_energyHistogram.period 1000 --ph_energyHistogram.filter all --ph_energyHistogram.minEnergy 10 --ph_energyHistogram.maxEnergy 10000" -TBG_plugins="--hdf5.period 1000 --hdf5.file simData \ - --e_macroParticlesCount.period 1000 \ - --i_macroParticlesCount.period 1000 \ +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 1000 \ + --openPMD.file simData \ + --openPMD.ext h5" + +TBG_plugins="!TBG_openPMD \ + --e_macroParticlesCount.period 1000 \ + --i_macroParticlesCount.period 1000 \ --ph_macroParticlesCount.period 1000 \ - !TBG_ph_calorimeter \ + !TBG_ph_calorimeter \ !TBG_ph_energyHistogram" diff --git a/share/picongpu/examples/Bremsstrahlung/etc/picongpu/8.cfg b/share/picongpu/examples/Bremsstrahlung/etc/picongpu/8.cfg index 98b73db410..8536a07732 100644 --- a/share/picongpu/examples/Bremsstrahlung/etc/picongpu/8.cfg +++ b/share/picongpu/examples/Bremsstrahlung/etc/picongpu/8.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Heiko Burau, Richard Pausch, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Heiko Burau, Richard Pausch, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. 
# @@ -52,7 +52,12 @@ TBG_ph_calorimeter="--ph_calorimeter.period 1000 --ph_calorimeter.openingYaw 360 TBG_ph_energyHistogram="--ph_energyHistogram.period 1000 --ph_energyHistogram.filter all --ph_energyHistogram.minEnergy 10 --ph_energyHistogram.maxEnergy 10000" -TBG_plugins="--hdf5.period 1000 --hdf5.file simData \ +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 1000 \ + --openPMD.file simData \ + --openPMD.ext h5" + +TBG_plugins="!TBG_openPMD \ --e_macroParticlesCount.period 1000 \ --i_macroParticlesCount.period 1000 \ --ph_macroParticlesCount.period 1000 \ diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/bremsstrahlung.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/bremsstrahlung.param index d9c182465e..8ab631aded 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/bremsstrahlung.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/bremsstrahlung.param @@ -1,4 +1,4 @@ -/* Copyright 2016-2020 Heiko Burau +/* Copyright 2016-2021 Heiko Burau * * This file is part of PIConGPU. * @@ -21,112 +21,110 @@ namespace picongpu { -namespace particles -{ -namespace bremsstrahlung -{ - - -/** params related to the energy loss and deflection of the incident electron */ -namespace electron -{ - /** Minimal kinetic electron energy in MeV for the lookup table. - * - * For electrons below this value Bremsstrahlung is not taken into account. - */ - constexpr float_64 MIN_ENERGY_MeV = 0.5; - - /** Maximal kinetic electron energy in MeV for the lookup table. - * - * Electrons above this value cause a out-of-bounds access at the - * lookup table. Bounds checking is enabled for "CRITICAL" log level. - */ - constexpr float_64 MAX_ENERGY_MeV = 100.0; - - /** Minimal polar deflection angle due to screening. */ - constexpr float_64 MIN_THETA = 0.001; - - /** number of lookup table divisions for the kappa axis. - * - * Kappa is the energy loss normalized to the initial kinetic energy. 
- * The axis is scaled linearly. - */ - constexpr uint32_t NUM_SAMPLES_KAPPA = 64; - - /** number of lookup table divisions for the initial kinetic energy axis. - * - * The axis is scaled logarithmically. - */ - constexpr uint32_t NUM_SAMPLES_EKIN = 128; - - /** Kappa is the energy loss normalized to the initial kinetic energy. - * - * This minimal value is needed by the numerics to avoid a division by zero. - */ - constexpr float_64 MIN_KAPPA = 1.0e-10; - -} // namespace electron - -/** params related to the creation and the emission angle of the photon */ -namespace photon -{ - /** Low-energy threshold in keV of the incident electron for the creation of photons. - * - * Below this value photon emission is neglected. - */ - constexpr float_64 SOFT_PHOTONS_CUTOFF_keV = 5.0; - - /** number of lookup table divisions for the delta axis. - * - * Delta is the angular emission probability (normalized to one) integrated from zero to theta, - * where theta is the angle between the photon momentum and the final electron momentum. - * - * The axis is scaled linearly. - */ - constexpr uint32_t NUM_SAMPLES_DELTA = 256; - - /** number of lookup table divisions for the gamma axis. - * - * Gamma is the relativistic factor of the incident electron. - * - * The axis is scaled logarithmically. - */ - constexpr uint32_t NUM_SAMPLES_GAMMA = 64; - - /** Maximal value of delta for the lookup table. - * - * Delta is the angular emission probability (normalized to one) integrated from zero to theta, - * where theta is the angle between the photon momentum and the final electron momentum. - * - * A value close to one is reasonable. Though exactly one was actually correct, - * because it would map to theta = pi (maximum polar angle), the sampling then would be bad - * in the ultrarelativistic case. In this regime the emission primarily takes place at small thetas. - * So a maximum delta close to one maps to a reasonable maximum theta. 
- */ - constexpr float_64 MAX_DELTA = 0.95; - - /** minimal gamma for the lookup table. */ - constexpr float_64 MIN_GAMMA = 1.0; - - /** maximal gamma for the lookup table. - * - * Bounds checking is enabled for "CRITICAL" log level. - */ - constexpr float_64 MAX_GAMMA = 200; - - /** if the emission probability per timestep is higher than this value and the log level is set to - * "CRITICAL" a warning will be raised. - */ - constexpr float_64 SINGLE_EMISSION_PROB_LIMIT = 0.4; - - /** ratio between macro electron weighting (numerator) and macro photon weighting (denominator) - * at the time of creation. - * - * The emission probability is proportional to this parameter. - */ - constexpr float_64 WEIGHTING_RATIO = 5; -} // namespace photon - -} // namespace bremsstrahlung -} // namespace particles + namespace particles + { + namespace bremsstrahlung + { + /** params related to the energy loss and deflection of the incident electron */ + namespace electron + { + /** Minimal kinetic electron energy in MeV for the lookup table. + * + * For electrons below this value Bremsstrahlung is not taken into account. + */ + constexpr float_64 MIN_ENERGY_MeV = 0.5; + + /** Maximal kinetic electron energy in MeV for the lookup table. + * + * Electrons above this value cause a out-of-bounds access at the + * lookup table. Bounds checking is enabled for "CRITICAL" log level. + */ + constexpr float_64 MAX_ENERGY_MeV = 100.0; + + /** Minimal polar deflection angle due to screening. */ + constexpr float_64 MIN_THETA = 0.001; + + /** number of lookup table divisions for the kappa axis. + * + * Kappa is the energy loss normalized to the initial kinetic energy. + * The axis is scaled linearly. + */ + constexpr uint32_t NUM_SAMPLES_KAPPA = 64; + + /** number of lookup table divisions for the initial kinetic energy axis. + * + * The axis is scaled logarithmically. + */ + constexpr uint32_t NUM_SAMPLES_EKIN = 128; + + /** Kappa is the energy loss normalized to the initial kinetic energy. 
+ * + * This minimal value is needed by the numerics to avoid a division by zero. + */ + constexpr float_64 MIN_KAPPA = 1.0e-10; + + } // namespace electron + + /** params related to the creation and the emission angle of the photon */ + namespace photon + { + /** Low-energy threshold in keV of the incident electron for the creation of photons. + * + * Below this value photon emission is neglected. + */ + constexpr float_64 SOFT_PHOTONS_CUTOFF_keV = 5.0; + + /** number of lookup table divisions for the delta axis. + * + * Delta is the angular emission probability (normalized to one) integrated from zero to theta, + * where theta is the angle between the photon momentum and the final electron momentum. + * + * The axis is scaled linearly. + */ + constexpr uint32_t NUM_SAMPLES_DELTA = 256; + + /** number of lookup table divisions for the gamma axis. + * + * Gamma is the relativistic factor of the incident electron. + * + * The axis is scaled logarithmically. + */ + constexpr uint32_t NUM_SAMPLES_GAMMA = 64; + + /** Maximal value of delta for the lookup table. + * + * Delta is the angular emission probability (normalized to one) integrated from zero to theta, + * where theta is the angle between the photon momentum and the final electron momentum. + * + * A value close to one is reasonable. Though exactly one was actually correct, + * because it would map to theta = pi (maximum polar angle), the sampling then would be bad + * in the ultrarelativistic case. In this regime the emission primarily takes place at small thetas. + * So a maximum delta close to one maps to a reasonable maximum theta. + */ + constexpr float_64 MAX_DELTA = 0.95; + + /** minimal gamma for the lookup table. */ + constexpr float_64 MIN_GAMMA = 1.0; + + /** maximal gamma for the lookup table. + * + * Bounds checking is enabled for "CRITICAL" log level. 
+ */ + constexpr float_64 MAX_GAMMA = 200; + + /** if the emission probability per timestep is higher than this value and the log level is set to + * "CRITICAL" a warning will be raised. + */ + constexpr float_64 SINGLE_EMISSION_PROB_LIMIT = 0.4; + + /** ratio between macro electron weighting (numerator) and macro photon weighting (denominator) + * at the time of creation. + * + * The emission probability is proportional to this parameter. + */ + constexpr float_64 WEIGHTING_RATIO = 5; + } // namespace photon + + } // namespace bremsstrahlung + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/density.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/density.param index 47e2a6b906..e6caad1521 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/density.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -26,61 +26,55 @@ namespace picongpu { -namespace SI -{ - /** The maximum density in particles per m^3 in the gas distribution - * unit: ELEMENTS/m^3 - * - * He (2e- / Atom ) with 1.e15 He / m^3 - * = 2.e15 e- / m^3 */ - - constexpr float_64 BASE_DENSITY_SI = 5.9e28; // solid gold - -} + namespace SI + { + /** The maximum density in particles per m^3 in the gas distribution + * unit: ELEMENTS/m^3 + * + * He (2e- / Atom ) with 1.e15 He / m^3 + * = 2.e15 e- / m^3 */ -namespace densityProfiles -{ + constexpr float_64 BASE_DENSITY_SI = 5.9e28; // solid gold -struct FoilFunctor -{ + } // namespace SI - /** - * This formula uses SI quantities only - * The profile will be multiplied by BASE_DENSITY_SI. 
- * - * @param position_SI total offset including all slides [in meter] - * @param cellSize_SI cell sizes [in meter] - * - * @return float_X density [normalized to 1.0] - */ - HDINLINE float_X operator()( - float2_64 pos, - const float3_64& cellSize_SI - ) + namespace densityProfiles { - /* center point of foil */ - constexpr float_64 plateauPos = 4e-6; - /* thickness of foil */ - constexpr float_64 plateauLength = 2e-6; - /* gaussian ramp length of density above the surface */ - constexpr float_64 rampLength = 0.1e-6; + struct FoilFunctor + { + /** + * This formula uses SI quantities only + * The profile will be multiplied by BASE_DENSITY_SI. + * + * @param position_SI total offset including all slides [in meter] + * @param cellSize_SI cell sizes [in meter] + * + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(float2_64 pos, const float3_64& cellSize_SI) + { + /* center point of foil */ + constexpr float_64 plateauPos = 4e-6; + /* thickness of foil */ + constexpr float_64 plateauLength = 2e-6; + /* gaussian ramp length of density above the surface */ + constexpr float_64 rampLength = 0.1e-6; - using namespace pmacc::algorithms::math; + using namespace pmacc::math; - if( abs( pos.y() - plateauPos) < plateauLength / 2.0 ) - { - return 1.0_X; - } - const float_64 d = math::min( - abs( pos.y() - plateauPos + plateauLength / 2.0 ), - abs( pos.y() - plateauPos - plateauLength / 2.0 ) - ); - return float_X( exp( -d * d / ( 2.0_X * rampLength * rampLength ) ) ); - } -}; + if(abs(pos.y() - plateauPos) < plateauLength / 2.0) + { + return 1.0_X; + } + const float_64 d = math::min( + abs(pos.y() - plateauPos + plateauLength / 2.0), + abs(pos.y() - plateauPos - plateauLength / 2.0)); + return float_X(exp(-d * d / (2.0_X * rampLength * rampLength))); + } + }; -//! definition of free formula profile -using Foil = FreeFormulaImpl< FoilFunctor >; + //! 
definition of free formula profile + using Foil = FreeFormulaImpl; -} // namespace densityProfiles -} // namepsace picongpu + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/dimension.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/dimension.param index 0d727bc754..7d41fd9e9f 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/dimension.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/dimension.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PIConGPU. * diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/grid.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/grid.param index e23d45b366..65ddb03586 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/grid.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -21,7 +21,6 @@ namespace picongpu { - namespace SI { /** extent of one cell in x-direction @@ -43,21 +42,21 @@ namespace picongpu /** Duration of one timestep * unit: seconds */ constexpr float_64 DELTA_T_SI = CELL_WIDTH_SI / SPEED_OF_LIGHT_SI / SQRT_OF_2 / EPS_CFL; - } //namespace SI + } // namespace SI //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! 
Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. * An initial pseudo particle, flying with the speed of light, @@ -76,5 +75,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - +} // namespace picongpu diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/laser.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/laser.param index 361d562cbd..f164b3a779 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/laser.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/laser.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, Alexander Debus +/* Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, Alexander Debus * * This file is part of PIConGPU. * @@ -48,106 +48,110 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace gaussianBeam -{ - //! 
Use only the 0th Laguerremode for a standard Gaussian - static constexpr uint32_t MODENUMBER = 0; - PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); - // This is just an example for a more complicated set of Laguerre modes - //constexpr uint32_t MODENUMBER = 12; - //PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, -0.0160788); - -} // namespace gaussianBeam - - struct GaussianBeamParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 40.0; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 8.0e-15; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_SI = 1.5e-6; - /** the distance to the laser focus in y-direction - * unit: meter */ - static constexpr float_64 FOCUS_POS_SI = 4.e-6; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 6.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - using LAGUERREMODES_t = gaussianBeam::LAGUERREMODES_t; - static constexpr uint32_t MODENUMBER = gaussianBeam::MODENUMBER; - - /** Available polarisation types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - //! 
currently selected laser profile - using Selected = GaussianBeam< GaussianBeamParam >; - -} // namespace laserProfiles -} // namespace fields + namespace gaussianBeam + { + //! Use only the 0th Laguerremode for a standard Gaussian + static constexpr uint32_t MODENUMBER = 0; + PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); + // This is just an example for a more complicated set of Laguerre modes + // constexpr uint32_t MODENUMBER = 12; + // PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, + // 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, + // -0.0160788); + + } // namespace gaussianBeam + + struct GaussianBeamParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 40.0; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 8.0e-15; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_SI = 1.5e-6; + /** the distance to the laser focus in y-direction + * unit: meter */ + static constexpr float_64 FOCUS_POS_SI = 4.e-6; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 6.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + using LAGUERREMODES_t = gaussianBeam::LAGUERREMODES_t; + static constexpr uint32_t MODENUMBER = gaussianBeam::MODENUMBER; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + //! 
currently selected laser profile + using Selected = GaussianBeam; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/particle.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/particle.param index c8a3b2901b..ad34931a73 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/particle.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -28,48 +28,41 @@ namespace picongpu { + namespace particles + { + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * unit: none */ + constexpr float_X MIN_WEIGHTING = 1.0; -namespace particles -{ + namespace startPosition + { + struct RandomParameter100ppc + { + /** Count of particles per cell at initial state + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 100u; + }; + using Random100ppc = RandomImpl; - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none */ - constexpr float_X MIN_WEIGHTING = 1.0; -namespace startPosition -{ + struct QuietParameter1ppc + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = typename mCT::shrinkTo, simDim>::type; + }; + using Quiet1ppc = QuietImpl; - struct RandomParameter100ppc - { - /** Count of particles per cell at initial state - * unit: none - */ - static constexpr uint32_t numParticlesPerCell = 100u; - }; - using Random100ppc = RandomImpl< RandomParameter100ppc >; + } // namespace startPosition - - struct QuietParameter1ppc - { - /** Count of particles per cell per direction at initial state - * unit: none + /** During unit normalization, we assume this is a typical + * 
number of particles per cell for normalization of weighted + * particle attributes. */ - using numParticlesPerDimension = typename mCT::shrinkTo< - mCT::Int< 1, 1, 1 >, - simDim - >::type; - }; - using Quiet1ppc = QuietImpl< QuietParameter1ppc >; - -} // namespace startPosition - - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. - */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = - startPosition::RandomParameter100ppc::numParticlesPerCell; + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = startPosition::RandomParameter100ppc::numParticlesPerCell; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesDefinition.param index c2bd2922f2..121b78fe9c 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau * * This file is part of PIConGPU. 
* @@ -31,106 +31,87 @@ namespace picongpu { - -/*########################### define particle attributes #####################*/ - -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting, - particleId, - momentumPrev1 -#if( RAD_MARK_PARTICLE > 1 ) || ( RAD_ACTIVATE_GAMMA_FILTER != 0 ) - , radiationFlag + /*########################### define particle attributes #####################*/ + + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t< + position, + momentum, + weighting, + particleId, + momentumPrev1 +#if(RAD_MARK_PARTICLE > 1) || (RAD_ACTIVATE_GAMMA_FILTER != 0) + , + radiationFlag #endif ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - -/*--------------------------- photons -------------------------------------------*/ - -value_identifier( float_X, MassRatioPhotons, 0.0 ); -value_identifier( float_X, ChargeRatioPhotons, 0.0 ); - -using ParticleFlagsPhotons = MakeSeq_t< - particlePusher< particles::pusher::Photon >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - massRatio< MassRatioPhotons >, - chargeRatio< ChargeRatioPhotons > ->; - -/* define species photons */ -using PIC_Photons = Particles< - PMACC_CSTRING( "ph" ), - ParticleFlagsPhotons, - DefaultParticleAttributes ->; - - -/*--------------------------- ions -------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier(float_X, MassRatioIons, 359100); -value_identifier(float_X, ChargeRatioIons, -79.0); -value_identifier(float_X, DensityRatioIons, 1.0); - -using ParticleFlagsIons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioIons >, - chargeRatio< 
ChargeRatioIons >, - densityRatio< DensityRatioIons >, - atomicNumbers< ionization::atomicNumbers::Gold_t > ->; - -/* define species ions */ -using PIC_Ions = Particles< - PMACC_CSTRING( "i" ), - ParticleFlagsIons, - DefaultParticleAttributes ->; - - -/*--------------------------- electrons --------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); -value_identifier( float_X, DensityRatioElectrons, 79.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons >, - densityRatio< DensityRatioElectrons >, - bremsstrahlungIons< PIC_Ions >, - bremsstrahlungPhotons< PIC_Photons > ->; - -/* define species electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; - - -/*########################### end species ####################################*/ - - -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons, - PIC_Ions, - PIC_Photons ->; - -} //namespace picongpu + >; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- photons -------------------------------------------*/ + + value_identifier(float_X, MassRatioPhotons, 0.0); + value_identifier(float_X, ChargeRatioPhotons, 0.0); + + using ParticleFlagsPhotons = MakeSeq_t< + particlePusher, + shape, + interpolation, + massRatio, + chargeRatio>; + + /* define species photons */ + using PIC_Photons = Particles; + + + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + 
value_identifier(float_X, MassRatioIons, 359100); + value_identifier(float_X, ChargeRatioIons, -79.0); + value_identifier(float_X, DensityRatioIons, 1.0); + + using ParticleFlagsIons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers>; + + /* define species ions */ + using PIC_Ions = Particles; + + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + value_identifier(float_X, DensityRatioElectrons, 79.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + bremsstrahlungIons, + bremsstrahlungPhotons>; + + /* define species electrons */ + using PIC_Electrons = Particles; + + + /*########################### end species ####################################*/ + + + using VectorAllSpecies = MakeSeq_t; + +} // namespace picongpu diff --git a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesInitialization.param index 28664393fc..dc2b167e24 100644 --- a/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/Bremsstrahlung/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. 
* @@ -33,24 +33,15 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::Foil, - startPosition::Quiet1ppc, - PIC_Ions - >, - CreateDensity< - densityProfiles::Foil, - startPosition::Random100ppc, - PIC_Electrons - > - >; + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + CreateDensity>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/Bunch/cmakeFlags b/share/picongpu/examples/Bunch/cmakeFlags index 31fc52ed6b..e57cd552b9 100755 --- a/share/picongpu/examples/Bunch/cmakeFlags +++ b/share/picongpu/examples/Bunch/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch # # This file is part of PIConGPU. 
# @@ -30,15 +30,17 @@ # - increase by 1, no gaps flags[0]="" -flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_INCLUDE_FIELDBACKGROUND=true'" -flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_INCLUDE_FIELDBACKGROUND=true;-DPARAM_DIMENSION=DIM2'" -flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_SINGLE_PARTICLE=true;-DPARAM_RADFORMFACTOR=radFormFactor_coherent'" -flags[4]="-DPARAM_OVERWRITES:LIST='-DENABLE_SYNCHROTRON_PHOTONS=1;-DPARAM_RADFORMFACTOR=radFormFactor_Gauss_cell'" -flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_FILTER_GAMMA=1;-DPARAM_RADFORMFACTOR=radFormFactor_incoherent'" -flags[6]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionTriangle;-DPARAM_RADFORMFACTOR=radFormFactor_CIC_3D'" -flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionHamming;-DPARAM_RADFORMFACTOR=radFormFactor_TSC_3D'" -flags[8]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionTriplett;-DPARAM_RADFORMFACTOR=radFormFactor_PCS_3D'" -flags[9]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionGauss;-DPARAM_RADFORMFACTOR=radFormFactor_CIC_1Dy'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_INCLUDE_FIELDBACKGROUND=true;-DPARAM_TWTSFAST=1'" +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_INCLUDE_FIELDBACKGROUND=true;-DPARAM_TWTSFAST=1;-DPARAM_DIMENSION=DIM2'" +flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_INCLUDE_FIELDBACKGROUND=true'" +flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_INCLUDE_FIELDBACKGROUND=true;-DPARAM_DIMENSION=DIM2'" +flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_SINGLE_PARTICLE=true;-DPARAM_RADFORMFACTOR=radFormFactor_coherent'" +flags[6]="-DPARAM_OVERWRITES:LIST='-DENABLE_SYNCHROTRON_PHOTONS=1;-DPARAM_RADFORMFACTOR=radFormFactor_Gauss_cell'" +flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_FILTER_GAMMA=1;-DPARAM_RADFORMFACTOR=radFormFactor_incoherent'" +flags[8]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionTriangle;-DPARAM_RADFORMFACTOR=radFormFactor_CIC_3D'" 
+flags[9]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionHamming;-DPARAM_RADFORMFACTOR=radFormFactor_TSC_3D'" +flags[10]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionTriplett;-DPARAM_RADFORMFACTOR=radFormFactor_PCS_3D'" +flags[11]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADWINDOW=radWindowFunctionGauss;-DPARAM_RADFORMFACTOR=radFormFactor_CIC_1Dy'" ################################################################################ diff --git a/share/picongpu/examples/Bunch/etc/picongpu/32.cfg b/share/picongpu/examples/Bunch/etc/picongpu/32.cfg index 26f8031b0d..432e7b6507 100644 --- a/share/picongpu/examples/Bunch/etc/picongpu/32.cfg +++ b/share/picongpu/examples/Bunch/etc/picongpu/32.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Richard Pausch, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Richard Pausch, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/components.param b/share/picongpu/examples/Bunch/include/picongpu/param/components.param index f983693a0f..276f6a8998 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/components.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/components.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Anton Helm, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Anton Helm, Richard Pausch * * This file is part of PIConGPU. * @@ -29,9 +29,9 @@ namespace picongpu { -/*! Simulation Starter --------------------------------------------------- - * - defaultPIConGPU : default PIConGPU configuration - */ -namespace simulation_starter = defaultPIConGPU; + /*! 
Simulation Starter --------------------------------------------------- + * - defaultPIConGPU : default PIConGPU configuration + */ + namespace simulation_starter = defaultPIConGPU; -} +} // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/density.param b/share/picongpu/examples/Bunch/include/picongpu/param/density.param index 4c991b0438..266276a085 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/density.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -27,108 +27,95 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - * - */ + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + * + */ #ifdef PARAM_SINGLE_PARTICLE - /* one particle per cell with weighting 1.0 */ - constexpr float_64 BASE_DENSITY_SI = - 1.0 / - ( CELL_WIDTH_SI * CELL_HEIGHT_SI * CELL_DEPTH_SI ); + /* one particle per cell with weighting 1.0 */ + constexpr float_64 BASE_DENSITY_SI = 1.0 / (CELL_WIDTH_SI * CELL_HEIGHT_SI * CELL_DEPTH_SI); #else - constexpr float_64 BASE_DENSITY_SI = 1.e25; + constexpr float_64 BASE_DENSITY_SI = 1.e25; #endif -} - -namespace densityProfiles -{ - - PMACC_STRUCT(GaussianCloudParam, - /** Profile Formula: - * exponent = |globalCellPos - center| / sigma - * density = e^[ gasFactor * exponent^gasPower ] - */ - (PMACC_C_VALUE(float_X, gasFactor, -0.5)) - (PMACC_C_VALUE(float_X, gasPower, 2.0)) - - /** height of vacuum area on top border - * - * this vacuum is important because of the laser initialization, - * which is done in the first cell of the simulation - * unit: cells - */ - (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) - - /** The central position of the density distribution - * unit: meter - */ - (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 1.024e-5, 9.072e-5, 1.024e-5)) - - /** the distance from gasCenter_SI until the density decreases to its 1/e-th part - * unit: meter */ - (PMACC_C_VECTOR_DIM(float_64, simDim, sigma_SI, 6.0e-6, 6.0e-6, 6.0e-6)) - ); /* struct GaussianCloudParam */ - - /* definition of cloud profile */ - using GaussianCloud = GaussianCloudImpl< GaussianCloudParam >; + } // namespace SI - - struct FreeFormulaFunctor + namespace densityProfiles { - /** This formula uses SI quantities only - * - * The profile will be multiplied by BASE_DENSITY. 
- * - * @param position_SI total offset including all slides [in meter] - * @param cellSize_SI cell sizes [in meter] - * - * @return float_X density [normalized to 1.0] - */ - HDINLINE float_X operator()( - const floatD_64& position_SI, - const float3_64& cellSize_SI - ) + PMACC_STRUCT( + GaussianCloudParam, + /** Profile Formula: + * exponent = |globalCellPos - center| / sigma + * density = e^[ gasFactor * exponent^gasPower ] + */ + (PMACC_C_VALUE(float_X, gasFactor, -0.5))(PMACC_C_VALUE(float_X, gasPower, 2.0)) + + /** height of vacuum area on top border + * + * this vacuum is important because of the laser initialization, + * which is done in the first cell of the simulation + * unit: cells + */ + (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) + + /** The central position of the density distribution + * unit: meter + */ + (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 1.024e-5, 9.072e-5, 1.024e-5)) + + /** the distance from gasCenter_SI until the density decreases to its 1/e-th part + * unit: meter */ + (PMACC_C_VECTOR_DIM(float_64, simDim, sigma_SI, 6.0e-6, 6.0e-6, 6.0e-6))); /* struct GaussianCloudParam */ + + /* definition of cloud profile */ + using GaussianCloud = GaussianCloudImpl; + + + struct FreeFormulaFunctor { - /* add particle in cell at center of Gaussian Cloud profile */ - const float3_64 position_start_SI( 1.024e-5, 9.072e-5, 1.024e-5 ); - - /* from all cells ... */ - const pmacc::math::UInt64< simDim > cell_id( position_SI / cellSize_SI.shrink< simDim >() ); - - /* ... we calculate the corresponding "center" cell to init the particle in ... */ - const pmacc::math::UInt64< simDim > cell_start( - precisionCast< uint64_t >( - math::floor( - position_start_SI.shrink< simDim >() / - cellSize_SI.shrink< simDim >() - ) - ) - ); - - /* ... 
and only in that center cell the density is 1.0, outside zero */ - bool isStartCell = true; - for( uint64_t d = 0; d < simDim; ++d ) - if( cell_id[d] != cell_start[d] ) - isStartCell = false; - - if( isStartCell ) - return 1.0; - - return 0.0; - } - }; - - /* definition of free formula profile */ - using FreeFormula = FreeFormulaImpl< FreeFormulaFunctor >; - -} -} + /** This formula uses SI quantities only + * + * The profile will be multiplied by BASE_DENSITY. + * + * @param position_SI total offset including all slides [in meter] + * @param cellSize_SI cell sizes [in meter] + * + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + /* add particle in cell at center of Gaussian Cloud profile */ + const float3_64 position_start_SI(1.024e-5, 9.072e-5, 1.024e-5); + + /* from all cells ... */ + const pmacc::math::UInt64 cell_id(position_SI / cellSize_SI.shrink()); + + /* ... we calculate the corresponding "center" cell to init the particle in ... */ + const pmacc::math::UInt64 cell_start(precisionCast( + math::floor(position_start_SI.shrink() / cellSize_SI.shrink()))); + + /* ... 
and only in that center cell the density is 1.0, outside zero */ + bool isStartCell = true; + for(uint64_t d = 0; d < simDim; ++d) + if(cell_id[d] != cell_start[d]) + isStartCell = false; + + if(isStartCell) + return 1.0; + + return 0.0; + } + }; + + /* definition of free formula profile */ + using FreeFormula = FreeFormulaImpl; + + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param b/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param index 9dcba0c80e..2bae837d4b 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/fieldBackground.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Alexander Debus, Richard Pausch +/* Copyright 2014-2021 Axel Huebl, Alexander Debus, Richard Pausch * * This file is part of PIConGPU. * @@ -20,200 +20,204 @@ #pragma once /** Load pre-defined templates */ -#include "picongpu/fields/background/templates/TWTS/TWTS.hpp" +#if PARAM_TWTSFAST == 1 +# include "picongpu/fields/background/templates/twtsfast/twtsfast.hpp" +#else +# include "picongpu/fields/background/templates/TWTS/TWTS.hpp" +#endif #ifndef PARAM_INCLUDE_FIELDBACKGROUND -#define PARAM_INCLUDE_FIELDBACKGROUND false +# define PARAM_INCLUDE_FIELDBACKGROUND false #endif -/** Load external background fields - * - */ +/* Load external background fields */ namespace picongpu { class FieldBackgroundE { public: - - /* Add this additional field for pushing particles */ + /** Add this additional field for pushing particles */ static constexpr bool InfluenceParticlePusher = PARAM_INCLUDE_FIELDBACKGROUND; - /* We use this to calculate your SI input back to our unit system */ + /** We use this to calculate your SI input back to our unit system */ PMACC_ALIGN(m_unitField, const float3_64); - /* TWTS E-fields need to be initialized on host, - * so they can look up global grid 
dimensions. + /** TWTS E-fields need to be initialized on host, + * so they can look up global grid dimensions. * - * Note: No PMACC_ALIGN(...) used, since this *additional* memory alignment would require - * roughly float_64 the number of registers in the corresponding kernel on the device. + * Note: No PMACC_ALIGN(...) used, since this *additional* memory alignment would require + * roughly float_64 the number of registers in the corresponding kernel on the device. */ +#if PARAM_TWTSFAST == 1 + const templates::twtsfast::EField twtsFieldE; +#else const templates::twts::EField twtsFieldE; +#endif - /* Constructor is host-only, because of subGrid and halfSimSize initialization */ - HINLINE FieldBackgroundE( const float3_64 unitField ) : - m_unitField(unitField), - twtsFieldE( - /* focus_y [m], the distance to the laser focus in y-direction */ - 30.0e-6, - /* wavelength [m] */ - 0.8e-6, - /* pulselength [s], sigma of std. gauss for intensity (E^2) */ - 10.0e-15 / 2.3548200450309493820231386529194, - /* w_x [m], cylindrically focused spot size */ - 5.0e-6, - /* w_y [m] */ - 0.01, - /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ - 60. * (PI/180.), - /* propagation speed of overlap [speed of light]. */ - 1.0, - /* manual time delay [s] if auto_tdelay is false */ - 39.3e-6 / SI::SPEED_OF_LIGHT_SI, - /* Should PIConGPU automatically choose a suitable time delay? [true/false] */ - false ) - {} + /** Constructor is host-only, because of subGrid and halfSimSize initialization */ + HINLINE FieldBackgroundE(const float3_64 unitField) + : m_unitField(unitField) + , twtsFieldE( + /* focus_y [m], the distance to the laser focus in y-direction */ + 30.0e-6, + /* wavelength [m] */ + 0.8e-6, + /* pulselength [s], sigma of std. 
gauss for intensity (E^2) */ + 10.0e-15 / 2.3548200450309493820231386529194, +#if PARAM_TWTSFAST == 0 + /* w_x [m], cylindrically focused spot size */ + 5.0e-6, +#endif + /* w_y [m] */ + 0.01, + /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ + 60. * (PI / 180.), + /* propagation speed of overlap [speed of light]. */ + 1.0, + /* manual time delay [s] if auto_tdelay is false */ + 39.3e-6 / SI::SPEED_OF_LIGHT_SI, + /* Should PIConGPU automatically choose a suitable time delay? [true/false] */ + false) + { + } /** Specify your background field E(r,t) here * - * \param cellIdx The total cell id counted from the start at t=0 - * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const + * @param cellIdx The total cell id counted from the start at t=0 + * @param currentStep The current time step */ + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { /* unit: meter */ constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - /** UNITCONV */ - constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI - * SI::ELECTRON_MASS_SI * SI::SPEED_OF_LIGHT_SI - * SI::SPEED_OF_LIGHT_SI / SI::ELECTRON_CHARGE_SI; + /* UNITCONV */ + constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * SI::ELECTRON_MASS_SI + * SI::SPEED_OF_LIGHT_SI * SI::SPEED_OF_LIGHT_SI / SI::ELECTRON_CHARGE_SI; - /** unit: W / m^2 */ + /* unit: W / m^2 */ // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) /* unit: none */ - constexpr float_64 _A0 = 1.0; + constexpr float_64 _A0 = 1.0; /* unit: Volt /meter *\todo #738 implement math::vector, native type operations */ - const float3_64 invUnitField = float3_64( 1.0 / m_unitField[0], - 1.0 / m_unitField[1], - 1.0 / m_unitField[2] ); + const float3_64 invUnitField = float3_64(1.0 / m_unitField[0], 1.0 / m_unitField[1], 1.0 / m_unitField[2]); 
/* laser amplitude in picongpu units [ unit: (Volt /meter) / unitField-factor ] * Note: the laser amplitude is included in all field components * polarization and other properties are established by the peak amplitude * normalized twtsFieldE(...) */ - const float3_X amplitude = precisionCast( - float_64(_A0 * UNITCONV_A0_to_Amplitude_SI) * invUnitField ); + const float3_X amplitude + = precisionCast(float_64(_A0 * UNITCONV_A0_to_Amplitude_SI) * invUnitField); /* Note: twtsFieldE(...) is normalized, such that peak amplitude equals unity. */ - return amplitude * twtsFieldE( cellIdx, currentStep ); + return amplitude * twtsFieldE(cellIdx, currentStep); } }; class FieldBackgroundB { public: - /* Add this additional field for pushing particles */ + /** Add this additional field for pushing particles */ static constexpr bool InfluenceParticlePusher = PARAM_INCLUDE_FIELDBACKGROUND; - /* TWTS B-fields need to be initialized on host, - * so they can look up global grid dimensions. + /** TWTS B-fields need to be initialized on host, + * so they can look up global grid dimensions. * - * Note: No PMACC_ALIGN(...) used, since this *additional* memory alignment would require - * roughly float_64 the number of registers in the corresponding kernel on the device. + * Note: No PMACC_ALIGN(...) used, since this *additional* memory alignment would require + * roughly float_64 the number of registers in the corresponding kernel on the device. 
*/ +#if PARAM_TWTSFAST == 1 + const templates::twtsfast::BField twtsFieldB; +#else const templates::twts::BField twtsFieldB; +#endif - /* We use this to calculate your SI input back to our unit system */ + /** We use this to calculate your SI input back to our unit system */ PMACC_ALIGN(m_unitField, const float3_64); - HINLINE FieldBackgroundB( const float3_64 unitField ) : - m_unitField(unitField), - twtsFieldB( - /* focus_y [m], the distance to the laser focus in y-direction */ - 30.0e-6, - /* wavelength [m] */ - 0.8e-6, - /* pulselength [s], sigma of std. gauss for intensity (E^2) */ - 10.0e-15 / 2.3548200450309493820231386529194, - /* w_x [m], cylindrically focused spot size */ - 5.0e-6, - /* w_y [m] */ - 0.01, - /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ - 60. * (PI / 180.), - /* propagation speed of overlap [speed of light]. */ - 1.0, - /* manual time delay [s] if auto_tdelay is false */ - 39.3e-6 / SI::SPEED_OF_LIGHT_SI, - /* Should PIConGPU automatically choose a suitable time delay? [true / false] */ - false ) - {} + HINLINE FieldBackgroundB(const float3_64 unitField) + : m_unitField(unitField) + , twtsFieldB( + /* focus_y [m], the distance to the laser focus in y-direction */ + 30.0e-6, + /* wavelength [m] */ + 0.8e-6, + /* pulselength [s], sigma of std. gauss for intensity (E^2) */ + 10.0e-15 / 2.3548200450309493820231386529194, +#if PARAM_TWTSFAST == 0 + /* w_x [m], cylindrically focused spot size */ + 5.0e-6, +#endif + /* w_y [m] */ + 0.01, + /* interaction angle between TWTS laser propagation vector and the y-axis [rad] */ + 60. * (PI / 180.), + /* propagation speed of overlap [speed of light]. */ + 1.0, + /* manual time delay [s] if auto_tdelay is false */ + 39.3e-6 / SI::SPEED_OF_LIGHT_SI, + /* Should PIConGPU automatically choose a suitable time delay? 
[true / false] */ + false) + { + } /** Specify your background field B(r,t) here * - * \param cellIdx The total cell id counted from the start at t=0 - * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const + * @param cellIdx The total cell id counted from the start at t=0 + * @param currentStep The current time step */ + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { /* unit: meter */ constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - /** UNITCONV */ - constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI - * SI::ELECTRON_MASS_SI * SI::SPEED_OF_LIGHT_SI - * SI::SPEED_OF_LIGHT_SI / SI::ELECTRON_CHARGE_SI; + /* UNITCONV */ + constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * SI::ELECTRON_MASS_SI + * SI::SPEED_OF_LIGHT_SI * SI::SPEED_OF_LIGHT_SI / SI::ELECTRON_CHARGE_SI; - /** unit: W / m^2 */ + /* unit: W / m^2 */ // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) /** unit: none */ - constexpr float_64 _A0 = 1.0; + constexpr float_64 _A0 = 1.0; /** unit: Volt /meter */ - const float3_64 invUnitField = float3_64( 1.0 / m_unitField[0], - 1.0 / m_unitField[1], - 1.0 / m_unitField[2] ); + const float3_64 invUnitField = float3_64(1.0 / m_unitField[0], 1.0 / m_unitField[1], 1.0 / m_unitField[2]); /* laser amplitude in picongpu units [ unit: (Volt/meter) / unitField-factor ] * Note: the laser amplitude is included in all field components * polarization and other properties are established by the peak amplitude * normalized twtsFieldB(...) */ - const float3_X amplitude = precisionCast( - float_64(_A0 * UNITCONV_A0_to_Amplitude_SI) * invUnitField ); + const float3_X amplitude + = precisionCast(float_64(_A0 * UNITCONV_A0_to_Amplitude_SI) * invUnitField); /* Note: twtsFieldB(...) is normalized, such that peak amplitude equals unity. 
*/ - return amplitude * twtsFieldB( cellIdx, currentStep ); + return amplitude * twtsFieldB(cellIdx, currentStep); } }; class FieldBackgroundJ { public: - /* Add this additional field? */ + /** Add this additional field? */ static constexpr bool activated = false; - /* We use this to calculate your SI input back to our unit system */ + /** We use this to calculate your SI input back to our unit system */ PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundJ( const float3_64 unitField ) : m_unitField(unitField) - {} + HDINLINE FieldBackgroundJ(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field J(r,t) here * - * \param cellIdx The total cell id counted from the start at t=0 - * \param currentStep The current time step */ - HDINLINE float3_X - operator()( const DataSpace& cellIdx, - const uint32_t currentStep ) const + * @param cellIdx The total cell id counted from the start at t=0 + * @param currentStep The current time step */ + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { return float3_X(0.0, 0.0, 0.0); } diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/grid.param b/share/picongpu/examples/Bunch/include/picongpu/param/grid.param index 27c1c90065..7cba20d5a5 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/grid.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -18,12 +18,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Duration of one timestep @@ -52,21 +50,21 @@ namespace picongpu * behave like the interaction of infinite "wire particles" * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! 
Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. * An initial pseudo particle, flying with the speed of light, @@ -85,7 +83,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/laser.param b/share/picongpu/examples/Bunch/include/picongpu/param/laser.param index 4f2bf9f0a9..8127081277 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/laser.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/laser.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Anton Helm, Richard Pausch, Axel Huebl, Alexander Debus +/* Copyright 2013-2021 Anton Helm, Richard Pausch, Axel Huebl, Alexander Debus * * This file is part of PIConGPU. 
* @@ -48,83 +48,86 @@ namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ - struct PlaneWaveParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 1.0; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** The profile of the test Lasers 0 and 2 can be stretched by a - * constant area between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 50.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; - - /** Pulse length: sigma of std. gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 2.65e-15; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. 
- * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 20.6146; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarization types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - //! currently selected laser profile - using Selected = PlaneWave< PlaneWaveParam >; - -} // namespace laserProfiles -} // namespace fields + struct PlaneWaveParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 1.0; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** The profile of the test Lasers 0 and 2 can be stretched by a + * constant area between the up and downramp + * unit: 
seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI + = 50.0 * WAVE_LENGTH_SI / ::picongpu::SI::SPEED_OF_LIGHT_SI; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 2.65e-15; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after + * the plateau unit: none */ + static constexpr float_64 RAMP_INIT = 20.6146; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarization types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + //! 
currently selected laser profile + using Selected = PlaneWave; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/particle.param b/share/picongpu/examples/Bunch/include/picongpu/param/particle.param index dc272be978..c76480f74c 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/particle.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Axel Huebl * * This file is part of PIConGPU. * @@ -30,85 +30,77 @@ namespace picongpu { - -namespace particles -{ - - /* a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ -#ifdef PARAM_SINGLE_PARTICLE - // note: this specific setting allows all kinds of weightings > 0.0 - constexpr float_X MIN_WEIGHTING = std::numeric_limits< float_X >::min(); - - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 1; -#else - constexpr float_X MIN_WEIGHTING = 10.0; - - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 6; -#endif - -namespace manipulators -{ - - CONST_VECTOR( float_X, 3, DriftParamNegative_direction, 0.0, -1.0, 0.0 ); - struct DriftParamNegative + namespace particles { - /** Initial particle drift velocity for electrons and ions - * Examples: - * - No drift is equal to 1.0 + /* a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted * unit: none */ - static constexpr float_64 gamma = 5.0; - const DriftParamNegative_direction_t direction; - }; - // definition of SetDrift start - using AssignYDriftNegative = unary::Drift< - DriftParamNegative, - nvidia::functors::Assign - >; - -} // namespace manipulators - - -namespace startPosition -{ - - struct RandomParameter - { - /** Count of particles per cell at initial state - * unit: none - */ - static constexpr uint32_t 
numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; - }; - using Random = RandomImpl< RandomParameter >; - - - // sit directly in lower corner of the cell - CONST_VECTOR( - float_X, - 3, - InCellOffset, - /* each x, y, z in-cell position component in range [0.0, 1.0) */ - 0.0, - 0.0, - 0.0 - ); - - struct OnePositionParameter - { - /** Count of particles per cell at initial state - * unit: none - */ - static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; +#ifdef PARAM_SINGLE_PARTICLE + // note: this specific setting allows all kinds of weightings > 0.0 + constexpr float_X MIN_WEIGHTING = std::numeric_limits::min(); - const InCellOffset_t inCellOffset; - }; + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 1; +#else + constexpr float_X MIN_WEIGHTING = 10.0; - // definition of one specific position for particle start - using OnePosition = OnePositionImpl< OnePositionParameter >; + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 6; +#endif -} // namespace startPosition -} // namespace particles + namespace manipulators + { + CONST_VECTOR(float_X, 3, DriftParamNegative_direction, 0.0, -1.0, 0.0); + struct DriftParamNegative + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 5.0; + const DriftParamNegative_direction_t direction; + }; + // definition of SetDrift start + using AssignYDriftNegative = unary::Drift; + + } // namespace manipulators + + + namespace startPosition + { + struct RandomParameter + { + /** Count of particles per cell at initial state + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; + }; + using Random = RandomImpl; + + + // sit directly in lower corner of the cell + CONST_VECTOR( + float_X, + 3, + InCellOffset, + /* each x, y, z in-cell position component in range [0.0, 1.0) */ + 0.0, + 0.0, + 0.0); + + struct OnePositionParameter + { + /** Count of particles per cell 
at initial state + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; + + const InCellOffset_t inCellOffset; + }; + + // definition of one specific position for particle start + using OnePosition = OnePositionImpl; + + } // namespace startPosition + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/png.param b/share/picongpu/examples/Bunch/include/picongpu/param/png.param index e05a31a24c..bdc71b3e02 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/png.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/png.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Richard Pausch * * This file is part of PIConGPU. * @@ -24,17 +24,17 @@ namespace picongpu { -/*scale image before write to file, only scale if value is not 1.0 - */ -constexpr float_64 scale_image = 1.0; + /*scale image before write to file, only scale if value is not 1.0 + */ + constexpr float_64 scale_image = 1.0; -/*if true image is scaled if cellsize is not quadratic, else no scale*/ -constexpr bool scale_to_cellsize = true; + /*if true image is scaled if cellsize is not quadratic, else no scale*/ + constexpr bool scale_to_cellsize = true; -constexpr bool white_box_per_GPU = true; + constexpr bool white_box_per_GPU = true; -namespace visPreview -{ + namespace visPreview + { // normalize EM fields to typical laser or plasma quantities //-1: Auto: enable adaptive scaling for each output // 1: Laser: typical fields calculated out of the laser amplitude @@ -49,33 +49,32 @@ namespace visPreview #define EM_FIELD_SCALE_CHANNEL2 1 #define EM_FIELD_SCALE_CHANNEL3 1 -// multiply highest undisturbed particle density with factor -constexpr float_X preParticleDens_opacity = 0.25; -constexpr float_X preChannel1_opacity = 1.0; -constexpr float_X preChannel2_opacity = 1.0; -constexpr float_X preChannel3_opacity = 1.0; - -// 
specify color scales for each channel -namespace preParticleDensCol = colorScales::red; -namespace preChannel1Col = colorScales::blue; -namespace preChannel2Col = colorScales::green; -namespace preChannel3Col = colorScales::none; + // multiply highest undisturbed particle density with factor + constexpr float_X preParticleDens_opacity = 0.25; + constexpr float_X preChannel1_opacity = 1.0; + constexpr float_X preChannel2_opacity = 1.0; + constexpr float_X preChannel3_opacity = 1.0; -/* png preview settings for each channel */ -DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return math::abs2(field_J); -} + // specify color scales for each channel + namespace preParticleDensCol = colorScales::red; + namespace preChannel1Col = colorScales::blue; + namespace preChannel2Col = colorScales::green; + namespace preChannel3Col = colorScales::none; -DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return field_E.x() * field_E.x(); -} + /* png preview settings for each channel */ + DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return pmacc::math::abs2(field_J); + } -DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return -1.0_X * field_E.y(); -} -} -} + DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return field_E.x() * field_E.x(); + } + DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return -1.0_X * field_E.y(); + } + } // namespace visPreview +} // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/radiation.param b/share/picongpu/examples/Bunch/include/picongpu/param/radiation.param index 09833200e3..a7b2f545df 100644 --- 
a/share/picongpu/examples/Bunch/include/picongpu/param/radiation.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/radiation.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -18,13 +18,12 @@ */ - #pragma once - /* - radiation verbose level: - 0=nothing, 1=physics, 2=simulation_state, 4=memory, 8=critical - */ +/* + radiation verbose level: + 0=nothing, 1=physics, 2=simulation_state, 4=memory, 8=critical +*/ #define PIC_VERBOSE_RADIATION 3 @@ -36,146 +35,162 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace linear_frequencies -{ -namespace SI -{ -constexpr float_64 omega_min = 0.0; -constexpr float_64 omega_max = 5.8869e17; -} - -constexpr unsigned int N_omega = 1024; // number of frequencies -} - -namespace log_frequencies -{ -namespace SI -{ -constexpr float_64 omega_min = 1.0e14; -constexpr float_64 omega_max = 1.0e17; -} - -constexpr unsigned int N_omega = 2048; // number of frequencies -} + namespace plugins + { + namespace radiation + { + namespace linear_frequencies + { + namespace SI + { + constexpr float_64 omega_min = 0.0; + constexpr float_64 omega_max = 5.8869e17; + } // namespace SI + + constexpr unsigned int N_omega = 1024; // number of frequencies + } // namespace linear_frequencies + + namespace log_frequencies + { + namespace SI + { + constexpr float_64 omega_min = 1.0e14; + constexpr float_64 omega_max = 1.0e17; + } // namespace SI + + constexpr unsigned int N_omega = 2048; // number of frequencies + } // namespace log_frequencies + + + namespace frequencies_from_list + { + /** path to text file with frequencies */ + constexpr const char* listLocation = "/path/to/frequency.list"; + constexpr unsigned int N_omega = 2048; // number of frequencies + } // namespace frequencies_from_list + + + namespace radiation_frequencies = linear_frequencies; + + + namespace radiationNyquist + { + 
constexpr float_32 NyquistFactor = 0.5; + } + + /////////////////////////////////////////////////// + + + // correct treatment of coherent and incoherent radiation from macroparticles + /* Choose different form factors in order to consider different particle shapes for radiation + * - radFormFactor_CIC_3D ... CIC charge distribution + * - radFormFactor_TSC_3D ... TSC charge distribution + * - radFormFactor_PCS_3D ... PCS charge distribution + * - radFormFactor_CIC_1Dy ... only CIC charge distribution in y + * - radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution + * - radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size + * - radFormFactor_incoherent ... only incoherent radiation + * - radFormFactor_coherent ... only coherent radiation + */ + namespace radFormFactor_CIC_3D + { + } + namespace radFormFactor_TSC_3D + { + } + namespace radFormFactor_PCS_3D + { + } + namespace radFormFactor_CIC_1Dy + { + } + namespace radFormFactor_Gauss_spherical + { + } + namespace radFormFactor_Gauss_cell + { + } + namespace radFormFactor_incoherent + { + } + namespace radFormFactor_coherent + { + } +#ifndef PARAM_RADFORMFACTOR +# define PARAM_RADFORMFACTOR radFormFactor_Gauss_spherical +#endif + namespace radFormFactor = PARAM_RADFORMFACTOR; -namespace frequencies_from_list -{ -/** path to text file with frequencies */ -constexpr const char * listLocation = "/path/to/frequency.list"; -constexpr unsigned int N_omega = 2048; // number of frequencies -} + /////////////////////////////////////////////////////////// -namespace radiation_frequencies = linear_frequencies; + namespace parameters + { + constexpr unsigned int N_observer = 128; // number of looking directions -namespace radiationNyquist -{ - constexpr float_32 NyquistFactor = 0.5; -} - -/////////////////////////////////////////////////// - - - // correct treatment of coherent and incoherent radiation from macroparticles - /* Choose different form factors in order to consider 
different particle shapes for radiation - * - radFormFactor_CIC_3D ... CIC charge distribution - * - radFormFactor_TSC_3D ... TSC charge distribution - * - radFormFactor_PCS_3D ... PCS charge distribution - * - radFormFactor_CIC_1Dy ... only CIC charge distribution in y - * - radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution - * - radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size - * - radFormFactor_incoherent ... only incoherent radiation - * - radFormFactor_coherent ... only coherent radiation - */ - namespace radFormFactor_CIC_3D { } - namespace radFormFactor_TSC_3D { } - namespace radFormFactor_PCS_3D { } - namespace radFormFactor_CIC_1Dy { } - namespace radFormFactor_Gauss_spherical { } - namespace radFormFactor_Gauss_cell { } - namespace radFormFactor_incoherent { } - namespace radFormFactor_coherent { } + } /* end namespace parameters */ -#ifndef PARAM_RADFORMFACTOR -# define PARAM_RADFORMFACTOR radFormFactor_Gauss_spherical -#endif -namespace radFormFactor = PARAM_RADFORMFACTOR; + /** activate particles for radiation */ + struct GammaFilterFunctor + { + static constexpr float_X radiationGamma = 3.0; + template + HDINLINE void operator()(T_Particle& particle) + { + if(picongpu::gamma( + particle[picongpu::momentum_], + picongpu::traits::attribute::getMass(particle[picongpu::weighting_], particle)) + >= radiationGamma) + particle[picongpu::radiationMask_] = true; + } + }; -/////////////////////////////////////////////////////////// + /* filter to enable radiation for electrons + * + * to enable the filter: + * - goto file `speciesDefinition.param` + * - add the attribute `radiationMask` to the electron species + */ + using RadiationParticleFilter = picongpu::particles::manipulators::generic::Free; -namespace parameters -{ + // add a window function weighting to the radiation in order + // to avoid ringing effects from sharpe boundaries + // default: no window function via `radWindowFunctionNone` -constexpr 
unsigned int N_observer = 128; // number of looking directions - -} /* end namespace parameters */ - - /** activate particles for radiation */ - struct GammaFilterFunctor - { - static constexpr float_X radiationGamma = 3.0; - - template< typename T_Particle > - HDINLINE void operator()( T_Particle& particle ) - { - if( - picongpu::gamma( - particle[ picongpu::momentum_ ], - picongpu::traits::attribute::getMass( - particle[ picongpu::weighting_ ], - particle - ) - ) >= radiationGamma - ) - particle[ picongpu::radiationMask_ ] = true; - } - }; - - - /* filter to enable radiation for electrons - * - * to enable the filter: - * - goto file `speciesDefinition.param` - * - add the attribute `radiationMask` to the electron species - */ - using RadiationParticleFilter = picongpu::particles::manipulators::generic::Free< - GammaFilterFunctor - >; - - - -// add a window function weighting to the radiation in order -// to avoid ringing effects from sharpe boundaries -// default: no window function via `radWindowFunctionNone` - -/* Choose different window function in order to get better ringing reduction - * radWindowFunctionTriangle - * radWindowFunctionHamming - * radWindowFunctionTriplett - * radWindowFunctionGauss - * radWindowFunctionNone - */ + /* Choose different window function in order to get better ringing reduction + * radWindowFunctionTriangle + * radWindowFunctionHamming + * radWindowFunctionTriplett + * radWindowFunctionGauss + * radWindowFunctionNone + */ #ifndef PARAM_RADWINDOW -# define PARAM_RADWINDOW radWindowFunctionNone +# define PARAM_RADWINDOW radWindowFunctionNone #endif - namespace radWindowFunctionTriangle { } - namespace radWindowFunctionHamming { } - namespace radWindowFunctionTriplett { } - namespace radWindowFunctionGauss { } - namespace radWindowFunctionNone { } - - namespace radWindowFunction = PARAM_RADWINDOW; - -} // namespace radiation -} // namespace plugins + namespace radWindowFunctionTriangle + { + } + namespace radWindowFunctionHamming + 
{ + } + namespace radWindowFunctionTriplett + { + } + namespace radWindowFunctionGauss + { + } + namespace radWindowFunctionNone + { + } + + namespace radWindowFunction = PARAM_RADWINDOW; + + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/radiationObserver.param b/share/picongpu/examples/Bunch/include/picongpu/param/radiationObserver.param index 392bc7d24b..0ffaa98dbb 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/radiationObserver.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/radiationObserver.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Richard Pausch * * This file is part of PIConGPU. * @@ -22,60 +22,59 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace radiation_observer -{ - /** Compute observation angles - * - * This function is used in the Radiation plug-in kernel to compute - * the observation directions given as a unit vector pointing - * towards a 'virtual' detector - * - * @param observation_id_extern - * int index that identifies each block on the GPU - * to compute the observation direction - * - * @return unit vector pointing in observation direction - * type: vector_64 - * - */ - HDINLINE vector_64 observation_direction(const int observation_id_extern) + namespace plugins { - /** Computes observation angles along the x-y plane. - * Assuming electron(s) fly in -y direction and the laser - * propages in +y direction, the observation angles are centered - * around the -y-axis (0,-1,0) . - * By setting gamma, the angle range can be adjusted to the - * energy of the electrons. 
- */ - - /* in this case only one id is needed: an index for theta */ - const int my_theta_id = observation_id_extern; + namespace radiation + { + namespace radiation_observer + { + /** Compute observation angles + * + * This function is used in the Radiation plug-in kernel to compute + * the observation directions given as a unit vector pointing + * towards a 'virtual' detector + * + * @param observation_id_extern + * int index that identifies each block on the GPU + * to compute the observation direction + * + * @return unit vector pointing in observation direction + * type: vector_64 + * + */ + HDINLINE vector_64 observation_direction(const int observation_id_extern) + { + /** Computes observation angles along the x-y plane. + * Assuming electron(s) fly in -y direction and the laser + * propages in +y direction, the observation angles are centered + * around the -y-axis (0,-1,0) . + * By setting gamma, the angle range can be adjusted to the + * energy of the electrons. + */ - /* set up: */ - constexpr picongpu::float_64 gamma_times_thetaMax = 1.5; /* max normalized angle */ - constexpr picongpu::float_64 gamma = 5.0; /* relativistic gamma */ - constexpr picongpu::float_64 thetaMax = gamma_times_thetaMax / gamma; /* max angle */ + /* in this case only one id is needed: an index for theta */ + const int my_theta_id = observation_id_extern; - /* stepwith of theta for from [-thetaMax : +thetaMax] */ - constexpr picongpu::float_64 delta_theta = 2.0 * thetaMax / (parameters::N_observer); + /* set up: */ + constexpr picongpu::float_64 gamma_times_thetaMax = 1.5; /* max normalized angle */ + constexpr picongpu::float_64 gamma = 5.0; /* relativistic gamma */ + constexpr picongpu::float_64 thetaMax = gamma_times_thetaMax / gamma; /* max angle */ - /* compute angle theta for index */ - const picongpu::float_64 theta(my_theta_id * delta_theta - thetaMax + picongpu::PI); - /* + picongpu::PI -> turn observation direction 180 degrees towards -y */ + /* stepwith of theta for 
from [-thetaMax : +thetaMax] */ + constexpr picongpu::float_64 delta_theta = 2.0 * thetaMax / (parameters::N_observer); - /* compute observation unit vector */ - picongpu::float_32 sinTheta; - picongpu::float_32 cosTheta; - math::sincos(precisionCast(theta), sinTheta, cosTheta); - return vector_64(sinTheta, cosTheta, 0.0); + /* compute angle theta for index */ + const picongpu::float_64 theta(my_theta_id * delta_theta - thetaMax + picongpu::PI); + /* + picongpu::PI -> turn observation direction 180 degrees towards -y */ - } + /* compute observation unit vector */ + picongpu::float_32 sinTheta; + picongpu::float_32 cosTheta; + pmacc::math::sincos(precisionCast(theta), sinTheta, cosTheta); + return vector_64(sinTheta, cosTheta, 0.0); + } -} // namespace radiation_observer -} // namespace radiation -} // namespace plugins + } // namespace radiation_observer + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/species.param b/share/picongpu/examples/Bunch/include/picongpu/param/species.param deleted file mode 100644 index df5d1a5664..0000000000 --- a/share/picongpu/examples/Bunch/include/picongpu/param/species.param +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/particles/shapes.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" -#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" - -#include "picongpu/particles/flylite/NonLTE.def" -#include "picongpu/fields/currentDeposition/Solver.def" - - -namespace picongpu -{ -/*---------------------------- generic solver---------------------------------*/ - -/*! Particle Shape definitions ------------------------------------------------- - * - particles::shapes::CIC : 1st order - * - particles::shapes::TSC : 2nd order - * - particles::shapes::PCS : 3rd order - * - particles::shapes::P4S : 4th order - * - * example: using UsedParticleShape = particles::shapes::CIC; - */ -using UsedParticleShape = particles::shapes::CIC; - -/* define which interpolation method is used to interpolate fields to particle*/ -using UsedField2Particle = FieldToParticleInterpolation< UsedParticleShape, AssignedTrilinearInterpolation >; - -/*! select current solver method ----------------------------------------------- - * - currentSolver::Esirkepov : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - currentSolver::VillaBune<> : particle shapes - CIC (1st order) only - * - currentSolver::EmZ : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - * For development purposes: --------------------------------------------------- - * - currentSolver::EsirkepovNative : generic version of currentSolverEsirkepov - * without optimization (~4x slower and needs more shared memory) - */ -using UsedParticleCurrentSolver = currentSolver::Esirkepov< UsedParticleShape >; - -/*! 
particle pusher configuration ---------------------------------------------- - * - * Defining a pusher is optional for particles - * - * - particles::pusher::Vay : better suited relativistic boris pusher - * - particles::pusher::Boris : standard boris pusher - * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher - * with classical radiation reaction - * - * For diagnostics & modeling: ------------------------------------------------ - * - particles::pusher::Free : free propagation, ignore fields - * (= free stream model) - * - particles::pusher::Photon : propagate with c in direction of normalized mom. - * - particles::pusher::Probe : Probe particles that interpolate E & B - * For development purposes: -------------------------------------------------- - * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) - */ -using UsedParticlePusher = particles::pusher::Boris; - -} // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/Bunch/include/picongpu/param/speciesDefinition.param index 383754520c..19b005bfb9 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau * * This file is part of PIConGPU. 
* @@ -31,82 +31,75 @@ namespace picongpu { - /*########################### define particle attributes #####################*/ -//disable or enable functor RadiationParticleFilter -//diable (0) / enable (1) +// disable or enable functor RadiationParticleFilter +// diable (0) / enable (1) #ifndef PARAM_FILTER_GAMMA # define PARAM_FILTER_GAMMA 0 #endif -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting, - particleId, - momentumPrev1 -# if( PARAM_FILTER_GAMMA == 1 ) - , radiationMask -# endif ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - -/*--------------------------- photons -------------------------------------------*/ - -value_identifier( float_X, MassRatioPhotons, 0.0 ); -value_identifier( float_X, ChargeRatioPhotons, 0.0 ); - -using ParticleFlagsPhotons = MakeSeq_t< - particlePusher< particles::pusher::Photon >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - massRatio< MassRatioPhotons >, - chargeRatio< ChargeRatioPhotons > ->; - -/* define species photons */ -using PIC_Photons = Particles< - PMACC_CSTRING( "ph" ), - ParticleFlagsPhotons, - DefaultParticleAttributes ->; - -/*--------------------------- electrons --------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > -#if( ENABLE_SYNCHROTRON_PHOTONS == 1 ) - , synchrotronPhotons< PIC_Photons > + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t< + position, + momentum, + 
weighting, + particleId, + momentumPrev1 +#if(PARAM_FILTER_GAMMA == 1) + , + radiationMask +#endif + >; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- photons -------------------------------------------*/ + + value_identifier(float_X, MassRatioPhotons, 0.0); + value_identifier(float_X, ChargeRatioPhotons, 0.0); + + using ParticleFlagsPhotons = MakeSeq_t< + particlePusher, + shape, + interpolation, + massRatio, + chargeRatio>; + + /* define species photons */ + using PIC_Photons = Particles; + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + massRatio, + chargeRatio +#if(ENABLE_SYNCHROTRON_PHOTONS == 1) + , + synchrotronPhotons #endif ->; + >; -/* define species electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; + /* define species electrons */ + using PIC_Electrons = Particles; -/*########################### end species ####################################*/ + /*########################### end species ####################################*/ -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons -#if( ENABLE_SYNCHROTRON_PHOTONS == 1 ) - , PIC_Photons + using VectorAllSpecies = MakeSeq_t< + PIC_Electrons +#if(ENABLE_SYNCHROTRON_PHOTONS == 1) + , + PIC_Photons #endif ->; + >; -} //namespace picongpu +} // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/Bunch/include/picongpu/param/speciesInitialization.param index e340ab48da..3693d1c211 100644 --- 
a/share/picongpu/examples/Bunch/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,31 +33,19 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< #ifdef PARAM_SINGLE_PARTICLE - CreateDensity< - densityProfiles::FreeFormula, - startPosition::OnePosition, - PIC_Electrons - >, + CreateDensity, #else - CreateDensity< - densityProfiles::GaussianCloud, - startPosition::Random, - PIC_Electrons - >, + CreateDensity, #endif - Manipulate< - manipulators::AssignYDriftNegative, - PIC_Electrons - > - >; + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/Bunch/include/picongpu/param/starter.param b/share/picongpu/examples/Bunch/include/picongpu/param/starter.param index fb40c2ab70..e05a8715b6 100644 --- a/share/picongpu/examples/Bunch/include/picongpu/param/starter.param +++ b/share/picongpu/examples/Bunch/include/picongpu/param/starter.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Richard Pausch +/* Copyright 2013-2021 Richard Pausch * * This file is part of PIConGPU. 
* @@ -18,7 +18,6 @@ */ - #pragma once @@ -26,9 +25,5 @@ namespace picongpu { namespace defaultPIConGPU { - } -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/Empty/etc/picongpu/1.cfg b/share/picongpu/examples/Empty/etc/picongpu/1.cfg index 4b01125bae..1d1945c8e9 100644 --- a/share/picongpu/examples/Empty/etc/picongpu/1.cfg +++ b/share/picongpu/examples/Empty/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/FieldAbsorberTest/cmakeFlags b/share/picongpu/examples/FieldAbsorberTest/cmakeFlags index bc530b7f09..89a833122d 100755 --- a/share/picongpu/examples/FieldAbsorberTest/cmakeFlags +++ b/share/picongpu/examples/FieldAbsorberTest/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch, Sergei Bastrakov +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch, Sergei Bastrakov # # This file is part of PIConGPU. 
# @@ -30,62 +30,68 @@ # - increase by 1, no gaps # Test that exponential damping compiles in 3D and 2D -flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=8'" -flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=8;-DPARAM_DIMENSION=DIM2'" +flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=10'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=10;-DPARAM_DIMENSION=DIM2'" +# Test that arbitrary-order solver compiles in 3D and 2D +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=ArbitraryOrderFDTD;-DPARAM_ABSORBER_SIZE=10'" +flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=ArbitraryOrderFDTD;-DPARAM_ABSORBER_SIZE=10;-DPARAM_DIMENSION=DIM2'" # Test that PML compiles in 3D and 2D -flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" -flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" +flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +# Test that arbitrary-order solver with PML absorbing boundary conditions compiles in 3D and 2D +flags[6]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=ArbitraryOrderFDTDPML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" +flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=ArbitraryOrderFDTDPML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" # The following tests are for absorber performance and demonstration of # reasonable parameters, commented out to make compile-time tests faster # Exponential 
damping in 3D, default strength -#flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=12'" -#flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=16'" -#flags[6]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=24'" -#flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=32'" +#flags[8]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=12'" +#flags[9]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=16'" +#flags[10]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=24'" +#flags[11]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=32'" # Exponential damping in 2D, default strength -#flags[8]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=12;-DPARAM_DIMENSION=DIM2'" -#flags[9]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=16;-DPARAM_DIMENSION=DIM2'" -#flags[10]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=24;-DPARAM_DIMENSION=DIM2'" -#flags[11]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=32;-DPARAM_DIMENSION=DIM2'" +#flags[12]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=12;-DPARAM_DIMENSION=DIM2'" +#flags[13]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=16;-DPARAM_DIMENSION=DIM2'" +#flags[14]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=24;-DPARAM_DIMENSION=DIM2'" +#flags[15]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Yee;-DPARAM_ABSORBER_SIZE=32;-DPARAM_DIMENSION=DIM2'" # Convolutional PML in 3D, default strength, no stretching -#flags[12]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=6;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" 
-#flags[13]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" -#flags[14]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=12;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" -#flags[15]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" +#flags[16]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=6;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" +#flags[17]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" +#flags[18]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=12;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" +#flags[19]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2'" # Convolutional PML in 3D, default strength, stretching -#flags[16]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2'" -#flags[17]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2'" +#flags[20]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2'" +#flags[21]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2'" # Convolutional PML in 2D, default strength, no stretching -#flags[18]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=6;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" -#flags[19]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" 
-#flags[20]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=12;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" -#flags[21]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +#flags[22]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=6;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +#flags[23]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +#flags[24]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=12;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +#flags[25]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" # Convolutional PML in 2D, default strength, stretching -#flags[22]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" -#flags[23]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +#flags[26]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" +#flags[27]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.2;-DPARAM_DIMENSION=DIM2'" -# Unixaxial PML in 3D, default strength, no stretching -#flags[24]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0'" 
-#flags[25]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0'" +# Uniaxial PML in 3D, default strength, no stretching +#flags[28]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0'" +#flags[29]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0'" -# Unixaxial PML in 3D, default strength, stretching -#flags[26]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0'" -#flags[27]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0'" +# Uniaxial PML in 3D, default strength, stretching +#flags[30]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0'" +#flags[31]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0'" -# Unixaxial PML in 2D, default strength, no stretching -#flags[28]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" -#flags[29]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" +# Uniaxial PML in 2D, default strength, no stretching +#flags[32]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" +#flags[33]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=1.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" -# Unixaxial PML in 2D, default strength, stretching 
-#flags[30]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=8;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" -#flags[31]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" +# Uniaxial PML in 2D, default strength, stretching +#flags[34]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=10;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" +#flags[35]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=YeePML;-DPARAM_PML_SIZE=16;-DPARAM_PML_KAPPA_MAX=10.0;-DPARAM_PML_ALPHA_MAX=0.0;-DPARAM_DIMENSION=DIM2'" ################################################################################ diff --git a/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/1.cfg b/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/1.cfg index b7d014e716..dc0dabefbb 100644 --- a/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/1.cfg +++ b/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/1.cfg @@ -1,5 +1,4 @@ -# Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, Axel Huebl, -# Sergei Bastrakov +# Copyright 2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, Axel Huebl, Sergei Bastrakov # # This file is part of PIConGPU. # @@ -37,9 +36,9 @@ TBG_devices_x=1 TBG_devices_y=1 TBG_devices_z=1 -# When changing the number of cells consider changing sourceIdx -# in FieldBackgroundJ::operator() -TBG_numCells=128 +# When changing the number of cells consider changing positionX, positionY in FieldBackgroundJ::operator(). +# To match the setup from the Taflove book, the size should be 40 + PML size min border + PML size max border. 
+TBG_numCells=60 TBG_gridSize="!TBG_numCells !TBG_numCells !TBG_numCells" TBG_steps="1000" @@ -48,7 +47,12 @@ TBG_steps="1000" ## Section: Optional Variables ## ################################# -TBG_plugins="--fields_energy.period 10 --hdf5.period 10 --hdf5.file simData" +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simData \ + --openPMD.ext h5" + +TBG_plugins="--fields_energy.period 10 !TBG_openPMD" ################################# diff --git a/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/4.cfg b/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/4.cfg new file mode 100644 index 0000000000..e3ffe8b2cf --- /dev/null +++ b/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/4.cfg @@ -0,0 +1,74 @@ +# Copyright 2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, Axel Huebl, Sergei Bastrakov +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +## +## This configuration file is used by PIConGPU's TBG tool to create a +## batch script for PIConGPU runs. 
For a detailed description of PIConGPU +## configuration files including all available variables, see +## +## docs/TBG_macros.cfg +## + + +################################# +## Section: Required Variables ## +################################# + +TBG_wallTime="0:10:00" + +# This setup does not need multiple MPI ranks for performance, merely to test that it works +TBG_devices_x=2 +TBG_devices_y=2 +TBG_devices_z=1 + +# When changing the number of cells consider changing positionX, positionY in FieldBackgroundJ::operator(). +# To match the setup from the Taflove book, the size should be 40 + PML size min border + PML size max border. +TBG_numCells=60 +TBG_gridSize="!TBG_numCells !TBG_numCells !TBG_numCells" +TBG_steps="1000" + + +################################# +## Section: Optional Variables ## +################################# + +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simData \ + --openPMD.ext h5" + +TBG_plugins="--fields_energy.period 10 !TBG_openPMD" + + +################################# +## Section: Program Parameters ## +################################# + +TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" + +TBG_programParams="-d !TBG_deviceDist \ + -g !TBG_gridSize \ + -s !TBG_steps \ + !TBG_plugins \ + --versionOnce" + +# TOTAL number of devices +TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" + +"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/8.cfg b/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/8.cfg deleted file mode 100644 index 51481df696..0000000000 --- a/share/picongpu/examples/FieldAbsorberTest/etc/picongpu/8.cfg +++ /dev/null @@ -1,69 +0,0 @@ -# Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, Axel Huebl, -# Sergei Bastrakov -# -# This file is part of PIConGPU. 
-# -# PIConGPU is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# PIConGPU is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with PIConGPU. -# If not, see . -# - -## -## This configuration file is used by PIConGPU's TBG tool to create a -## batch script for PIConGPU runs. For a detailed description of PIConGPU -## configuration files including all available variables, see -## -## docs/TBG_macros.cfg -## - - -################################# -## Section: Required Variables ## -################################# - -TBG_wallTime="0:10:00" - -TBG_devices_x=2 -TBG_devices_y=2 -TBG_devices_z=2 - -# When changing the number of cells consider changing sourceIdx -# in FieldBackgroundJ::operator() -TBG_numCells=128 -TBG_gridSize="!TBG_numCells !TBG_numCells !TBG_numCells" -TBG_steps="1000" - - -################################# -## Section: Optional Variables ## -################################# - -TBG_plugins="--fields_energy.period 10 --hdf5.period 10 --hdf5.file simData" - - -################################# -## Section: Program Parameters ## -################################# - -TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" - -TBG_programParams="-d !TBG_deviceDist \ - -g !TBG_gridSize \ - -s !TBG_steps \ - !TBG_plugins \ - --versionOnce" - -# TOTAL number of devices -TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" - -"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/dimension.param 
b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/dimension.param index 0881e9884b..efb7c42757 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/dimension.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/dimension.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Rene Widera, Richard Pausch +/* Copyright 2014-2021 Axel Huebl, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -20,7 +20,7 @@ #pragma once #ifndef PARAM_DIMENSION -#define PARAM_DIMENSION DIM3 +# define PARAM_DIMENSION DIM3 #endif #define SIMDIM PARAM_DIMENSION diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldBackground.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldBackground.param index 7575bbf10a..8465084d0f 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldBackground.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldBackground.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Alexander Debus +/* Copyright 2014-2021 Axel Huebl, Alexander Debus, Klaus Steiniger, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -17,11 +17,13 @@ * If not, see . 
*/ -#pragma once - -/** Load external background fields +/** @file fieldBackground.param * + * Load external background fields */ + +#pragma once + namespace picongpu { class FieldBackgroundE @@ -31,31 +33,23 @@ namespace picongpu static constexpr bool InfluenceParticlePusher = true; /* We use this to calculate your SI input back to our unit system */ - PMACC_ALIGN( - m_unitField, - const float3_64 - ); + PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundE( const float3_64 unitField ) : - m_unitField( unitField ) - {} + HDINLINE FieldBackgroundE(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field E(r,t) here * * \param cellIdx The total cell id counted from the start at t = 0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( + HDINLINE float3_X operator()( const DataSpace& /*cellIdx*/, const uint32_t /*currentStep*/ ) const { /* specify your E-Field in V/m and convert to PIConGPU units */ - return float3_X( - 0.0, - 0.0, - 0.0 - ); + return float3_X(0.0, 0.0, 0.0); } }; @@ -66,31 +60,23 @@ namespace picongpu static constexpr bool InfluenceParticlePusher = true; /* We use this to calculate your SI input back to our unit system */ - PMACC_ALIGN( - m_unitField, - const float3_64 - ); + PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundB( const float3_64 unitField ) : - m_unitField( unitField ) - {} + HDINLINE FieldBackgroundB(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field B(r,t) here * * \param cellIdx The total cell id counted from the start at t=0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( + HDINLINE float3_X operator()( const DataSpace& /*cellIdx*/, const uint32_t /*currentStep*/ ) const { /* specify your B-Field in T and convert to PIConGPU units */ - return float3_X( - 0.0, - 0.0, - 0.0 - ); + return float3_X(0.0, 0.0, 0.0); } }; @@ -100,55 +86,59 @@ namespace picongpu /* 
Add this additional field? */ static constexpr bool activated = true; + /* This setup is based on [Taflove, Hagness], section 7.11.1. + * The difference is we consider both 2D and 3D cases, and grid size may be increased due to our absorber being + * part of the simulation area, not located outside of it as in the book. + * + * Example of a rectangular conductor with a steady current. + * + * The conductor is oriented along the y-axis. + * Its edge length can be adjusted by the variable halfWidth in order to apply the test with meaningful results + * to higher-order solvers, too. + * The current in the wire ramps up over time according to a differentiated Gaussian. + * This defines the current density amplitude, too. + * Therefore, the total current through the wire scales with the wire's halfWidth. + */ + + //! Conductor is oriented along y-axis with the following coordinates and size, values for 60 cells in the grid + static constexpr int32_t positionX = 30; // unit: cells + static constexpr int32_t positionY = 30; // unit: cells + // We support non-unit source for high order field solver + static constexpr int32_t halfWidth = 1; // unit: cells + + //! 
Amplitude in terms of current density in SI + float_X amplitudeSI = -2._X; // unit: A / m^2 + /* We use this to calculate your SI input back to our unit system */ - PMACC_ALIGN( - m_unitField, - const float3_64 - ); + PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundJ( const float3_64 unitField ) : - m_unitField(unitField) - {} + HDINLINE FieldBackgroundJ(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field J(r,t) here * * \param cellIdx The total cell id counted from the start at t=0 - * \param currentStep The current time step */ - HDINLINE float3_X - operator()( - const DataSpace& cellIdx, - const uint32_t currentStep - ) const + * \param currentStep The current time step + */ + HDINLINE float3_X operator()(const DataSpace& cellIdx, const uint32_t currentStep) const { - /* Source index is hard-coded, should be in the center of the global - * domain, so has to be changed together with the grid size. - */ - DataSpace< simDim > const sourceIdx = - DataSpace< simDim >::create( 64u ); - if( cellIdx != sourceIdx ) - return float3_X( - 0.0, - 0.0, - 0.0 - ); - - /* This setup is based on [Taflove, Hagness], section 7.11.1 - * The difference is we consider both 2D and 3D cases, - * and grid size may be increased due to our absorber being part of - * the simulation area, not located outside of it as in the book. 
- */ - constexpr float_X duration_SI = 26.53e-12; // 26.53 ps - constexpr float_X delay_SI = 4.0_X * duration_SI; - float_X const time_SI = currentStep * SI::DELTA_T_SI; - float_X const normalizedTime = ( time_SI - delay_SI ) / duration_SI; - float_X const value = -2.0 * normalizedTime * - math::exp( -normalizedTime * normalizedTime ); - /* specify your J-Field in A/m^2 and convert to PIConGPU units */ + /* specify J-Field */ + float_X currentDensity = 0.0_X; + + if(math::abs(float_X(static_cast(cellIdx.x()) - positionX) + .5_X) < halfWidth + && math::abs(float_X(static_cast(cellIdx.y()) - positionY) + .5_X) < halfWidth) + { + float_X const duration = 26.53e-12 / SI::DELTA_T_SI; // 26.53 ps in PIC units + float_X const delay = 4._X * duration; + float_X const relativeTime = (static_cast(currentStep) - delay) / duration; + currentDensity = amplitudeSI * relativeTime * math::exp(-relativeTime * relativeTime); + } + return float3_X( - 0.0, - 0.0, - value / m_unitField[1] - ); + 0.0_X, + currentDensity / m_unitField[1], // unit: none + 0.0_X); } }; diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldSolver.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldSolver.param index 5448e5bdaf..7973dab9cc 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldSolver.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fieldSolver.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. * @@ -25,6 +25,11 @@ * * Also allows to configure ad hoc mitigations for high frequency * noise in some setups via current smoothing. + * + * \attention + * Currently, the laser initialization in PIConGPU is implemented to work with the standard Yee solver. 
+ * Using a solver of higher order will result in a slightly higher laser amplitude and energy than expected. + * */ #pragma once @@ -32,48 +37,58 @@ #include "picongpu/fields/MaxwellSolver/Solvers.def" #include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" +#include + namespace picongpu { -namespace fields -{ - - /** Current Interpolation - * - * CurrentInterpolation is used to set a method performing the - * interpolate/assign operation from the generated currents of particle - * species to the electro-magnetic fields. - * - * Allowed values are: - * - None: - * - default for staggered grids/Yee-scheme - * - updates E - * - Binomial: 2nd order Binomial filter - * - smooths the current before assignment in staggered grid - * - updates E & breaks local charge conservation slightly - * - NoneDS: - * - experimental assignment for all-centered/directional splitting - * - updates E & B at the same time - */ - using CurrentInterpolation = currentInterpolation::None; + namespace fields + { + /** Current Interpolation + * + * CurrentInterpolation is used to set a method performing the + * interpolate/assign operation from the generated currents of particle + * species to the electro-magnetic fields. + * + * Allowed values are: + * - None: + * - default for staggered grids/Yee-scheme + * - updates E + * - Binomial: 2nd order Binomial filter + * - smooths the current before assignment in staggered grid + * - updates E & breaks local charge conservation slightly + */ + using CurrentInterpolation = currentInterpolation::None; - /** FieldSolver - * - * Field Solver Selection: - * - Yee< CurrentInterpolation >: standard Yee solver - * - YeePML< CurrentInterpolation >: standard Yee solver with PML absorber - * - Lehe< CurrentInterpolation >: Num. 
Cherenkov free field solver in a chosen direction - * - DirSplitting< CurrentInterpolation >: Sentoku's Directional Splitting Method - * - None< CurrentInterpolation >: disable the vacuum update of E and B - */ + /** FieldSolver + * + * Field Solver Selection: + * - Yee< CurrentInterpolation > : Standard Yee solver approximating derivatives with respect to time and + * space by second order finite differences. + * - YeePML< CurrentInterpolation >: Standard Yee solver using Perfectly Matched Layer Absorbing Boundary + * Conditions (PML) + * - Lehe< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * - LehePML< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * using Perfectly Matched Layer Absorbing Boundary Conditions (PML) + * - ArbitraryOrderFDTD< 4, CurrentInterpolation >: Solver using 4 neighbors to each direction to approximate + * *spatial* derivatives by finite differences. The number of neighbors can be changed from 4 to any positive, + * integer number. The order of the solver will be twice the number of neighbors in each direction. Yee's + * method is a special case of this using one neighbor to each direction. 
+ * - ArbitraryOrderFDTDPML< 4, CurrentInterpolation >: ArbitraryOrderFDTD solver using Perfectly Matched Layer + * Absorbing Boundary Conditions (PML) + * - None< CurrentInterpolation >: disable the vacuum update of E and B + */ #ifndef PARAM_FIELDSOLVER - /* WARNING: if you change field solver by hand please update your CELL_WIDTH_SI - * in `grid.param` to fulfill the convergence condition (CFL) - */ -# define PARAM_FIELDSOLVER Yee + /* WARNING: if you change field solver by hand please update your CELL_WIDTH_SI + * in `grid.param` to fulfill the convergence condition (CFL) + */ +# define SELECTED_FIELD_SOLVER Yee +#else +# define FOURTH_ORDER 4 BOOST_PP_COMMA() CurrentInterpolation +# define SELECTED_FIELD_SOLVER PARAM_FIELDSOLVER #endif - using Solver = maxwellSolver::PARAM_FIELDSOLVER< CurrentInterpolation >; + using Solver = maxwellSolver::SELECTED_FIELD_SOLVER; -} // namespace fields + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fileOutput.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fileOutput.param index a486a87cfe..e703d1ccad 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fileOutput.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/fileOutput.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. 
@@ -63,33 +63,23 @@ namespace picongpu namespace deriveField = particles::particleToGrid; /* ChargeDensity section */ - using ChargeDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::ChargeDensity - >; + using ChargeDensity_Seq + = deriveField::CreateEligible_t; /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** * * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size */ - using FieldTmpSolvers = MakeSeq_t< - ChargeDensity_Seq - >; + using FieldTmpSolvers = MakeSeq_t; /** FileOutputFields: Groups all Fields that shall be dumped *************/ /** Possible native fields: FieldE, FieldB, FieldJ */ - using NativeFileOutputFields = MakeSeq_t< - FieldE, - FieldB, - FieldJ - >; + using NativeFileOutputFields = MakeSeq_t; - using FileOutputFields = MakeSeq_t< - NativeFileOutputFields - >; + using FileOutputFields = MakeSeq_t; /** FileOutputParticles: Groups all Species that shall be dumped ********** @@ -97,6 +87,6 @@ namespace picongpu * hint: to disable particle output set to * using FileOutputParticles = MakeSeq_t< >; */ - using FileOutputParticles = MakeSeq_t< >; + using FileOutputParticles = MakeSeq_t<>; -} +} // namespace picongpu diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/grid.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/grid.param index 335fd8c1d4..2de7f59db7 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/grid.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, Sergei Bastrakov +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, Sergei Bastrakov * * This file is part of PIConGPU. * @@ -18,15 +18,12 @@ */ - #pragma once namespace picongpu { - namespace SI { - /** This setup is based on section 7.11.1 of * A. Taflove, S.C. Hagness. 
Computational Electrodynamics * The Finite-Difference Time-Domain Method. 3rd Edition. @@ -66,27 +63,27 @@ namespace picongpu * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! Define the size of the absorbing zone (in cells) for both exponential //! absorber and PML, a compile-time parameter #ifndef PARAM_ABSORBER_SIZE -# define PARAM_ABSORBER_SIZE 8 +# define PARAM_ABSORBER_SIZE 8 #endif constexpr uint32_t ABSORBER_SIZE = PARAM_ABSORBER_SIZE; constexpr uint32_t ABSORBER_CELLS[3][2] = { - {ABSORBER_SIZE, ABSORBER_SIZE}, /*x direction [negative,positive]*/ - {ABSORBER_SIZE, ABSORBER_SIZE}, /*y direction [negative,positive]*/ - {ABSORBER_SIZE, ABSORBER_SIZE} /*z direction [negative,positive]*/ - }; //unit: number of cells + {ABSORBER_SIZE, ABSORBER_SIZE}, /*x direction [negative,positive]*/ + {ABSORBER_SIZE, ABSORBER_SIZE}, /*y direction [negative,positive]*/ + {ABSORBER_SIZE, ABSORBER_SIZE} /*z direction [negative,positive]*/ + }; // unit: number of cells //! Define the strength of the exponential absorber only constexpr float_X ABSORBER_STRENGTH_VALUE = 1.0e-3; constexpr float_X ABSORBER_STRENGTH[3][2] = { {ABSORBER_STRENGTH_VALUE, ABSORBER_STRENGTH_VALUE}, /*x direction [negative,positive]*/ {ABSORBER_STRENGTH_VALUE, ABSORBER_STRENGTH_VALUE}, /*y direction [negative,positive]*/ - {ABSORBER_STRENGTH_VALUE, ABSORBER_STRENGTH_VALUE} /*z direction [negative,positive]*/ - }; //unit: none + {ABSORBER_STRENGTH_VALUE, ABSORBER_STRENGTH_VALUE} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. 
* An initial pseudo particle, flying with the speed of light, @@ -105,4 +102,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} +} // namespace picongpu diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param new file mode 100644 index 0000000000..c128523630 --- /dev/null +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/memory.param @@ -0,0 +1,103 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Define low-level memory settings for compute devices. + * + * Settings for memory layout for supercells and particle frame-lists, + * data exchanges in multi-device domain-decomposition and reserved + * fields for temporarily derived quantities are defined here. + */ + +#pragma once +#include +#include + + +namespace picongpu +{ + /* We have to hold back 350MiB for gpu-internal operations: + * - random number generator + * - reduces + * - ... + */ + constexpr size_t reservedGpuMemorySize = 350 * 1024 * 1024; + + /* short namespace*/ + namespace mCT = pmacc::math::CT; + /** size of a superCell + * + * volume of a superCell must be <= 1024. + * This setup may use local grid size that is a multiple of 2 along x, y. 
+ */ + using SuperCellSize = typename mCT::shrinkTo, simDim>::type; + + /** define mapper which is used for kernel call mappings */ + using MappingDesc = MappingDescription; + + /** define the size of the core, border and guard area + * + * PIConGPU uses spatial domain-decomposition for parallelization + * over multiple devices with non-shared memory architecture. + * The global spatial domain is organized per device in three + * sections: the GUARD area contains copies of neighboring + * devices (also known as "halo"/"ghost"). + * The BORDER area is the outermost layer of cells of a device, + * equally to what neighboring devices see as GUARD area. + * The CORE area is the innermost area of a device. In union with + * the BORDER area it defines the "active" spatial domain on a device. + * + * GuardSize is defined in units of SuperCellSize per dimension. + * This setup may need several guard supercells along x, y for the arbitrary order field solver. + * Also, Esirkepov current deposition requires at least 2 supercells when supercell size is 2. + */ + using GuardSize = typename mCT::shrinkTo, simDim>::type; + + /** bytes reserved for species exchange buffer + * + * This is the default configuration for species exchanges buffer sizes. + * The default exchange buffer sizes can be changed per species by adding + * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg + * to its flag list. 
+ */ + struct DefaultExchangeMemCfg + { + // memory used for a direction + static constexpr uint32_t BYTES_EXCHANGE_X = 1 * 1024 * 1024; // 1 MiB + static constexpr uint32_t BYTES_EXCHANGE_Y = 3 * 1024 * 1024; // 3 MiB + static constexpr uint32_t BYTES_EXCHANGE_Z = 1 * 1024 * 1024; // 1 MiB + static constexpr uint32_t BYTES_EDGES = 32 * 1024; // 32 kiB + static constexpr uint32_t BYTES_CORNER = 8 * 1024; // 8 kiB + }; + + /** number of scalar fields that are reserved as temporary fields */ + constexpr uint32_t fieldTmpNumSlots = 1; + + /** can `FieldTmp` gather neighbor information + * + * If `true` it is possible to call the method `asyncCommunicationGather()` + * to copy data from the border of neighboring GPU into the local guard. + * This is also known as building up a "ghost" or "halo" region in domain + * decomposition and only necessary for specific algorithms that extend + * the basic PIC cycle, e.g. with dependence on derived density or energy fields. + */ + constexpr bool fieldTmpSupportGatherCommunication = true; + +} // namespace picongpu diff --git a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/pml.param b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/pml.param index 687848479c..68e2d8d6bd 100644 --- a/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/pml.param +++ b/share/picongpu/examples/FieldAbsorberTest/include/picongpu/param/pml.param @@ -1,4 +1,4 @@ -/* Copyright 2019-2020 Sergei Bastrakov +/* Copyright 2019-2021 Sergei Bastrakov * * This file is part of PIConGPU. * @@ -21,7 +21,7 @@ * * Configure the perfectly matched layer (PML). * - * To enable PML use YeePML field solver. + * To enable PML use YeePML, LehePML or ArbitraryOrderFDTDPML field solver. */ #pragma once @@ -29,140 +29,129 @@ namespace picongpu { -namespace fields -{ -namespace maxwellSolver -{ -namespace yeePML -{ - - /* The parameters in this file are only used if field solver is YeePML. 
- * The original paper on this approach is J.A. Roden, S.D. Gedney. - * Convolution PML (CPML): An efficient FDTD implementation of the CFS - PML - * for arbitrary media. Microwave and optical technology letters. 27 (5), - * 334-339 (2000). - * https://doi.org/10.1002/1098-2760(20001205)27:5%3C334::AID-MOP14%3E3.0.CO;2-A - * Our implementation based on a more detailed description in section 7.9 of - * the book A. Taflove, S.C. Hagness. Computational Electrodynamics. - * The Finite-Difference Time-Domain Method. Third Edition. Artech house, - * Boston (2005), referred to as [Taflove, Hagness]. - */ - -# ifndef PARAM_PML_SIZE -# define PARAM_PML_SIZE 8 -# endif - - constexpr uint32_t THICKNESS = PARAM_PML_SIZE; - - /** Thickness of the absorbing layer, in number of cells - * - * PML is located inside the global simulation area, near the outer borders. - * Setting size to 0 results in disabling absorption at the corresponding - * boundary. Normally thickness is between 6 and 16 cells, with larger - * values providing less reflections. - * 8 cells should be good enough for most simulations. There are no - * requirements on thickness being a multiple of the supercell size. - * It is only required that PML is small enough to fit near-boundary local - * domains at all time steps. - * Unit: number of cells. - */ - constexpr uint32_t NUM_CELLS[ 3 ][ 2 ] = { - { THICKNESS, THICKNESS }, // x direction [negative, positive] - { THICKNESS, THICKNESS }, // y direction [negative, positive] - { THICKNESS, THICKNESS } // z direction [negative, positive] - }; - - /** Order of polynomial grading for artificial electric conductivity and - * stretching coefficient - * - * The conductivity (sigma) is polynomially scaling from 0 at the internal - * border of PML to the maximum value (defined below) at the external - * border. The stretching coefficient (kappa) scales from 1 to the - * corresponding maximum value (defined below) with the same polynomial. 
- * The grading is given in [Taflove, Hagness], eq. (7.60a, b), with - * the order denoted 'm'. - * Must be >= 0. Normally between 3 and 4, not required to be integer. - * Unitless. - */ - constexpr float_64 SIGMA_KAPPA_GRADING_ORDER = 4.0; - - // [Taflove, Hagness], eq. (7.66) - constexpr float_64 SIGMA_OPT_SI[ 3 ] = { - 0.8 * ( SIGMA_KAPPA_GRADING_ORDER + 1.0 ) / ( SI::Z0_SI * SI::CELL_WIDTH_SI ), - 0.8 * ( SIGMA_KAPPA_GRADING_ORDER + 1.0 ) / ( SI::Z0_SI * SI::CELL_HEIGHT_SI ), - 0.8 * ( SIGMA_KAPPA_GRADING_ORDER + 1.0 ) / ( SI::Z0_SI * SI::CELL_DEPTH_SI ) - }; - - // Muptiplier to express SIGMA_MAX_SI with SIGMA_OPT_SI - constexpr float_64 SIGMA_OPT_MULTIPLIER = 1.0; - - /** Max value of artificial electric conductivity in PML - * - * Components correspond to directions: element 0 corresponds to absorption - * along x direction, 1 = y, 2 = z. Grading is described in comments for - * SIGMA_KAPPA_GRADING_ORDER. - * Too small values lead to significant reflections from the external - * border, too large - to reflections due to discretization errors. - * Artificial magnetic permeability will be chosen to perfectly match this. - * Must be >= 0. Normally between 0.7 * SIGMA_OPT_SI and 1.1 * SIGMA_OPT_SI. - * Unit: siemens / m. - */ - constexpr float_64 SIGMA_MAX_SI[ 3 ] = { - SIGMA_OPT_SI[ 0 ] * SIGMA_OPT_MULTIPLIER, - SIGMA_OPT_SI[ 1 ] * SIGMA_OPT_MULTIPLIER, - SIGMA_OPT_SI[ 2 ] * SIGMA_OPT_MULTIPLIER - }; - - /** Max value of coordinate stretching coefficient in PML - * - * Components correspond to directions: element 0 corresponds to absorption - * along x direction, 1 = y, 2 = z. Grading is described in comments for - * SIGMA_KAPPA_GRADING_ORDER. - * Must be >= 1. For relatively homogeneous domains 1.0 is a reasonable value. - * Highly elongated domains can have better absorption with values between - * 7.0 and 20.0, for example, see section 7.11.2 in [Taflove, Hagness]. - * Unitless. 
- */ -# ifndef PARAM_PML_KAPPA_MAX -# define PARAM_PML_KAPPA_MAX 1.0 -# endif - constexpr float_64 KAPPA_MAX[ 3 ] = { - PARAM_PML_KAPPA_MAX, - PARAM_PML_KAPPA_MAX, - PARAM_PML_KAPPA_MAX - }; - - /** Order of polynomial grading for complex frequency shift - * - * The complex frequency shift (alpha) is polynomially downscaling from the - * maximum value (defined below) at the internal border of PML to 0 at the - * external border. The grading is given in [Taflove, Hagness], eq. (7.79), - * with the order denoted 'm_a'. - * Must be >= 0. Normally values are around 1.0. - * Unitless. - */ - constexpr float_64 ALPHA_GRADING_ORDER = 1.0; - - /** Complex frequency shift in PML - * - * Components correspond to directions: element 0 corresponds to absorption - * along x direction, 1 = y, 2 = z. Setting it to 0 will make PML behave - * as uniaxial PML. Setting it to a positive value helps to attenuate - * evanescent modes, but can degrade absorption of propagating modes, as - * described in section 7.7 and 7.11.3 in [Taflove, Hagness]. - * Must be >= 0. Normally values are 0 or between 0.15 and 0.3. - * Unit: siemens / m. - */ -# ifndef PARAM_PML_ALPHA_MAX -# define PARAM_PML_ALPHA_MAX 0.2 -# endif - constexpr float_64 ALPHA_MAX_SI[ 3 ] = { - PARAM_PML_ALPHA_MAX, - PARAM_PML_ALPHA_MAX, - PARAM_PML_ALPHA_MAX - }; - -} // namespace yeePML -} // namespace maxwellSolver -} // namespace fields + namespace fields + { + namespace maxwellSolver + { + namespace Pml + { + /* The parameters in this file are only used if field solver is YeePML or LehePML. + * The original paper on this approach is J.A. Roden, S.D. Gedney. + * Convolution PML (CPML): An efficient FDTD implementation of the CFS - PML + * for arbitrary media. Microwave and optical technology letters. 27 (5), + * 334-339 (2000). + * https://doi.org/10.1002/1098-2760(20001205)27:5%3C334::AID-MOP14%3E3.0.CO;2-A + * Our implementation based on a more detailed description in section 7.9 of + * the book A. Taflove, S.C. Hagness. 
Computational Electrodynamics. + * The Finite-Difference Time-Domain Method. Third Edition. Artech house, + * Boston (2005), referred to as [Taflove, Hagness]. + */ + +#ifndef PARAM_PML_SIZE +# define PARAM_PML_SIZE 10 +#endif + + constexpr uint32_t THICKNESS = PARAM_PML_SIZE; + + /** Thickness of the absorbing layer, in number of cells + * + * PML is located inside the global simulation area, near the outer borders. + * Setting size to 0 results in disabling absorption at the corresponding + * boundary. Normally thickness is between 6 and 16 cells, with larger + * values providing less reflections. + * 8 cells should be good enough for most simulations. There are no + * requirements on thickness being a multiple of the supercell size. + * It is only required that PML is small enough to fit near-boundary local + * domains at all time steps. + * Unit: number of cells. + */ + constexpr uint32_t NUM_CELLS[3][2] = { + {THICKNESS, THICKNESS}, // x direction [negative, positive] + {THICKNESS, THICKNESS}, // y direction [negative, positive] + {THICKNESS, THICKNESS} // z direction [negative, positive] + }; + + /** Order of polynomial grading for artificial electric conductivity and + * stretching coefficient + * + * The conductivity (sigma) is polynomially scaling from 0 at the internal + * border of PML to the maximum value (defined below) at the external + * border. The stretching coefficient (kappa) scales from 1 to the + * corresponding maximum value (defined below) with the same polynomial. + * The grading is given in [Taflove, Hagness], eq. (7.60a, b), with + * the order denoted 'm'. + * Must be >= 0. Normally between 3 and 4, not required to be integer. + * Unitless. + */ + constexpr float_64 SIGMA_KAPPA_GRADING_ORDER = 4.0; + + // [Taflove, Hagness], eq. 
(7.66) + constexpr float_64 SIGMA_OPT_SI[3] + = {0.8 * (SIGMA_KAPPA_GRADING_ORDER + 1.0) / (SI::Z0_SI * SI::CELL_WIDTH_SI), + 0.8 * (SIGMA_KAPPA_GRADING_ORDER + 1.0) / (SI::Z0_SI * SI::CELL_HEIGHT_SI), + 0.8 * (SIGMA_KAPPA_GRADING_ORDER + 1.0) / (SI::Z0_SI * SI::CELL_DEPTH_SI)}; + + // Multiplier to express SIGMA_MAX_SI with SIGMA_OPT_SI + constexpr float_64 SIGMA_OPT_MULTIPLIER = 1.0; + + /** Max value of artificial electric conductivity in PML + * + * Components correspond to directions: element 0 corresponds to absorption + * along x direction, 1 = y, 2 = z. Grading is described in comments for + * SIGMA_KAPPA_GRADING_ORDER. + * Too small values lead to significant reflections from the external + * border, too large - to reflections due to discretization errors. + * Artificial magnetic permeability will be chosen to perfectly match this. + * Must be >= 0. Normally between 0.7 * SIGMA_OPT_SI and 1.1 * SIGMA_OPT_SI. + * Unit: siemens / m. + */ + constexpr float_64 SIGMA_MAX_SI[3] + = {SIGMA_OPT_SI[0] * SIGMA_OPT_MULTIPLIER, + SIGMA_OPT_SI[1] * SIGMA_OPT_MULTIPLIER, + SIGMA_OPT_SI[2] * SIGMA_OPT_MULTIPLIER}; + + /** Max value of coordinate stretching coefficient in PML + * + * Components correspond to directions: element 0 corresponds to absorption + * along x direction, 1 = y, 2 = z. Grading is described in comments for + * SIGMA_KAPPA_GRADING_ORDER. + * Must be >= 1. For relatively homogeneous domains 1.0 is a reasonable value. + * Highly elongated domains can have better absorption with values between + * 7.0 and 20.0, for example, see section 7.11.2 in [Taflove, Hagness]. + * Unitless. 
+ */ +#ifndef PARAM_PML_KAPPA_MAX +# define PARAM_PML_KAPPA_MAX 1.0 +#endif + constexpr float_64 KAPPA_MAX[3] = {PARAM_PML_KAPPA_MAX, PARAM_PML_KAPPA_MAX, PARAM_PML_KAPPA_MAX}; + + /** Order of polynomial grading for complex frequency shift + * + * The complex frequency shift (alpha) is polynomially downscaling from the + * maximum value (defined below) at the internal border of PML to 0 at the + * external border. The grading is given in [Taflove, Hagness], eq. (7.79), + * with the order denoted 'm_a'. + * Must be >= 0. Normally values are around 1.0. + * Unitless. + */ + constexpr float_64 ALPHA_GRADING_ORDER = 1.0; + + /** Complex frequency shift in PML + * + * Components correspond to directions: element 0 corresponds to absorption + * along x direction, 1 = y, 2 = z. Setting it to 0 will make PML behave + * as uniaxial PML. Setting it to a positive value helps to attenuate + * evanescent modes, but can degrade absorption of propagating modes, as + * described in section 7.7 and 7.11.3 in [Taflove, Hagness]. + * Must be >= 0. Normally values are 0 or between 0.15 and 0.3. + * Unit: siemens / m. + */ +#ifndef PARAM_PML_ALPHA_MAX +# define PARAM_PML_ALPHA_MAX 0.2 +#endif + constexpr float_64 ALPHA_MAX_SI[3] = {PARAM_PML_ALPHA_MAX, PARAM_PML_ALPHA_MAX, PARAM_PML_ALPHA_MAX}; + + } // namespace Pml + } // namespace maxwellSolver + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/FoilLCT/README.rst b/share/picongpu/examples/FoilLCT/README.rst index 9426007de4..a4938e2e6e 100644 --- a/share/picongpu/examples/FoilLCT/README.rst +++ b/share/picongpu/examples/FoilLCT/README.rst @@ -7,7 +7,7 @@ FoilLCT: Ion Acceleration from a Liquid-Crystal Target .. moduleauthor:: Axel Huebl, T. Kluge The following example models a laser-ion accelerator in the [TNSA]_ regime. -An optically over-dense target (:math:`n_\text{max} = 192 n_\text{c}`) consisting of a liquid-crystal material *8CB* (4-octyl-4'-cyanobiphenyl) :math:`C_{21}H_{25}N` is used. 
+An optically over-dense target (:math:`n_\text{max} = 192 n_\text{c}`) consisting of a liquid-crystal material *8CB* (4-octyl-4'-cyanobiphenyl) :math:`C_{21}H_{25}N` is used [LCT]_. Irradiated with a high-power laser pulse with :math:`a_0 = 5` the target is assumed to be partly pre-ionized due to realistic laser contrast and pre-pulses to :math:`C^{2+}`, :math:`H^+` and :math:`N^{2+}` while being slightly expanded on its surfaces (modeled as exponential density slope). The overall target is assumed to be initially quasi-neutral and the *8CB* ion components are are not demixed in the surface regions. @@ -32,3 +32,9 @@ References *Energetic proton generation in ultra-intense laser-solid interactions*, Physics of Plasmas **8**, 542 (2001), https://dx.doi.org/10.1063/1.1333697 + +.. [LCT] + P.L. Poole, L. Obst, G.E. Cochran, J. Metzkes, H.-P. Schlenvoigt, I. Prencipe, T. Kluge, T.E. Cowan, U. Schramm, and D.W. Schumacher. + *Laser-driven ion acceleration via target normal sheath acceleration in the relativistic transparency regime*, + New Journal of Physics **20**, 013019 (2018), + https://dx.doi.org/10.1088/1367-2630/aa9d47 diff --git a/share/picongpu/examples/FoilLCT/bin/plot_charge_density.py b/share/picongpu/examples/FoilLCT/bin/plot_charge_density.py index acf019aa57..a53df67332 100755 --- a/share/picongpu/examples/FoilLCT/bin/plot_charge_density.py +++ b/share/picongpu/examples/FoilLCT/bin/plot_charge_density.py @@ -3,7 +3,7 @@ """ This file is part of the PIConGPU. 
-Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Axel Huebl License: GPLv3+ """ diff --git a/share/picongpu/examples/FoilLCT/cmakeFlags b/share/picongpu/examples/FoilLCT/cmakeFlags index f7039acc31..1dd3a5c723 100755 --- a/share/picongpu/examples/FoilLCT/cmakeFlags +++ b/share/picongpu/examples/FoilLCT/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch, Jakob Trojok # # This file is part of PIConGPU. # @@ -30,7 +30,7 @@ # - increase by 1, no gaps flags[0]="" -flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_LASERPROFILE=ExpRampWithPrepulse'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_LASERPROFILE=ExpRampWithPrepulse;-DPARAM_IONIZATIONCURRENT=EnergyConservation'" ################################################################################ # execution diff --git a/share/picongpu/examples/FoilLCT/etc/picongpu/4.cfg b/share/picongpu/examples/FoilLCT/etc/picongpu/4.cfg index 9181056a81..8f8d3da9f3 100644 --- a/share/picongpu/examples/FoilLCT/etc/picongpu/4.cfg +++ b/share/picongpu/examples/FoilLCT/etc/picongpu/4.cfg @@ -1,4 +1,4 @@ -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl, Franz Poeschel # # This file is part of PIConGPU. 
# @@ -71,12 +71,12 @@ TBG_sumEnergy="--fields_energy.period 100 \ TBG_chargeConservation="--chargeConservation.period 100" # regular output -TBG_hdf5="--hdf5.period 250 --hdf5.file simData" +TBG_openPMD="--openPMD.period 250 --openPMD.file simData --openPMD.ext bp" TBG_plugins="!TBG_e_histogram !TBG_H_histogram !TBG_C_histogram !TBG_N_histogram \ !TBG_e_PSypy !TBG_H_PSypy !TBG_C_PSypy !TBG_N_PSypy \ !TBG_sumEnergy !TBG_chargeConservation \ - !TBG_hdf5" + !TBG_openPMD" ################################# diff --git a/share/picongpu/examples/FoilLCT/etc/picongpu/4_isaac.cfg b/share/picongpu/examples/FoilLCT/etc/picongpu/4_isaac.cfg index 11a568a3ac..4fb7c54cc5 100644 --- a/share/picongpu/examples/FoilLCT/etc/picongpu/4_isaac.cfg +++ b/share/picongpu/examples/FoilLCT/etc/picongpu/4_isaac.cfg @@ -1,4 +1,4 @@ -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # This file is part of PIConGPU. # @@ -46,7 +46,7 @@ TBG_periodic="--periodic 0 0 0" ## Section: Optional Variables ## ################################# -TBG_isaac="--isaac.period 1 --isaac.name foil --isaac.url hypnos5 --isaac.quality 99" +TBG_isaac="--isaac.period 1 --isaac.name foil --isaac.url hemera4 --isaac.quality 99" # futher options: # URL of the server # --isaac.url URL diff --git a/share/picongpu/examples/FoilLCT/etc/picongpu/8.cfg b/share/picongpu/examples/FoilLCT/etc/picongpu/8.cfg index 539cf9437b..2567201d1c 100644 --- a/share/picongpu/examples/FoilLCT/etc/picongpu/8.cfg +++ b/share/picongpu/examples/FoilLCT/etc/picongpu/8.cfg @@ -1,4 +1,4 @@ -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl, Franz Poeschel # # This file is part of PIConGPU. 
# @@ -71,12 +71,12 @@ TBG_sumEnergy="--fields_energy.period 100 \ TBG_chargeConservation="--chargeConservation.period 100" # regular output -TBG_hdf5="--hdf5.period 250 --hdf5.file simData" +TBG_openPMD="--openPMD.period 250 --openPMD.file simData --openPMD.ext bp" TBG_plugins="!TBG_e_histogram !TBG_H_histogram !TBG_C_histogram !TBG_N_histogram \ !TBG_e_PSypy !TBG_H_PSypy !TBG_C_PSypy !TBG_N_PSypy \ !TBG_sumEnergy !TBG_chargeConservation \ - !TBG_hdf5" + !TBG_openPMD" ################################# diff --git a/share/picongpu/examples/FoilLCT/etc/picongpu/8_isaac.cfg b/share/picongpu/examples/FoilLCT/etc/picongpu/8_isaac.cfg index c72bccdec9..b5e54b5825 100644 --- a/share/picongpu/examples/FoilLCT/etc/picongpu/8_isaac.cfg +++ b/share/picongpu/examples/FoilLCT/etc/picongpu/8_isaac.cfg @@ -1,4 +1,4 @@ -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # This file is part of PIConGPU. # @@ -46,7 +46,7 @@ TBG_periodic="--periodic 0 0 0" ## Section: Optional Variables ## ################################# -TBG_isaac="--isaac.period 1 --isaac.name foil --isaac.url hypnos5 --isaac.quality 99" +TBG_isaac="--isaac.period 1 --isaac.name foil --isaac.url hemera4 --isaac.quality 99" # futher options: # URL of the server # --isaac.url URL diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/density.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/density.param index 806db8eeb9..a86f0b1b75 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/density.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -34,81 +34,71 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. 
- * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - * - * We take n_e ("fully ionized") as reference density. - * Our target material (see speciesDefinition) is a liquid crystal called - * 8CB (4'-octyl-4-cyanobiphenyl). - */ - constexpr float_64 nc = 1.11485e21 * 1.e6 / 0.8 / 0.8; - constexpr float_64 BASE_DENSITY_SI = 192. * nc; - -} // namespace SI - -namespace densityProfiles -{ - struct FlatFoilWithRampFunctor + namespace SI { - /** This formula uses SI quantities only. - * The profile will be multiplied by BASE_DENSITY_SI. + /** Base density in particles per m^3 in the density profiles. * - * @param position_SI total offset including all slides [meter] - * @param cellSize_SI cell sizes [meter] + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. * - * @return float_X density [normalized to 1.0] + * unit: ELEMENTS/m^3 + * + * We take n_e ("fully ionized") as reference density. + * Our target material (see speciesDefinition) is a liquid crystal called + * 8CB (4'-octyl-4-cyanobiphenyl). */ - HDINLINE float_X - operator()( - const floatD_64& position_SI, - const float3_64& cellSize_SI - ) + constexpr float_64 nc = 1.11485e21 * 1.e6 / 0.8 / 0.8; + constexpr float_64 BASE_DENSITY_SI = 192. * nc; + + } // namespace SI + + namespace densityProfiles + { + struct FlatFoilWithRampFunctor { - // m -> mu - const float_64 y( position_SI.y() * 1.e6 ); + /** This formula uses SI quantities only. + * The profile will be multiplied by BASE_DENSITY_SI. 
+ * + * @param position_SI total offset including all slides [meter] + * @param cellSize_SI cell sizes [meter] + * + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + // m -> mu + const float_64 y(position_SI.y() * 1.e6); - // target begin & end (plateau) - constexpr float_64 y0( 0.5 ); - constexpr float_64 y1( y0 + 1.0 ); - // exponential pre-expanded density - constexpr float_64 L( 10.e-3 ); - constexpr float_64 L_cutoff( 4. * L ); + // target begin & end (plateau) + constexpr float_64 y0(0.5); + constexpr float_64 y1(y0 + 1.0); + // exponential pre-expanded density + constexpr float_64 L(10.e-3); + constexpr float_64 L_cutoff(4. * L); - float_64 dens = 0.0; + float_64 dens = 0.0; - // upramp - if( y < y0 && (y0 - y) < L_cutoff ) - dens = math::exp( ( y - y0 ) / L ); - // downramp - if( y > y1 && (y - y1) < L_cutoff ) - dens = math::exp( ( y1 - y ) / L ); - // plateau - if( y >= y0 && y <= y1 ) - dens = 1.0; + // upramp + if(y < y0 && (y0 - y) < L_cutoff) + dens = math::exp((y - y0) / L); + // downramp + if(y > y1 && (y - y1) < L_cutoff) + dens = math::exp((y1 - y) / L); + // plateau + if(y >= y0 && y <= y1) + dens = 1.0; - // safety check: all parts of the function MUST be > 0 - dens *= float_64( dens >= 0.0 ); - return dens; - } - }; + // safety check: all parts of the function MUST be > 0 + dens *= float_64(dens >= 0.0); + return dens; + } + }; - // definition of free formula profile - using FlatFoilWithRamp = FreeFormulaImpl< FlatFoilWithRampFunctor >; + // definition of free formula profile + using FlatFoilWithRamp = FreeFormulaImpl<FlatFoilWithRampFunctor>; - // put probe particles every 4th cell in X, Y(, Z) - using ProbeEveryFourthCell = EveryNthCellImpl< - mCT::UInt32< - 4, - 4, - 4 - > - >; -} // namespace densityProfiles + // put probe particles every 4th cell in X, Y(, Z) + using ProbeEveryFourthCell = EveryNthCellImpl<mCT::UInt32<4, 4, 4>>; + } // namespace densityProfiles } // namespace picongpu 
diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/dimension.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/dimension.param index a69f7998f9..8cb96ebe58 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/dimension.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/dimension.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl +/* Copyright 2014-2021 Axel Huebl * * This file is part of PIConGPU. * diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/fileOutput.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/fileOutput.param index 798173d705..c611c5c0f8 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/fileOutput.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/fileOutput.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. 
@@ -63,55 +63,34 @@ namespace picongpu namespace deriveField = particles::particleToGrid; /* Density section */ - using Density_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::Density - >; + using Density_Seq = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::Density>; /* BoundElectronDensity section */ - using BoundElectronDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::BoundElectronDensity - >; + using BoundElectronDensity_Seq + = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::BoundElectronDensity>; /* ChargeDensity section */ - using ChargeDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::ChargeDensity - >; + using ChargeDensity_Seq + = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::ChargeDensity>; /* EnergyDensity section */ - using EnergyDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::EnergyDensity - >; + using EnergyDensity_Seq + = deriveField::CreateEligible_t<VectorAllSpecies, deriveField::derivedAttributes::EnergyDensity>; /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** * * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size */ - using FieldTmpSolvers = MakeSeq_t< - Density_Seq, - BoundElectronDensity_Seq, - ChargeDensity_Seq, - EnergyDensity_Seq - >; + using FieldTmpSolvers = MakeSeq_t<Density_Seq, BoundElectronDensity_Seq, ChargeDensity_Seq, EnergyDensity_Seq>; /** FileOutputFields: Groups all Fields that shall be dumped *************/ /** Possible native fields: FieldE, FieldB, FieldJ */ - using NativeFileOutputFields = MakeSeq_t< - FieldE, - FieldB, - FieldJ - >; + using NativeFileOutputFields = MakeSeq_t<FieldE, FieldB, FieldJ>; - using FileOutputFields = MakeSeq_t< - NativeFileOutputFields, - FieldTmpSolvers - >; + using FileOutputFields = MakeSeq_t<NativeFileOutputFields, FieldTmpSolvers>; /** FileOutputParticles: Groups all Species that shall be dumped ********** @@ -119,6 +98,6 @@ namespace picongpu * hint: to enable particle output set to * using FileOutputParticles = VectorAllSpecies; */ - using FileOutputParticles = MakeSeq_t< Probes >; + using FileOutputParticles = MakeSeq_t<Probes>; -} +} // namespace picongpu 
diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/grid.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/grid.param index 1db5e05440..f50381ff3e 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/grid.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -61,7 +61,7 @@ namespace picongpu * unit: seconds CFL criteria for Yee MW Solver * 2D: sqrt(2) * 3D: sqrt(3) */ - constexpr float_64 DELTA_T_SI = CELL_WIDTH_SI / ( 1.415 * SPEED_OF_LIGHT_SI ); + constexpr float_64 DELTA_T_SI = CELL_WIDTH_SI / (1.415 * SPEED_OF_LIGHT_SI); } // namespace SI @@ -70,9 +70,9 @@ namespace picongpu * unit: none */ constexpr uint32_t ABSORBER_CELLS[3][2] = { - {64, 64}, /*x direction [negative,positive]*/ - {64, 64}, /*y direction [negative,positive]*/ - {64, 64} /*z direction [negative,positive]*/ + {64, 64}, /*x direction [negative,positive]*/ + {64, 64}, /*y direction [negative,positive]*/ + {64, 64} /*z direction [negative,positive]*/ }; /** Define the strength of the absorber for any direction @@ -82,7 +82,7 @@ namespace picongpu constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ }; /** When to move the co-moving window. 
diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/laser.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/laser.param index fe85a021e6..6fa4e7b348 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/laser.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/laser.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, * Alexander Debus * * This file is part of PIConGPU. @@ -48,149 +48,153 @@ #include #ifndef PARAM_LASERPROFILE -#define PARAM_LASERPROFILE PlaneWave +# define PARAM_LASERPROFILE PlaneWave #endif namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ - struct PlaneWaveParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; - - /** UNITCONV */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = 5.0; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** The profile of the test Lasers 0 and 2 can be stretched by a - * constexprant area between the up and downramp - * unit: seconds */ - static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 0.0; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 25.0e-15 / 2.354820045; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 3. * 2.354820045; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0u; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - /** Available polarisation types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = LINEAR_X; - }; - - struct ExpRampWithPrepulseParam : PlaneWaveParam - { - /* Laser profile with Gaussian spatial envelope and the following - * temporal shape: - * A Gaussian peak (optionally lengthened by a plateau) is preceded by - * two pieces of exponential preramps, defined by 3 (time, intensity)- - * -points. - * The first two points get connected by an exponential, the 2nd and - * 3rd point are connected by another exponential, which is then - * extrapolated to the peak. The Gaussian is added everywhere, but - * typically contributes significantly only near the peak. 
- * It is advisable to set the third point far enough from the plateau - * (approx 3*FWHM), then the contribution from the Gaussian is - * negligible there, and the intensity can be set as measured from the - * laser profile. - * Optionally a Gaussian prepulse can be added, given by the parameters - * of the relative intersity and time point. - * The time of the prepulse and the three preramp points are given in - * SI, the intensities are given as multiples of the peak intensity. - */ - - // Intensities of prepulse and exponential preramp - static constexpr float_X INT_RATIO_PREPULSE = 0.; - static constexpr float_X INT_RATIO_POINT_1 = 1.e-8; - static constexpr float_X INT_RATIO_POINT_2 = 1.e-4; - static constexpr float_X INT_RATIO_POINT_3 = 1.e-4; - - // time-positions of prepulse and preramps points - static constexpr float_64 TIME_PREPULSE_SI = -950.0e-15; - static constexpr float_64 TIME_PEAKPULSE_SI = 0.0e-15; - static constexpr float_64 TIME_POINT_1_SI = -1000.0e-15; - static constexpr float_64 TIME_POINT_2_SI = -300.0e-15; - static constexpr float_64 TIME_POINT_3_SI = -100.0e-15; - - /** Pulse length: sigma of std. gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = 3.0e-14 / 2.35482; // half of the time in which E falls to half its initial value (then I falls to half its value in 15fs, approx 6 wavelengths). Those are 4.8 wavelenghts. 
- - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * WO_X_SI is this distance in x-direction - * W0_Z_SI is this distance in z-direction - * if both values are equal, the laser has a circular shape in x-z - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * unit: meter */ - static constexpr float_64 W0_X_SI = 2.5 * WAVE_LENGTH_SI; - static constexpr float_64 W0_Z_SI = W0_X_SI; - - /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before plateau - and half at the end of the plateau - * unit: none */ - static constexpr float_64 RAMP_INIT = 16.0; - }; - - //! currently selected laser profile - // using Selected = PlaneWave< PlaneWaveParam >; - using Selected = PARAM_LASERPROFILE< PMACC_JOIN( PARAM_LASERPROFILE, Param )>; - -} // namespace laserProfiles -} // namespace fields + struct PlaneWaveParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = 0.8e-6; + + /** UNITCONV */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = 5.0; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** The profile of the test Lasers 0 and 2 can be stretched by a + * constexprant area between the up and downramp + * unit: seconds */ + static constexpr float_64 LASER_NOFOCUS_CONSTANT_SI = 0.0; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 25.0e-15 / 2.354820045; + + /** The laser pulse will be initialized half of PULSE_INIT times of the PULSE_LENGTH before and after + * the plateau unit: none */ + static constexpr float_64 RAMP_INIT = 3. * 2.354820045; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0u; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = LINEAR_X; + }; + + struct ExpRampWithPrepulseParam : PlaneWaveParam + { + /* Laser profile with Gaussian spatial envelope and the following + * temporal shape: + * A Gaussian peak (optionally lengthened by a plateau) is preceded by + * two pieces of exponential preramps, defined by 3 (time, intensity)- + * -points. + * The first two points get connected by an exponential, the 2nd and + * 3rd point are connected by another exponential, which is then + * extrapolated to the peak. The Gaussian is added everywhere, but + * typically contributes significantly only near the peak. 
+ * It is advisable to set the third point far enough from the plateau + * (approx 3*FWHM), then the contribution from the Gaussian is + * negligible there, and the intensity can be set as measured from the + * laser profile. + * Optionally a Gaussian prepulse can be added, given by the parameters + * of the relative intensity and time point. + * The time of the prepulse and the three preramp points are given in + * SI, the intensities are given as multiples of the peak intensity. + */ + + // Intensities of prepulse and exponential preramp + static constexpr float_X INT_RATIO_PREPULSE = 0.; + static constexpr float_X INT_RATIO_POINT_1 = 1.e-8; + static constexpr float_X INT_RATIO_POINT_2 = 1.e-4; + static constexpr float_X INT_RATIO_POINT_3 = 1.e-4; + + // time-positions of prepulse and preramps points + static constexpr float_64 TIME_PREPULSE_SI = -950.0e-15; + static constexpr float_64 TIME_PEAKPULSE_SI = 0.0e-15; + static constexpr float_64 TIME_POINT_1_SI = -1000.0e-15; + static constexpr float_64 TIME_POINT_2_SI = -300.0e-15; + static constexpr float_64 TIME_POINT_3_SI = -100.0e-15; + + /** Pulse length: sigma of std. gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what an experimentalist calls "pulse duration" + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = 3.0e-14 + / 2.35482; // half of the time in which E falls to half its initial value (then I falls to half its + // value in 15fs, approx 6 wavelengths). Those are 4.8 wavelengths. 
+ + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * W0_X_SI is this distance in x-direction + * W0_Z_SI is this distance in z-direction + * if both values are equal, the laser has a circular shape in x-z + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * unit: meter */ + static constexpr float_64 W0_X_SI = 2.5 * WAVE_LENGTH_SI; + static constexpr float_64 W0_Z_SI = W0_X_SI; + + /** The laser pulse will be initialized half of RAMP_INIT times of the PULSE_LENGTH before plateau + and half at the end of the plateau + * unit: none */ + static constexpr float_64 RAMP_INIT = 16.0; + }; + + //! currently selected laser profile + // using Selected = PlaneWave< PlaneWaveParam >; + using Selected = PARAM_LASERPROFILE<PMACC_JOIN(PARAM_LASERPROFILE, Param)>; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param index 6765369121..c403413b56 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/memory.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. 
* @@ -31,10 +31,10 @@ #include #include +#include namespace picongpu { - /* We have to hold back 350MiB for gpu-internal operations: * - random number generator * - reduces @@ -48,10 +48,10 @@ namespace picongpu * * volume of a superCell must be <= 1024 */ - using SuperCellSize = mCT::Int< 16, 16 >; + using SuperCellSize = mCT::Int<16, 16>; /** define mapper which is used for kernel call mappings */ - using MappingDesc = MappingDescription< simDim, SuperCellSize >; + using MappingDesc = MappingDescription; /** define the size of the core, border and guard area * @@ -67,10 +67,7 @@ namespace picongpu * * GuardSize is defined in units of SuperCellSize per dimension. */ - using GuardSize = typename mCT::shrinkTo< - mCT::Int< 1, 1, 1 >, - simDim - >::type; + using GuardSize = typename mCT::shrinkTo, simDim>::type; /** bytes reserved for species exchange buffer * @@ -87,6 +84,21 @@ namespace picongpu static constexpr uint32_t BYTES_EXCHANGE_Z = 3 * 1024 * 1024; // 3 MiB static constexpr uint32_t BYTES_EDGES = 128 * 1024; // 128 kiB static constexpr uint32_t BYTES_CORNER = 32 * 1024; // 32 kiB + + /** Reference local domain size + * + * The size of the local domain for which the exchange sizes `BYTES_*` are configured for. + * The required size of each exchange will be calculated at runtime based on the local domain size and the + * reference size. The exchange size will be scaled only up and not down. Zero means that there is no reference + * domain size, exchanges will not be scaled. + */ + using REF_LOCAL_DOM_SIZE = mCT::Int<0, 0, 0>; + /** Scaling rate per direction. + * + * 1.0 means it scales linear with the ratio between the local domain size at runtime and the reference local + * domain size. 
+ */ + const std::array DIR_SCALING_FACTOR = {{0.0, 0.0, 0.0}}; }; /** number of scalar fields that are reserved as temporary fields */ diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/particle.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/particle.param index 8ac1a6bc87..06c0005e97 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/particle.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. @@ -34,103 +34,95 @@ #include #include -#include namespace picongpu { -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * - * unit: none - * - * here we essentially allow any weighting since it has no real meaning in 2D3V - */ - constexpr float_X MIN_WEIGHTING = 0.0000001; - -namespace manipulators -{ - // ionize ions once by removing one bound electron - struct OnceIonizedImpl - { - template< typename T_Particle > - DINLINE void operator()( - T_Particle& particle - ) - { - constexpr float_X protonNumber = GetAtomicNumbers< T_Particle >::type::numberOfProtons; - particle[ boundElectrons_ ] = protonNumber - 1.0_X; - } - }; - using OnceIonized = generic::Free< OnceIonizedImpl >; - - //! ionize ions twice - struct TwiceIonizedImpl - { - template< typename T_Particle > - DINLINE void operator()( - T_Particle& particle - ) - { - constexpr float_X protonNumber = GetAtomicNumbers< T_Particle >::type::numberOfProtons; - particle[ boundElectrons_ ] = protonNumber - 2._X; - } - }; - - //! definition of TwiceIonizedImpl manipulator - using TwiceIonized = generic::Free< TwiceIonizedImpl >; - - //! 
changes the in-cell position of each particle of a species - using RandomPosition = unary::RandomPosition; - -} // namespace manipulators - - -namespace startPosition -{ - struct RandomParameter6ppc + namespace particles { - /** Count of particles per cell at initial state + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted * * unit: none - */ - static constexpr uint32_t numParticlesPerCell = 6u; - }; - using Random6ppc = RandomImpl< RandomParameter6ppc >; - - // probe particles sit directly in lower corner of the cell - CONST_VECTOR( - float_X, - 3, - InCellOffset, - // each x, y, z in-cell position component in range [0.0, 1.0) - 0.0, - 0.0, - 0.0 - ); - struct OnePositionParameter - { - /** Count of particles per cell at initial state * - * unit: none + * here we essentially allow any weighting since it has no real meaning in 2D3V */ - static constexpr uint32_t numParticlesPerCell = 1u; + constexpr float_X MIN_WEIGHTING = 0.0000001; - const InCellOffset_t inCellOffset; - }; - using OnePosition = OnePositionImpl< OnePositionParameter >; - -} // namespace startPosition - - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. - */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = - startPosition::RandomParameter6ppc::numParticlesPerCell; + namespace manipulators + { + // ionize ions once by removing one bound electron + struct OnceIonizedImpl + { + template + DINLINE void operator()(T_Particle& particle) + { + constexpr float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + particle[boundElectrons_] = protonNumber - 1.0_X; + } + }; + using OnceIonized = generic::Free; + + //! 
ionize ions twice + struct TwiceIonizedImpl + { + template + DINLINE void operator()(T_Particle& particle) + { + constexpr float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + particle[boundElectrons_] = protonNumber - 2._X; + } + }; + + //! definition of TwiceIonizedImpl manipulator + using TwiceIonized = generic::Free; + + //! changes the in-cell position of each particle of a species + using RandomPosition = unary::RandomPosition; + + } // namespace manipulators + + + namespace startPosition + { + struct RandomParameter6ppc + { + /** Count of particles per cell at initial state + * + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 6u; + }; + using Random6ppc = RandomImpl; + + // probe particles sit directly in lower corner of the cell + CONST_VECTOR( + float_X, + 3, + InCellOffset, + // each x, y, z in-cell position component in range [0.0, 1.0) + 0.0, + 0.0, + 0.0); + struct OnePositionParameter + { + /** Count of particles per cell at initial state + * + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 1u; + + const InCellOffset_t inCellOffset; + }; + using OnePosition = OnePositionImpl; + + } // namespace startPosition + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. 
+ */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = startPosition::RandomParameter6ppc::numParticlesPerCell; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesDefinition.param index 1e19dab033..9ba99dce09 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau, Axel Huebl * * This file is part of PIConGPU. * @@ -44,188 +44,141 @@ namespace picongpu { - -/*########################### define particle attributes #####################*/ - -//! describe attributes of a particle -using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting ->; - -//! 
ions also need to have a boundElectrons attribute for ionization -using IonParticleAttributes = MakeSeq_t< - DefaultParticleAttributes, - boundElectrons ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - -/*--------------------------- electrons --------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > ->; - -/* define species electrons */ -using Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; - -/*--------------------------- H+ --------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioHydrogen, 1836.152672 ); -value_identifier( float_X, ChargeRatioHydrogen, -1.0 ); - -/* ratio relative to BASE_DENSITY (n_e) */ -value_identifier( float_X, DensityRatioHydrogen, 25. / 158. 
); - -using ParticleFlagsHydrogen = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioHydrogen >, - chargeRatio< ChargeRatioHydrogen >, - densityRatio< DensityRatioHydrogen >, - atomicNumbers< ionization::atomicNumbers::Hydrogen_t >, - ionizationEnergies< ionization::energies::AU::Hydrogen_t >, - effectiveNuclearCharge< ionization::effectiveNuclearCharge::Hydrogen_t >, - ionizers< - MakeSeq_t< - particles::ionization::BSIEffectiveZ< Electrons >, - particles::ionization::ADKLinPol< Electrons >, - particles::ionization::ThomasFermi< Electrons > - > - > ->; - -/* define species Hydrogen */ -using Hydrogen = Particles< - PMACC_CSTRING( "H" ), - ParticleFlagsHydrogen, - IonParticleAttributes ->; - -/*--------------------------- C ---------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioCarbon, 22032.0 ); -value_identifier( float_X, ChargeRatioCarbon, -6.0 ); - -/* ratio relative to BASE_DENSITY (n_e) */ -value_identifier( float_X, DensityRatioCarbon, 21. / 158. 
); - -using ParticleFlagsCarbon = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioCarbon >, - chargeRatio< ChargeRatioCarbon >, - densityRatio< DensityRatioCarbon >, - atomicNumbers< ionization::atomicNumbers::Carbon_t >, - ionizationEnergies< ionization::energies::AU::Carbon_t >, - effectiveNuclearCharge< ionization::effectiveNuclearCharge::Carbon_t >, - ionizers< - MakeSeq_t< - particles::ionization::BSIEffectiveZ< Electrons >, - particles::ionization::ADKLinPol< Electrons >, - particles::ionization::ThomasFermi< Electrons > - > - > ->; - -/* define species Carbon */ -using Carbon = Particles< - PMACC_CSTRING( "C" ), - ParticleFlagsCarbon, - IonParticleAttributes ->; - -/*--------------------------- N ---------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioNitrogen, 25716.852 ); -value_identifier( float_X, ChargeRatioNitrogen, -7.0 ); - -/* ratio relative to BASE_DENSITY (n_e) */ -value_identifier( float_X, DensityRatioNitrogen, 1. / 158. 
); - -using ParticleFlagsNitrogen = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioNitrogen >, - chargeRatio< ChargeRatioNitrogen >, - densityRatio< DensityRatioNitrogen >, - atomicNumbers< ionization::atomicNumbers::Nitrogen_t >, - ionizationEnergies< ionization::energies::AU::Nitrogen_t >, - effectiveNuclearCharge< ionization::effectiveNuclearCharge::Nitrogen_t >, - ionizers< - MakeSeq_t< - particles::ionization::BSIEffectiveZ< Electrons >, - particles::ionization::ADKLinPol< Electrons >, - particles::ionization::ThomasFermi< Electrons > - > - > ->; - -/* define species Nitrogen */ -using Nitrogen = Particles< - PMACC_CSTRING( "N" ), - ParticleFlagsNitrogen, - IonParticleAttributes ->; - -/*--------------------------- Probe Particles -------------------------------*/ - -using ParticleFlagsProbes = MakeSeq_t< - particlePusher< particles::pusher::Probe >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle > ->; - -/* define species Probe */ -using Probes = Particles< - PMACC_CSTRING( "probe" ), - ParticleFlagsProbes, - MakeSeq_t< - position< position_pic >, - probeB, - probeE - > ->; - -/*########################### end species ####################################*/ - -/** All known particle species of the simulation - * - * List all defined particle species from above in this list - * to make them available to the PIC algorithm. - */ -using VectorAllSpecies = MakeSeq_t< - Electrons, - Hydrogen, - Carbon, - Nitrogen, - Probes ->; + /*########################### define particle attributes #####################*/ + + //! describe attributes of a particle + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; + + //! 
ions also need to have a boundElectrons attribute for ionization + using IonParticleAttributes = MakeSeq_t; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species electrons */ + using Electrons = Particles; + + /*--------------------------- H+ --------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioHydrogen, 1836.152672); + value_identifier(float_X, ChargeRatioHydrogen, -1.0); + + /* ratio relative to BASE_DENSITY (n_e) */ + value_identifier(float_X, DensityRatioHydrogen, 25. / 158.); + + +#ifndef PARAM_IONIZATIONCURRENT +# define PARAM_IONIZATIONCURRENT None +#endif + using ParticleFlagsHydrogen = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers, + ionizationEnergies, + effectiveNuclearCharge, + ionizers, + particles::ionization::ADKLinPol, + particles::ionization::ThomasFermi>>>; + + /* define species Hydrogen */ + using Hydrogen = Particles; + + /*--------------------------- C ---------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioCarbon, 22032.0); + value_identifier(float_X, ChargeRatioCarbon, -6.0); + + /* ratio relative to BASE_DENSITY (n_e) */ + value_identifier(float_X, DensityRatioCarbon, 21. 
/ 158.); + + using ParticleFlagsCarbon = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers, + ionizationEnergies, + effectiveNuclearCharge, + ionizers, + particles::ionization::ADKLinPol, + particles::ionization::ThomasFermi>>>; + + /* define species Carbon */ + using Carbon = Particles; + + /*--------------------------- N ---------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioNitrogen, 25716.852); + value_identifier(float_X, ChargeRatioNitrogen, -7.0); + + /* ratio relative to BASE_DENSITY (n_e) */ + value_identifier(float_X, DensityRatioNitrogen, 1. / 158.); + + using ParticleFlagsNitrogen = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers, + ionizationEnergies, + effectiveNuclearCharge, + ionizers, + particles::ionization::ADKLinPol, + particles::ionization::ThomasFermi>>>; + + /* define species Nitrogen */ + using Nitrogen = Particles; + + /*--------------------------- Probe Particles -------------------------------*/ + + using ParticleFlagsProbes = MakeSeq_t< + particlePusher, + shape, + interpolation>; + + /* define species Probe */ + using Probes + = Particles, probeB, probeE>>; + + /*########################### end species ####################################*/ + + /** All known particle species of the simulation + * + * List all defined particle species from above in this list + * to make them available to the PIC algorithm. 
+ */ + using VectorAllSpecies = MakeSeq_t; } // namespace picongpu diff --git a/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesInitialization.param index 878da563f8..0f234db171 100644 --- a/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/FoilLCT/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,73 +33,32 @@ namespace picongpu { -namespace particles -{ - - /** InitPipeline defines in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::FlatFoilWithRamp, - startPosition::Random6ppc, - Hydrogen - >, - /* derive the other two ion species and adjust their weighting to have always all - * three of macro ions present in a cell, even in cut-off regions of the density profile */ - ManipulateDerive< - manipulators::binary::DensityWeighting, - Hydrogen, - Carbon - >, - ManipulateDerive< - manipulators::binary::DensityWeighting, - Hydrogen, - Nitrogen - >, - // randomize C & N in-cell - Manipulate< - manipulators::unary::RandomPosition, - Carbon - >, - Manipulate< - manipulators::unary::RandomPosition, - Nitrogen - >, - // partial pre-ionization: set bound electrons for C2+ & N2+ - Manipulate< - manipulators::TwiceIonized, - Carbon - >, - // note: boundElectrons default is 0, so Hydrogen's default is H+ - Manipulate< - manipulators::TwiceIonized, - Nitrogen - >, - // partial pre-ionization: create free electrons - Derive< - Hydrogen, - Electrons - >, - ManipulateDerive< - manipulators::binary::UnboundElectronsTimesWeighting, - Carbon, - Electrons - >, - ManipulateDerive< - manipulators::binary::UnboundElectronsTimesWeighting, - 
Nitrogen, - Electrons - >, - /* create non-physical "probe" particles that sit in every 4x4x4th cell - * and monitor the electro-magnetic fields */ - CreateDensity< - densityProfiles::ProbeEveryFourthCell, - startPosition::OnePosition, - Probes - > - >; + namespace particles + { + /** InitPipeline defines in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + /* derive the other two ion species and adjust their weighting to have always all + * three of macro ions present in a cell, even in cut-off regions of the density profile */ + ManipulateDerive, + ManipulateDerive, + // randomize C & N in-cell + Manipulate, + Manipulate, + // partial pre-ionization: set bound electrons for C2+ & N2+ + Manipulate, + // note: boundElectrons default is 0, so Hydrogen's default is H+ + Manipulate, + // partial pre-ionization: create free electrons + Derive, + ManipulateDerive, + ManipulateDerive, + /* create non-physical "probe" particles that sit in every 4x4x4th cell + * and monitor the electro-magnetic fields */ + CreateDensity>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/cmakeFlags b/share/picongpu/examples/KelvinHelmholtz/cmakeFlags index ec0ffe7464..35c8e64f65 100755 --- a/share/picongpu/examples/KelvinHelmholtz/cmakeFlags +++ b/share/picongpu/examples/KelvinHelmholtz/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. 
# @@ -32,11 +32,6 @@ flags[0]="" flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_DIMENSION=DIM2'" flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_RADIATION=1'" -flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=EmZ;-DPARAM_PARTICLESHAPE=CIC'" -flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=EmZ;-DPARAM_PARTICLESHAPE=PCS'" -flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=EmZ;-DPARAM_PARTICLESHAPE=TSC'" -flags[6]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=EmZ;-DPARAM_PARTICLESHAPE=TSC;-DPARAM_DIMENSION=DIM2'" -flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=DirSplitting;-DPARAM_CURRENTINTERPOLATION=NoneDS'" ################################################################################ diff --git a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1.cfg b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1.cfg index 4cc3fb3302..0d5bfd71f6 100644 --- a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1.cfg +++ b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/16.cfg b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/16.cfg index abbe7abd37..89a94554b4 100644 --- a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/16.cfg +++ b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/16.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. 
# @@ -46,7 +46,10 @@ TBG_periodic="--periodic 1 1 1" ## Section: Optional Variables ## ################################# -TBG_hdf5="--hdf5.period 250 --hdf5.file simData" +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 250 \ + --openPMD.file simData \ + --openPMD.ext h5" TBG_pngYZ="--e_png.period 10 --e_png.axis yz --e_png.slicePoint 0.5 --e_png.folder pngElectronsYZ" TBG_pngYX="--e_png.period 10 --e_png.axis yx --e_png.slicePoint 0.5 --e_png.folder pngElectronsYX" @@ -58,16 +61,16 @@ TBG_ipngYX="--i_png.period 10 --i_png.axis yx --i_png.slicePoint 0.5 --i_png.fol TBG_eBin="--e_energyHistogram.period 100 --e_energyHistogram.filter all --e_energyHistogram.binCount 1024 --e_energyHistogram.minEnergy 0 --e_energyHistogram.maxEnergy 5000" TBG_iBin="--i_energyHistogram.period 100 --i_energyHistogram.filter all --i_energyHistogram.binCount 1024 --i_energyHistogram.minEnergy 0 --i_energyHistogram.maxEnergy 2000000" -TBG_plugins="!TBG_ipngYZ \ +TBG_plugins="!TBG_ipngYZ \ !TBG_ipngYX \ !TBG_eBin \ !TBG_iBin \ !TBG_pngYX \ !TBG_pngYZ \ - !TBG_hdf5 \ + !TBG_openPMD \ --i_macroParticlesCount.period 100 \ --e_macroParticlesCount.period 100 \ - --fields_energy.period 10 \ + --fields_energy.period 10 \ --e_energy.period 10 --e_energy.filter all \ --i_energy.period 10 --i_energy.filter all" diff --git a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1_bench.cfg b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1_bench.cfg index b44e720184..76b8fa274f 100644 --- a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1_bench.cfg +++ b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/1_bench.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4.cfg b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4.cfg index 993db01f7b..531dfb53f2 100644 --- a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4.cfg +++ b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4_bench.cfg b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4_bench.cfg index 257defa5a6..945f39b161 100644 --- a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4_bench.cfg +++ b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/4_bench.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/8_bench.cfg b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/8_bench.cfg index 11c88c5396..8c8bb09973 100644 --- a/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/8_bench.cfg +++ b/share/picongpu/examples/KelvinHelmholtz/etc/picongpu/8_bench.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Rene Widera, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/density.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/density.param index 9edc35a15e..bed7ea6308 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/density.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -25,22 +25,22 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - */ - constexpr float_64 BASE_DENSITY_SI = 1.e25; -} + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1.e25; + } // namespace SI -namespace densityProfiles -{ - /* definition of homogenous profile */ - using Homogenous = HomogenousImpl; -} -} + namespace densityProfiles + { + /* definition of homogenous profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/dimension.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/dimension.param index 9f14baaec3..9cda9d9a01 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/dimension.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/dimension.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. * @@ -20,7 +20,7 @@ #pragma once #ifndef PARAM_DIMENSION -#define PARAM_DIMENSION DIM3 +# define PARAM_DIMENSION DIM3 #endif #define SIMDIM PARAM_DIMENSION diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fieldSolver.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fieldSolver.param deleted file mode 100644 index c52f26f4f8..0000000000 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/fieldSolver.param +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -/** @file - * - * Configure the field solver. - * - * Select the numerical Maxwell solver (e.g. Yee's method). - * - * Also allows to configure ad hoc mitigations for high frequency - * noise in some setups via current smoothing. - */ - -#pragma once - -#include "picongpu/fields/MaxwellSolver/Solvers.def" -#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" - - -namespace picongpu -{ -namespace fields -{ - - /** Current Interpolation - * - * CurrentInterpolation is used to set a method performing the - * interpolate/assign operation from the generated currents of particle - * species to the electro-magnetic fields. - * - * Allowed values are: - * - None: - * - default for staggered grids/Yee-scheme - * - updates E - * - Binomial: 2nd order Binomial filter - * - smooths the current before assignment in staggered grid - * - updates E & breaks local charge conservation slightly - * - NoneDS: - * - experimental assignment for all-centered/directional splitting - * - updates E & B at the same time - */ -#ifndef PARAM_CURRENTINTERPOLATION -# define PARAM_CURRENTINTERPOLATION None -#endif - using CurrentInterpolation = currentInterpolation::PARAM_CURRENTINTERPOLATION; - - /** FieldSolver - * - * Field Solver Selection: - * - Yee< CurrentInterpolation >: standard Yee solver - * - Lehe< CurrentInterpolation >: Num. 
Cherenkov free field solver in a chosen direction - * - YeePML< CurrentInterpolation >: standard Yee solver with PML absorber - * - DirSplitting< CurrentInterpolation >: Sentoku's Directional Splitting Method - * - None< CurrentInterpolation >: disable the vacuum update of E and B - */ - -#ifndef PARAM_FIELDSOLVER - /* WARNING: if you change field solver by hand please update your CELL_WIDTH_SI - * in `grid.param` to fulfill the convergence condition (CFL) - */ -# define PARAM_FIELDSOLVER Yee -#endif - using Solver = maxwellSolver::PARAM_FIELDSOLVER< CurrentInterpolation >; - -} // namespace fields -} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/grid.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/grid.param index 26b100eb4c..ceffd4d2eb 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/grid.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch, * Benjamin Worpitz * * This file is part of PIConGPU. @@ -19,12 +19,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Duration of one timestep @@ -33,27 +31,10 @@ namespace picongpu /** equals X * unit: meter */ -#define DirSplitting 1 -#if (PARAM_FIELDSOLVER == 1) - /* THIS CODE PATH IS ONLY USED IF `PARAM_FIELDSOLVER` IS CHANGED IN - * `cmakeFlags` and the field solver there is set to fieldSolverDirSplitting - * - * Directional Splitting requires a fixed ratio between dt and dx - * and in addition cubic cells. 
- * conditions: dX == dt * c - * dX == dY - * dX == dZ - */ - constexpr float_64 CELL_WIDTH_SI = DELTA_T_SI*SPEED_OF_LIGHT_SI; -#else - /* cell size for Yee solver (must fulfill CFL) - * WARNING: if you change the field solver in `componentsConfig` you - * have to change the CELL_SIZE in this code path - */ - constexpr float_64 CELL_WIDTH_SI = 9.34635e-8; -#endif -#undef DirSplitting + /** equals X + * unit: meter */ + constexpr float_64 CELL_WIDTH_SI = 9.34635e-8; /** equals Y * unit: meter */ constexpr float_64 CELL_HEIGHT_SI = CELL_WIDTH_SI; @@ -73,21 +54,21 @@ namespace picongpu * behave like the interaction of infinite "wire particles" * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. 
* An initial pseudo particle, flying with the speed of light, @@ -106,4 +87,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} +} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param index 1a3ca4511a..c52ed07b1c 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/memory.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -31,78 +31,87 @@ #include #include +#include namespace picongpu { + /* We have to hold back 350MiB for gpu-internal operations: + * - random number generator + * - reduces + * - ... + */ + constexpr size_t reservedGpuMemorySize = 400 * 1024 * 1024; -/* We have to hold back 350MiB for gpu-internal operations: - * - random number generator - * - reduces - * - ... - */ -constexpr size_t reservedGpuMemorySize = 400 *1024*1024; + /* short namespace*/ + namespace mCT = pmacc::math::CT; + /** size of a superCell + * + * volume of a superCell must be <= 1024 + */ + using SuperCellSize = typename mCT::shrinkTo, simDim>::type; -/* short namespace*/ -namespace mCT = pmacc::math::CT; -/** size of a superCell - * - * volume of a superCell must be <= 1024 - */ -using SuperCellSize = typename mCT::shrinkTo< - mCT::Int< 8, 8, 4 >, - simDim ->::type; + /** define the object for mapping superCells to cells*/ + using MappingDesc = MappingDescription; -/** define the object for mapping superCells to cells*/ -using MappingDesc = MappingDescription< simDim, SuperCellSize >; + /** define the size of the core, border and guard area + * + * PIConGPU uses spatial domain-decomposition for parallelization + * over multiple devices with non-shared memory architecture. 
+ * The global spatial domain is organized per device in three + * sections: the GUARD area contains copies of neighboring + * devices (also known as "halo"/"ghost"). + * The BORDER area is the outermost layer of cells of a device, + * equally to what neighboring devices see as GUARD area. + * The CORE area is the innermost area of a device. In union with + * the BORDER area it defines the "active" spatial domain on a device. + * + * GuardSize is defined in units of SuperCellSize per dimension. + */ + using GuardSize = typename mCT::shrinkTo, simDim>::type; -/** define the size of the core, border and guard area - * - * PIConGPU uses spatial domain-decomposition for parallelization - * over multiple devices with non-shared memory architecture. - * The global spatial domain is organized per device in three - * sections: the GUARD area contains copies of neighboring - * devices (also known as "halo"/"ghost"). - * The BORDER area is the outermost layer of cells of a device, - * equally to what neighboring devices see as GUARD area. - * The CORE area is the innermost area of a device. In union with - * the BORDER area it defines the "active" spatial domain on a device. - * - * GuardSize is defined in units of SuperCellSize per dimension. - */ -using GuardSize = typename mCT::shrinkTo< - mCT::Int< 1, 1, 1 >, - simDim ->::type; + /** bytes reserved for species exchange buffer + * + * This is the default configuration for species exchanges buffer sizes. + * The default exchange buffer sizes can be changed per species by adding + * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg + * to its flag list. 
+ */ + struct DefaultExchangeMemCfg + { + // memory used for a direction + static constexpr uint32_t BYTES_EXCHANGE_X = 2 * 1024 * 1024; // 2 MiB + static constexpr uint32_t BYTES_EXCHANGE_Y = 6 * 1024 * 1024; // 6 MiB + static constexpr uint32_t BYTES_EXCHANGE_Z = 2 * 1024 * 1024; // 2 MiB + static constexpr uint32_t BYTES_EDGES = 64 * 1024; // 64 kiB + static constexpr uint32_t BYTES_CORNER = 16 * 1024; // 16 kiB -/** bytes reserved for species exchange buffer - * - * This is the default configuration for species exchanges buffer sizes. - * The default exchange buffer sizes can be changed per species by adding - * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg - * to its flag list. - */ -struct DefaultExchangeMemCfg -{ - // memory used for a direction - static constexpr uint32_t BYTES_EXCHANGE_X = 2 * 1024 * 1024; // 2 MiB - static constexpr uint32_t BYTES_EXCHANGE_Y = 6 * 1024 * 1024; // 6 MiB - static constexpr uint32_t BYTES_EXCHANGE_Z = 2 * 1024 * 1024; // 2 MiB - static constexpr uint32_t BYTES_EDGES = 64 * 1024; // 64 kiB - static constexpr uint32_t BYTES_CORNER = 16 * 1024; // 16 kiB -}; + /** Reference local domain size + * + * The size of the local domain for which the exchange sizes `BYTES_*` are configured for. + * The required size of each exchange will be calculated at runtime based on the local domain size and the + * reference size. The exchange size will be scaled only up and not down. Zero means that there is no reference + * domain size, exchanges will not be scaled. + */ + using REF_LOCAL_DOM_SIZE = mCT::Int<0, 0, 0>; + /** Scaling rate per direction. + * + * 1.0 means it scales linear with the ratio between the local domain size at runtime and the reference local + * domain size. 
+ */ + const std::array DIR_SCALING_FACTOR = {{0.0, 0.0, 0.0}}; + }; -/** number of scalar fields that are reserved as temporary fields */ -constexpr uint32_t fieldTmpNumSlots = 1; + /** number of scalar fields that are reserved as temporary fields */ + constexpr uint32_t fieldTmpNumSlots = 1; -/** can `FieldTmp` gather neighbor information - * - * If `true` it is possible to call the method `asyncCommunicationGather()` - * to copy data from the border of neighboring GPU into the local guard. - * This is also known as building up a "ghost" or "halo" region in domain - * decomposition and only necessary for specific algorithms that extend - * the basic PIC cycle, e.g. with dependence on derived density or energy fields. - */ -constexpr bool fieldTmpSupportGatherCommunication = true; + /** can `FieldTmp` gather neighbor information + * + * If `true` it is possible to call the method `asyncCommunicationGather()` + * to copy data from the border of neighboring GPU into the local guard. + * This is also known as building up a "ghost" or "halo" region in domain + * decomposition and only necessary for specific algorithms that extend + * the basic PIC cycle, e.g. with dependence on derived density or energy fields. + */ + constexpr bool fieldTmpSupportGatherCommunication = true; } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param index 32f98afb3a..8eddfb9d0c 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. 
@@ -28,86 +28,71 @@ namespace picongpu { -namespace particles -{ - namespace startPosition + namespace particles { - struct QuietParam25ppc + namespace startPosition { - /** Count of particles per cell per direction at initial state - * unit: none - */ - using numParticlesPerDimension = typename mCT::shrinkTo< - mCT::Int< - 5, - 5, - 1 - >, - simDim - >::type; - }; - using Quiet25ppc = QuietImpl< QuietParam25ppc >; - - } // namespace startPosition + struct QuietParam25ppc + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = typename mCT::shrinkTo, simDim>::type; + }; + using Quiet25ppc = QuietImpl; - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ - constexpr float_X MIN_WEIGHTING = 10.0; + } // namespace startPosition - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. 
- */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = mCT::volume< - startPosition::QuietParam25ppc::numParticlesPerDimension - >::type::value; - -namespace manipulators -{ - - CONST_VECTOR(float_X,3,DriftParamPositive_direction,1.0,0.0,0.0); - struct DriftParamPositive - { - /** Initial particle drift velocity for electrons and ions - * Examples: - * - No drift is equal to 1.0 + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted * unit: none */ - static constexpr float_64 gamma = 1.021; - const DriftParamPositive_direction_t direction; - }; - using AssignXDriftPositive = unary::Drift< - DriftParamPositive, - nvidia::functors::Assign - >; + constexpr float_X MIN_WEIGHTING = 10.0; - CONST_VECTOR(float_X,3,DriftParamNegative_direction,-1.0,0.0,0.0); - struct DriftParamNegative - { - /** Initial particle drift velocity for electrons and ions - * Examples: - * - No drift is equal to 1.0 - * unit: none + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. 
*/ - static constexpr float_64 gamma = 1.021; - const DriftParamNegative_direction_t direction; - }; - using AssignXDriftNegative = unary::Drift< - DriftParamNegative, - nvidia::functors::Assign - >; + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL + = mCT::volume::type::value; - struct TemperatureParam - { - /* Initial temperature - * unit: keV - */ - static constexpr float_64 temperature = 0.0005; - }; - using AddTemperature = unary::Temperature< TemperatureParam >; + namespace manipulators + { + CONST_VECTOR(float_X, 3, DriftParamPositive_direction, 1.0, 0.0, 0.0); + struct DriftParamPositive + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + const DriftParamPositive_direction_t direction; + }; + using AssignXDriftPositive = unary::Drift; + + CONST_VECTOR(float_X, 3, DriftParamNegative_direction, -1.0, 0.0, 0.0); + struct DriftParamNegative + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + const DriftParamNegative_direction_t direction; + }; + using AssignXDriftNegative = unary::Drift; + + struct TemperatureParam + { + /* Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.0005; + }; + using AddTemperature = unary::Temperature; -} // namespace manipulators -} // namespace particles + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particleFilters.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particleFilters.param index 9186d273c3..03c6e5d08c 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particleFilters.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/particleFilters.param @@ -1,4 +1,4 @@ -/* Copyright 
2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -41,87 +41,79 @@ namespace picongpu { -namespace particles -{ -namespace filter -{ - struct IfRelativeGlobalPositionParamLowQuarterPosition - { - /* lowerBound is included in the range */ - static constexpr float_X lowerBound = 0.0; - /* upperBound is excluded in the range */ - static constexpr float_X upperBound = 0.25; - /* dimension for the filter - * x = 0; y= 1; z = 2 - */ - static constexpr uint32_t dimension = 1u; - - // filter name - static constexpr char const * name = "lowerQuarterYPosition"; - }; - - using LowerQuarterYPosition = filter::RelativeGlobalDomainPosition< - IfRelativeGlobalPositionParamLowQuarterPosition - >; - - struct IfRelativeGlobalPositionParamMiddleHalf + namespace particles { - /* lowerBound is included in the range */ - static constexpr float_X lowerBound = 0.25; - /* upperBound is excluded in the range */ - static constexpr float_X upperBound = 0.75; - /* dimension for the filter - * x = 0; y= 1; z = 2 - */ - static constexpr uint32_t dimension = 1u; - - // filter name - static constexpr char const * name = "middleHalfYPosition"; - }; - - using MiddleHalfYPosition = filter::RelativeGlobalDomainPosition< - IfRelativeGlobalPositionParamMiddleHalf - >; - - struct IfRelativeGlobalPositionParamUpperQuarter - { - /* lowerBound is included in the range */ - static constexpr float_X lowerBound = 0.75; - /* upperBound is excluded in the range */ - static constexpr float_X upperBound = 1.0; - /* dimension for the filter - * x = 0; y= 1; z = 2 - */ - static constexpr uint32_t dimension = 1u; - - // filter name - static constexpr char const * name = "upperQuarterYPosition"; - }; - - using UpperQuarterYPosition = filter::RelativeGlobalDomainPosition< - IfRelativeGlobalPositionParamUpperQuarter - >; - - /** Plugins: collection of all available particle filters - * - * Create a list of all filters here that you want to use in plugins. 
- * - * Note: filter All is defined in picongpu/particles/filter/filter.def - */ - using AllParticleFilters = MakeSeq_t< - All, - LowerQuarterYPosition, - MiddleHalfYPosition, - UpperQuarterYPosition - >; - -} // namespace filter - -namespace traits -{ - /* if needed for generic "free" filters, - * place `SpeciesEligibleForSolver` traits for filters here - */ -} // namespace traits -} // namespace particles + namespace filter + { + struct IfRelativeGlobalPositionParamLowQuarterPosition + { + /* lowerBound is included in the range */ + static constexpr float_X lowerBound = 0.0; + /* upperBound is excluded in the range */ + static constexpr float_X upperBound = 0.25; + /* dimension for the filter + * x = 0; y= 1; z = 2 + */ + static constexpr uint32_t dimension = 1u; + + // filter name + static constexpr char const* name = "lowerQuarterYPosition"; + }; + + using LowerQuarterYPosition + = filter::RelativeGlobalDomainPosition; + + struct IfRelativeGlobalPositionParamMiddleHalf + { + /* lowerBound is included in the range */ + static constexpr float_X lowerBound = 0.25; + /* upperBound is excluded in the range */ + static constexpr float_X upperBound = 0.75; + /* dimension for the filter + * x = 0; y= 1; z = 2 + */ + static constexpr uint32_t dimension = 1u; + + // filter name + static constexpr char const* name = "middleHalfYPosition"; + }; + + using MiddleHalfYPosition = filter::RelativeGlobalDomainPosition; + + struct IfRelativeGlobalPositionParamUpperQuarter + { + /* lowerBound is included in the range */ + static constexpr float_X lowerBound = 0.75; + /* upperBound is excluded in the range */ + static constexpr float_X upperBound = 1.0; + /* dimension for the filter + * x = 0; y= 1; z = 2 + */ + static constexpr uint32_t dimension = 1u; + + // filter name + static constexpr char const* name = "upperQuarterYPosition"; + }; + + using UpperQuarterYPosition + = filter::RelativeGlobalDomainPosition; + + /** Plugins: collection of all available particle filters + * + * 
Create a list of all filters here that you want to use in plugins. + * + * Note: filter All is defined in picongpu/particles/filter/filter.def + */ + using AllParticleFilters + = MakeSeq_t; + + } // namespace filter + + namespace traits + { + /* if needed for generic "free" filters, + * place `SpeciesEligibleForSolver` traits for filters here + */ + } // namespace traits + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/png.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/png.param index 39108d20f8..b6720b7330 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/png.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/png.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -51,32 +51,31 @@ namespace picongpu // multiply highest undisturbed particle density with factor constexpr float_X preParticleDens_opacity = 0.25; - constexpr float_X preChannel1_opacity = 1.0; - constexpr float_X preChannel2_opacity = 1.0; - constexpr float_X preChannel3_opacity = 1.0; + constexpr float_X preChannel1_opacity = 1.0; + constexpr float_X preChannel2_opacity = 1.0; + constexpr float_X preChannel3_opacity = 1.0; // specify color scales for each channel namespace preParticleDensCol = colorScales::red; - namespace preChannel1Col = colorScales::blue; - namespace preChannel2Col = colorScales::green; - namespace preChannel3Col = colorScales::none; + namespace preChannel1Col = colorScales::blue; + namespace preChannel2Col = colorScales::green; + namespace preChannel3Col = colorScales::none; /* png preview settings for each channel */ - DINLINE float_X preChannel1 ( const float3_X& field_B, const float3_X& field_E, const float3_X& field_J ) + DINLINE float_X preChannel1(const float3_X& 
field_B, const float3_X& field_E, const float3_X& field_J) { return field_B.z(); } - DINLINE float_X preChannel2 ( const float3_X& field_B, const float3_X& field_E, const float3_X& field_J ) + DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) { return -1.0_X * field_B.z(); } - DINLINE float_X preChannel3 ( const float3_X& field_B, const float3_X& field_E, const float3_X& field_J ) + DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) { return 1.0_X; } - } - -} + } // namespace visPreview +} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiation.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiation.param index 13b21aece9..226d40ea40 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiation.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiation.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch +/* Copyright 2013-2021 Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -18,13 +18,12 @@ */ - #pragma once - /* - radiation verbose level: - 0=nothing, 1=physics, 2=simulation_state, 4=memory, 8=critical - */ +/* + radiation verbose level: + 0=nothing, 1=physics, 2=simulation_state, 4=memory, 8=critical +*/ #define PIC_VERBOSE_RADIATION 3 @@ -36,145 +35,161 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace linear_frequencies -{ -namespace SI -{ -constexpr float_64 omega_min = 0.0; -constexpr float_64 omega_max = 1.06e16; -} - -constexpr unsigned int N_omega = 1024; // number of frequencies -} - -namespace log_frequencies -{ -namespace SI -{ -// plasma omega = sqrt( (electron density * (1.6e-19)^2) / (8.854e-12 * 9.11e-31) ) -// = 1.78e14 1/s -constexpr float_64 omega_pe = 1.78e14; -constexpr float_64 omega_min = 0.1 * omega_pe; -constexpr float_64 omega_max = 200 * omega_pe; -} - -constexpr unsigned int N_omega = 1024; // number of frequencies -} - - -namespace frequencies_from_list -{ -/** path to text file with frequencies */ -constexpr const char * listLocation = "/path/to/frequency.list"; -constexpr unsigned int N_omega = 2048; // number of frequencies -} - - -namespace radiation_frequencies = log_frequencies; - - -namespace radiationNyquist -{ - constexpr float_32 NyquistFactor = 0.5; -} - -/////////////////////////////////////////////////// - - - // correct treatment of coherent and incoherent radiation from macroparticles - /* Choose different form factors in order to consider different particle shapes for radiation - * - radFormFactor_CIC_3D ... CIC charge distribution - * - radFormFactor_TSC_3D ... TSC charge distribution - * - radFormFactor_PCS_3D ... PCS charge distribution - * - radFormFactor_CIC_1Dy ... only CIC charge distribution in y - * - radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution - * - radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size - * - radFormFactor_incoherent ... only incoherent radiation - * - radFormFactor_coherent ... 
only coherent radiation - */ - namespace radFormFactor_CIC_3D { } - namespace radFormFactor_TSC_3D { } - namespace radFormFactor_PCS_3D { } - namespace radFormFactor_CIC_1Dy { } - namespace radFormFactor_Gauss_spherical { } - namespace radFormFactor_Gauss_cell { } - namespace radFormFactor_incoherent { } - namespace radFormFactor_coherent { } - - namespace radFormFactor = radFormFactor_Gauss_spherical; - - -/////////////////////////////////////////////////////////// - - -namespace parameters -{ - - -constexpr unsigned int N_observer = 256; // number of looking directions - -} /* end namespace parameters */ - - /** activate particles for radiation */ - struct GammaFilterFunctor - { - static constexpr float_X radiationGamma = 5.0; - - template< typename T_Particle > - HDINLINE void operator()( T_Particle& particle ) - { - if( - picongpu::gamma( - particle[ picongpu::momentum_ ], - picongpu::traits::attribute::getMass( - particle[ picongpu::weighting_ ], - particle - ) - ) >= radiationGamma - ) - particle[ picongpu::radiationMask_ ] = true; - } - }; - - - /* filter to enable radiation for electrons - * - * to enable the filter: - * - goto file `speciesDefinition.param` - * - add the attribute `radiationMask` to the electron species - */ - using RadiationParticleFilter = picongpu::particles::manipulators::generic::Free< - GammaFilterFunctor - >; - - - -////////////////////////////////////////////////// - - -// add a window function weighting to the radiation in order -// to avoid ringing effects from sharpe boundaries -// default: no window function via `radWindowFunctionNone` - -/* Choose different window function in order to get better ringing reduction - * radWindowFunctionTriangle - * radWindowFunctionHamming - * radWindowFunctionTriplett - * radWindowFunctionGauss - * radWindowFunctionNone - */ -namespace radWindowFunctionTriangle { } -namespace radWindowFunctionHamming { } -namespace radWindowFunctionTriplett { } -namespace radWindowFunctionGauss { } -namespace 
radWindowFunctionNone { } - -namespace radWindowFunction = radWindowFunctionTriangle; - -} // namespace radiation -} // namespace plugins + namespace plugins + { + namespace radiation + { + namespace linear_frequencies + { + namespace SI + { + constexpr float_64 omega_min = 0.0; + constexpr float_64 omega_max = 1.06e16; + } // namespace SI + + constexpr unsigned int N_omega = 1024; // number of frequencies + } // namespace linear_frequencies + + namespace log_frequencies + { + namespace SI + { + // plasma omega = sqrt( (electron density * (1.6e-19)^2) / (8.854e-12 * 9.11e-31) ) + // = 1.78e14 1/s + constexpr float_64 omega_pe = 1.78e14; + constexpr float_64 omega_min = 0.1 * omega_pe; + constexpr float_64 omega_max = 200 * omega_pe; + } // namespace SI + + constexpr unsigned int N_omega = 1024; // number of frequencies + } // namespace log_frequencies + + + namespace frequencies_from_list + { + /** path to text file with frequencies */ + constexpr const char* listLocation = "/path/to/frequency.list"; + constexpr unsigned int N_omega = 2048; // number of frequencies + } // namespace frequencies_from_list + + + namespace radiation_frequencies = log_frequencies; + + + namespace radiationNyquist + { + constexpr float_32 NyquistFactor = 0.5; + } + + /////////////////////////////////////////////////// + + + // correct treatment of coherent and incoherent radiation from macroparticles + /* Choose different form factors in order to consider different particle shapes for radiation + * - radFormFactor_CIC_3D ... CIC charge distribution + * - radFormFactor_TSC_3D ... TSC charge distribution + * - radFormFactor_PCS_3D ... PCS charge distribution + * - radFormFactor_CIC_1Dy ... only CIC charge distribution in y + * - radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution + * - radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size + * - radFormFactor_incoherent ... only incoherent radiation + * - radFormFactor_coherent ... 
only coherent radiation + */ + namespace radFormFactor_CIC_3D + { + } + namespace radFormFactor_TSC_3D + { + } + namespace radFormFactor_PCS_3D + { + } + namespace radFormFactor_CIC_1Dy + { + } + namespace radFormFactor_Gauss_spherical + { + } + namespace radFormFactor_Gauss_cell + { + } + namespace radFormFactor_incoherent + { + } + namespace radFormFactor_coherent + { + } + + namespace radFormFactor = radFormFactor_Gauss_spherical; + + + /////////////////////////////////////////////////////////// + + + namespace parameters + { + constexpr unsigned int N_observer = 256; // number of looking directions + + } /* end namespace parameters */ + + /** activate particles for radiation */ + struct GammaFilterFunctor + { + static constexpr float_X radiationGamma = 5.0; + + template + HDINLINE void operator()(T_Particle& particle) + { + if(picongpu::gamma( + particle[picongpu::momentum_], + picongpu::traits::attribute::getMass(particle[picongpu::weighting_], particle)) + >= radiationGamma) + particle[picongpu::radiationMask_] = true; + } + }; + + + /* filter to enable radiation for electrons + * + * to enable the filter: + * - goto file `speciesDefinition.param` + * - add the attribute `radiationMask` to the electron species + */ + using RadiationParticleFilter = picongpu::particles::manipulators::generic::Free; + + + ////////////////////////////////////////////////// + + + // add a window function weighting to the radiation in order + // to avoid ringing effects from sharpe boundaries + // default: no window function via `radWindowFunctionNone` + + /* Choose different window function in order to get better ringing reduction + * radWindowFunctionTriangle + * radWindowFunctionHamming + * radWindowFunctionTriplett + * radWindowFunctionGauss + * radWindowFunctionNone + */ + namespace radWindowFunctionTriangle + { + } + namespace radWindowFunctionHamming + { + } + namespace radWindowFunctionTriplett + { + } + namespace radWindowFunctionGauss + { + } + namespace 
radWindowFunctionNone + { + } + + namespace radWindowFunction = radWindowFunctionTriangle; + + } // namespace radiation + } // namespace plugins } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiationObserver.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiationObserver.param index 89be653fc6..c66d133b22 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiationObserver.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/radiationObserver.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -22,64 +22,62 @@ namespace picongpu { -namespace plugins -{ -namespace radiation -{ -namespace radiation_observer -{ - /** Compute observation angles - * - * This function is used in the Radiation plug-in kernel to compute - * the observation directions given as a unit vector pointing - * towards a 'virtual' detector - * - * @param observation_id_extern - * int index that identifies each block on the GPU - * to compute the observation direction - * - * @return unit vector pointing in observation direction - * type: vector_64 - * - */ - HDINLINE vector_64 observation_direction(const int observation_id_extern) + namespace plugins { - /** This computes observation directions for one octant - * of a sphere around the simulation area. 
- * The axises of the octant point towards: - * (+1,0,0) ; (0,+1,0) ; (0,0,-1) - */ - - /* generate two indices from single block index */ - constexpr int N_angle_split = 16; /* index split distance */ - /* get column index for computing angle theta: */ - const int my_index_theta = observation_id_extern / N_angle_split; - /* get row index for computing angle phi: */ - const int my_index_phi = observation_id_extern % N_angle_split; - - /* range for BOTH angles */ - constexpr picongpu::float_64 angle_range= picongpu::PI/2.0; + namespace radiation + { + namespace radiation_observer + { + /** Compute observation angles + * + * This function is used in the Radiation plug-in kernel to compute + * the observation directions given as a unit vector pointing + * towards a 'virtual' detector + * + * @param observation_id_extern + * int index that identifies each block on the GPU + * to compute the observation direction + * + * @return unit vector pointing in observation direction + * type: vector_64 + * + */ + HDINLINE vector_64 observation_direction(const int observation_id_extern) + { + /** This computes observation directions for one octant + * of a sphere around the simulation area. 
+ * The axises of the octant point towards: + * (+1,0,0) ; (0,+1,0) ; (0,0,-1) + */ - /* angle stepwidth for BOTH angles */ - constexpr picongpu::float_64 delta_angle = 1.0 * angle_range / (N_angle_split-1); + /* generate two indices from single block index */ + constexpr int N_angle_split = 16; /* index split distance */ + /* get column index for computing angle theta: */ + const int my_index_theta = observation_id_extern / N_angle_split; + /* get row index for computing angle phi: */ + const int my_index_phi = observation_id_extern % N_angle_split; - /* compute both angles */ - const picongpu::float_64 theta( my_index_theta * delta_angle + 0.5*picongpu::PI ); - const picongpu::float_64 phi( my_index_phi * delta_angle ); + /* range for BOTH angles */ + constexpr picongpu::float_64 angle_range = picongpu::PI / 2.0; - /* compute unit vector */ - picongpu::float_32 sinPhi; - picongpu::float_32 cosPhi; - picongpu::float_32 sinTheta; - picongpu::float_32 cosTheta; - math::sincos(precisionCast(phi), sinPhi, cosPhi); - math::sincos(precisionCast(theta), sinTheta, cosTheta); - return vector_64( sinTheta*cosPhi , sinTheta*sinPhi , cosTheta ) ; + /* angle stepwidth for BOTH angles */ + constexpr picongpu::float_64 delta_angle = 1.0 * angle_range / (N_angle_split - 1); + /* compute both angles */ + const picongpu::float_64 theta(my_index_theta * delta_angle + 0.5 * picongpu::PI); + const picongpu::float_64 phi(my_index_phi * delta_angle); - } + /* compute unit vector */ + picongpu::float_32 sinPhi; + picongpu::float_32 cosPhi; + picongpu::float_32 sinTheta; + picongpu::float_32 cosTheta; + pmacc::math::sincos(precisionCast(phi), sinPhi, cosPhi); + pmacc::math::sincos(precisionCast(theta), sinTheta, cosTheta); + return vector_64(sinTheta * cosPhi, sinTheta * sinPhi, cosTheta); + } -} // namespace radiation_observer -} // namespace radiation -} // namespace plugins + } // namespace radiation_observer + } // namespace radiation + } // namespace plugins } // namespace picongpu 
diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/species.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/species.param deleted file mode 100644 index c7e25d4840..0000000000 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/species.param +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/particles/shapes.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" -#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" - -#include "picongpu/particles/flylite/NonLTE.def" -#include "picongpu/fields/currentDeposition/Solver.def" - - -namespace picongpu -{ -/*---------------------------- generic solver---------------------------------*/ - -/*! 
Particle Shape definitions ------------------------------------------------- - * - particles::shapes::CIC : 1st order - * - particles::shapes::TSC : 2nd order - * - particles::shapes::PCS : 3rd order - * - particles::shapes::P4S : 4th order - * - * example: using UsedParticleShape = particles::shapes::CIC; - */ -#ifndef PARAM_PARTICLESHAPE -#define PARAM_PARTICLESHAPE TSC -#endif -using UsedParticleShape = particles::shapes::PARAM_PARTICLESHAPE; - -/* define which interpolation method is used to interpolate fields to particle*/ -using UsedField2Particle = FieldToParticleInterpolation< UsedParticleShape, AssignedTrilinearInterpolation >; - -/*! select current solver method ----------------------------------------------- - * - currentSolver::Esirkepov : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - currentSolver::VillaBune<> : particle shapes - CIC (1st order) only - * - currentSolver::EmZ : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - * For development purposes: --------------------------------------------------- - * - currentSolver::EsirkepovNative : generic version of currentSolverEsirkepov - * without optimization (~4x slower and needs more shared memory) - */ -#ifndef PARAM_CURRENTSOLVER -#define PARAM_CURRENTSOLVER Esirkepov -#endif -using UsedParticleCurrentSolver = currentSolver::PARAM_CURRENTSOLVER; - -/*! particle pusher configuration ---------------------------------------------- - * - * Defining a pusher is optional for particles - * - * - particles::pusher::Vay : better suited relativistic boris pusher - * - particles::pusher::Boris : standard boris pusher - * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher - * with classical radiation reaction - * - * For diagnostics & modeling: ------------------------------------------------ - * - particles::pusher::Free : free propagation, ignore fields - * (= free stream model) - * - particles::pusher::Photon : propagate with c in direction of normalized mom. 
- * - particles::pusher::Probe : Probe particles that interpolate E & B - * For development purposes: -------------------------------------------------- - * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) - */ -using UsedParticlePusher = particles::pusher::Boris; - -} // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesDefinition.param index 10fd332f24..d46409ee79 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau * * This file is part of PIConGPU. * @@ -30,89 +30,76 @@ #ifndef PARAM_RADIATION - /* disable radiation calculation */ -# define PARAM_RADIATION 0 +/* disable radiation calculation */ +# define PARAM_RADIATION 0 #endif namespace picongpu { + /*########################### define particle attributes #####################*/ + + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t< + position, + momentum, + weighting +#if(PARAM_RADIATION == 1) + , + momentumPrev1 +#endif + >; -/*########################### define particle attributes #####################*/ + /*########################### end particle attributes ########################*/ -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting -#if( PARAM_RADIATION == 1 ) - , momentumPrev1 -#endif ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - -/*--------------------------- electrons 
--------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > ->; - -/* define species electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; - -/*--------------------------- ions -------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioIons, 1836.152672 ); -value_identifier( float_X, ChargeRatioIons, -1.0 ); - -/* ratio relative to BASE_DENSITY */ -value_identifier( float_X, DensityRatioIons, 1.0 ); - -using ParticleFlagsIons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioIons >, - chargeRatio< ChargeRatioIons >, - densityRatio< DensityRatioIons >, - atomicNumbers< ionization::atomicNumbers::Hydrogen_t > ->; - -/* define species ions */ -using PIC_Ions = Particles< - PMACC_CSTRING( "i" ), - ParticleFlagsIons, - DefaultParticleAttributes ->; - -/*########################### end species ####################################*/ - -/** All known particle species of the simulation - * - * List all defined particle species from above in this list - * to make them available to the PIC algorithm. 
- */ -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons, - PIC_Ions ->; + /*########################### define species #################################*/ + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioIons, 1836.152672); + value_identifier(float_X, ChargeRatioIons, -1.0); + + /* ratio relative to BASE_DENSITY */ + value_identifier(float_X, DensityRatioIons, 1.0); + + using ParticleFlagsIons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers>; + + /* define species ions */ + using PIC_Ions = Particles; + + /*########################### end species ####################################*/ + + /** All known particle species of the simulation + * + * List all defined particle species from above in this list + * to make them available to the PIC algorithm. 
+ */ + using VectorAllSpecies = MakeSeq_t; } // namespace picongpu diff --git a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param index f8e5bc5a9c..304410c368 100644 --- a/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/KelvinHelmholtz/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,57 +33,22 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::Homogenous, - startPosition::Quiet25ppc, - PIC_Electrons - >, - Derive< - PIC_Electrons, - PIC_Ions - >, - Manipulate< - manipulators::AssignXDriftPositive, - PIC_Ions, - filter::LowerQuarterYPosition - >, - Manipulate< - manipulators::AssignXDriftNegative, - PIC_Ions, - filter::MiddleHalfYPosition - >, - Manipulate< - manipulators::AssignXDriftPositive, - PIC_Ions, - filter::UpperQuarterYPosition - >, - Manipulate< - manipulators::AssignXDriftPositive, - PIC_Electrons, - filter::LowerQuarterYPosition - >, - Manipulate< - manipulators::AssignXDriftNegative, - PIC_Electrons, - filter::MiddleHalfYPosition - >, - Manipulate< - manipulators::AssignXDriftPositive, - PIC_Electrons, - filter::UpperQuarterYPosition - >, - Manipulate< - manipulators::AddTemperature, - PIC_Electrons - > - >; + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + Derive, + Manipulate, + Manipulate, + Manipulate, + Manipulate, 
+ Manipulate, + Manipulate, + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/cmakeFlags b/share/picongpu/examples/LaserWakefield/cmakeFlags index d25c78758a..250ade041c 100755 --- a/share/picongpu/examples/LaserWakefield/cmakeFlags +++ b/share/picongpu/examples/LaserWakefield/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -30,15 +30,8 @@ # - increase by 1, no gaps flags[0]="" -flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Lehe;-DPARAM_PARTICLEPUSHER=Vay'" -flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=Esirkepov;-DPARAM_PARTICLESHAPE=CIC'" -flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=VillaBune;-DPARAM_PARTICLESHAPE=CIC'" -flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_PRECISION=precision64Bit'" -flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_DIMENSION=DIM2'" -flags[6]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=Esirkepov;-DPARAM_PARTICLESHAPE=CIC;-DPARAM_DIMENSION=DIM2'" -flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_PRECISION=precision64Bit;-DPARAM_DIMENSION=DIM2'" -flags[8]="-DPARAM_OVERWRITES:LIST='-DPARAM_DIMENSION=DIM2'" -flags[9]="-DPARAM_OVERWRITES:LIST='-DPARAM_IONS=1;-DPARAM_IONIZATION=1'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_DIMENSION=DIM2'" +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_IONS=1;-DPARAM_IONIZATION=1'" ################################################################################ # execution diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/1.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/1.cfg index 2b0968084d..1a2163a8c3 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/1.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 
Axel Huebl, Rene Widera, Felix Schmitt, Franz Poeschel # # This file is part of PIConGPU. # @@ -63,10 +63,12 @@ TBG_e_PSypy="--e_phaseSpace.period 100 \ --e_phaseSpace.min -1.0 --e_phaseSpace.max 1.0 \ --e_phaseSpace.filter all" -# HDF5 raw data output (DISABLED, add to TBG_plugins below to ENABLE!) -TBG_hdf5="--hdf5.period 100 \ - --hdf5.file simData \ - --hdf5.source 'species_all,fields_all'" +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simData \ + --openPMD.ext bp \ + --checkpoint.backend openPMD \ + --checkpoint.period 100 \ + --checkpoint.restart.backend openPMD" # macro particle counter (electrons, debug information for memory) TBG_e_macroCount="--e_macroParticlesCount.period 100" @@ -74,7 +76,8 @@ TBG_e_macroCount="--e_macroParticlesCount.period 100" TBG_plugins="!TBG_pngYX \ !TBG_e_histogram \ !TBG_e_PSypy \ - !TBG_e_macroCount" + !TBG_e_macroCount \ + !TBG_openPMD" ################################# ## Section: Program Parameters ## diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/16.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/16.cfg index 3c05294d63..304bc558f7 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/16.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/16.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/1_isaac.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/1_isaac.cfg index 4f3e2bb7e2..f530b53993 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/1_isaac.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/1_isaac.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/32.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/32.cfg index c34e36cf02..af81abff70 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/32.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/32.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/4.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/4.cfg index dac3e42c1f..7595db871b 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/4.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/4.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, Franz Poeschel # # This file is part of PIConGPU. # @@ -64,10 +64,12 @@ TBG_e_PSypy="--e_phaseSpace.period 100 \ --e_phaseSpace.min -1.0 --e_phaseSpace.max 1.0 \ --e_phaseSpace.filter all" -# HDF5 raw data output (DISABLED, add to TBG_plugins below to ENABLE!) 
-TBG_hdf5="--hdf5.period 100 \ - --hdf5.file simData \ - --hdf5.source 'species_all,fields_all'" +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simData \ + --openPMD.ext bp \ + --checkpoint.backend openPMD \ + --checkpoint.period 100 \ + --checkpoint.restart.backend openPMD" # macro particle counter (electrons, debug information for memory) TBG_e_macroCount="--e_macroParticlesCount.period 100" @@ -75,7 +77,8 @@ TBG_e_macroCount="--e_macroParticlesCount.period 100" TBG_plugins="!TBG_pngYX \ !TBG_e_histogram \ !TBG_e_PSypy \ - !TBG_e_macroCount" + !TBG_e_macroCount \ + !TBG_openPMD" ################################# diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/4_gui.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/4_gui.cfg index 115b3eb369..15453cda90 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/4_gui.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/4_gui.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/4_isaac.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/4_isaac.cfg index fdcabb2781..6e76ba627a 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/4_isaac.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/4_isaac.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/8.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/8.cfg index 105b4f0b9e..0039e71d07 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/8.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/8.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, Franz Poeschel # # This file is part of PIConGPU. # @@ -64,10 +64,12 @@ TBG_e_PSypy="--e_phaseSpace.period 100 \ --e_phaseSpace.min -1.0 --e_phaseSpace.max 1.0 \ --e_phaseSpace.filter all" -# HDF5 raw data output (DISABLED, add to TBG_plugins below to ENABLE!) -TBG_hdf5="--hdf5.period 100 \ - --hdf5.file simData \ - --hdf5.source 'species_all,fields_all'" +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simData \ + --openPMD.ext bp \ + --checkpoint.backend openPMD \ + --checkpoint.period 100 \ + --checkpoint.restart.backend openPMD" # macro particle counter (electrons, debug information for memory) TBG_e_macroCount="--e_macroParticlesCount.period 100" @@ -75,7 +77,8 @@ TBG_e_macroCount="--e_macroParticlesCount.period 100" TBG_plugins="!TBG_pngYX \ !TBG_e_histogram \ !TBG_e_PSypy \ - !TBG_e_macroCount" + !TBG_e_macroCount \ + !TBG_openPMD" ################################# ## Section: Program Parameters ## diff --git a/share/picongpu/examples/LaserWakefield/etc/picongpu/8_isaac.cfg b/share/picongpu/examples/LaserWakefield/etc/picongpu/8_isaac.cfg index 2215846be1..0bfacd9b13 100644 --- a/share/picongpu/examples/LaserWakefield/etc/picongpu/8_isaac.cfg +++ b/share/picongpu/examples/LaserWakefield/etc/picongpu/8_isaac.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/density.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/density.param index 8122542894..4729dcb4ef 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/density.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, * Richard Pausch, Marco Garten * * This file is part of PIConGPU. @@ -26,59 +26,57 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - */ + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + */ #ifndef PARAM_BASE_DENSITY_SI -# define PARAM_BASE_DENSITY_SI 1.e25 +# define PARAM_BASE_DENSITY_SI 1.e25 #endif - constexpr float_64 BASE_DENSITY_SI = PARAM_BASE_DENSITY_SI; -} + constexpr float_64 BASE_DENSITY_SI = PARAM_BASE_DENSITY_SI; + } // namespace SI -namespace densityProfiles -{ - PMACC_STRUCT(GaussianParameter, - /** Profile Formula: - * constexpr float_X exponent = abs((y - gasCenter_SI) / gasSigma_SI); - * constexpr float_X density = exp(gasFactor * pow(exponent, gasPower)); - * - * takes `gasCenterLeft_SI for y < gasCenterLeft_SI`, - * `gasCenterRight_SI for y > gasCenterRight_SI`, - * and exponent = 0.0 for gasCenterLeft_SI < y < gasCenterRight_SI - */ - (PMACC_C_VALUE(float_X, gasFactor, -1.0)) - (PMACC_C_VALUE(float_X, gasPower, 4.0)) + namespace densityProfiles + { + PMACC_STRUCT( + GaussianParameter, + /** Profile Formula: + * constexpr float_X exponent = abs((y - gasCenter_SI) / gasSigma_SI); + * constexpr float_X density = exp(gasFactor * pow(exponent, gasPower)); + * + * takes `gasCenterLeft_SI for y < gasCenterLeft_SI`, + * `gasCenterRight_SI for y > gasCenterRight_SI`, + * and exponent = 0.0 for gasCenterLeft_SI < y < gasCenterRight_SI + */ + (PMACC_C_VALUE(float_X, gasFactor, -1.0))(PMACC_C_VALUE(float_X, gasPower, 4.0)) - /** height of vacuum area on top border - * - * this vacuum is important because of the laser initialization, - * which is done in the first cells of the simulation and - * assumes a charge-free volume - * unit: cells - */ - (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) + /** height of vacuum area on top border + * + * this vacuum is important because of the laser initialization, + * which is done in the first cells of the simulation and + * assumes a charge-free volume + * unit: cells + */ + (PMACC_C_VALUE(uint32_t, vacuumCellsY, 50)) - /** The central position of the gas distribution - * unit: meter - */ - (PMACC_C_VALUE(float_64, gasCenterLeft_SI, 8.0e-5)) - (PMACC_C_VALUE(float_64, 
gasCenterRight_SI, 10.0e-5)) + /** The central position of the gas distribution + * unit: meter + */ + (PMACC_C_VALUE(float_64, gasCenterLeft_SI, 8.0e-5))(PMACC_C_VALUE(float_64, gasCenterRight_SI, 10.0e-5)) - /** the distance from gasCenter_SI until the gas density decreases to its 1/e-th part - * unit: meter - */ - (PMACC_C_VALUE(float_64, gasSigmaLeft_SI, 8.0e-5)) - (PMACC_C_VALUE(float_64, gasSigmaRight_SI, 8.0e-5)) - ); /* struct GaussianParam */ + /** the distance from gasCenter_SI until the gas density decreases to its 1/e-th part + * unit: meter + */ + (PMACC_C_VALUE(float_64, gasSigmaLeft_SI, 8.0e-5))( + PMACC_C_VALUE(float_64, gasSigmaRight_SI, 8.0e-5))); /* struct GaussianParam */ - /* definition of density with Gaussian profile */ - using Gaussian = GaussianImpl< GaussianParameter >; -} -} + /* definition of density with Gaussian profile */ + using Gaussian = GaussianImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/dimension.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/dimension.param index 9f14baaec3..9cda9d9a01 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/dimension.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/dimension.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Rene Widera +/* Copyright 2014-2021 Axel Huebl, Rene Widera * * This file is part of PIConGPU. 
* @@ -20,7 +20,7 @@ #pragma once #ifndef PARAM_DIMENSION -#define PARAM_DIMENSION DIM3 +# define PARAM_DIMENSION DIM3 #endif #define SIMDIM PARAM_DIMENSION diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/fieldSolver.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/fieldSolver.param deleted file mode 100644 index 5ae21b62e2..0000000000 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/fieldSolver.param +++ /dev/null @@ -1,82 +0,0 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -/** @file - * - * Configure the field solver. - * - * Select the numerical Maxwell solver (e.g. Yee's method). - * - * Also allows to configure ad hoc mitigations for high frequency - * noise in some setups via current smoothing. - */ - -#pragma once - -#include "picongpu/fields/MaxwellSolver/Solvers.def" -#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" - - -namespace picongpu -{ -namespace fields -{ - - /** Current Interpolation - * - * CurrentInterpolation is used to set a method performing the - * interpolate/assign operation from the generated currents of particle - * species to the electro-magnetic fields. 
- * - * Allowed values are: - * - None: - * - default for staggered grids/Yee-scheme - * - updates E - * - Binomial: 2nd order Binomial filter - * - smooths the current before assignment in staggered grid - * - updates E & breaks local charge conservation slightly - * - NoneDS: - * - experimental assignment for all-centered/directional splitting - * - updates E & B at the same time - */ -#ifndef PARAM_CURRENTINTERPOLATION -# define PARAM_CURRENTINTERPOLATION None -#endif - using CurrentInterpolation = currentInterpolation::PARAM_CURRENTINTERPOLATION; - - /** FieldSolver - * - * Field Solver Selection: - * - Yee< CurrentInterpolation >: standard Yee solver - * - YeePML< CurrentInterpolation >: standard Yee solver with PML absorber - * - Lehe< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction - * - DirSplitting< CurrentInterpolation >: Sentoku's Directional Splitting Method - * - None< CurrentInterpolation >: disable the vacuum update of E and B - */ - -#ifndef PARAM_FIELDSOLVER - /* WARNING: if you change field solver by hand please update your CELL_WIDTH_SI - * in `grid.param` to fulfill the convergence condition (CFL) - */ -# define PARAM_FIELDSOLVER Yee -#endif - using Solver = maxwellSolver::PARAM_FIELDSOLVER< CurrentInterpolation >; - -} // namespace fields -} // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/grid.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/grid.param index a44d66b4e1..b5d81edf76 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/grid.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. 
* @@ -18,12 +18,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Duration of one timestep @@ -52,21 +50,21 @@ namespace picongpu * behave like the interaction of infinite "wire particles" * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. * An initial pseudo particle, flying with the speed of light, @@ -85,7 +83,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/laser.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/laser.param index ce55cf26d6..264f694d29 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/laser.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/laser.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, Alexander Debus +/* Copyright 2013-2021 Axel Huebl, Anton Helm, Rene Widera, Richard Pausch, Alexander Debus * * This file is part of PIConGPU. 
* @@ -46,119 +46,123 @@ #include "picongpu/fields/laserProfiles/profiles.def" #ifndef PARAM_A0 -# define PARAM_A0 8.0 +# define PARAM_A0 8.0 #endif #ifndef PARAM_WAVE_LENGTH_SI -# define PARAM_WAVE_LENGTH_SI 0.8e-6 +# define PARAM_WAVE_LENGTH_SI 0.8e-6 #endif #ifndef PARAM_PULSE_LENGTH_SI -# define PARAM_PULSE_LENGTH_SI 5.e-15 +# define PARAM_PULSE_LENGTH_SI 5.e-15 #endif namespace picongpu { -namespace fields -{ -namespace laserProfiles -{ -namespace gaussianBeam -{ - //! Use only the 0th Laguerremode for a standard Gaussian - static constexpr uint32_t MODENUMBER = 0; - PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); - // This is just an example for a more complicated set of Laguerre modes - //constexpr uint32_t MODENUMBER = 12; - //PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, -0.0160788); - -} // namespace gaussianBeam - - struct GaussianBeamParam + namespace fields { - /** unit: meter */ - static constexpr float_64 WAVE_LENGTH_SI = PARAM_WAVE_LENGTH_SI; - - /** Convert the normalized laser strength parameter a0 to Volt per meter */ - static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; - - /** unit: W / m^2 */ - // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) - - /** unit: none */ - static constexpr float_64 _A0 = PARAM_A0; - - /** unit: Volt / meter */ - static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; - - /** unit: Volt / meter */ - //static constexpr float_64 AMPLITUDE_SI = 1.738e13; - - /** Pulse length: sigma of std. 
gauss for intensity (E^2) - * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] - * [ 2.354820045 ] - * Info: FWHM_of_Intensity = FWHM_Illumination - * = what a experimentalist calls "pulse duration" - * - * unit: seconds (1 sigma) */ - static constexpr float_64 PULSE_LENGTH_SI = PARAM_PULSE_LENGTH_SI; - - /** beam waist: distance from the axis where the pulse intensity (E^2) - * decreases to its 1/e^2-th part, - * at the focus position of the laser - * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } - * [ 1.17741 ] - * - * unit: meter */ - static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; - /** the distance to the laser focus in y-direction - * unit: meter */ - static constexpr float_64 FOCUS_POS_SI = 4.62e-5; - - /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH - * - * unit: none */ - static constexpr float_64 PULSE_INIT = 15.0; - - /** cell from top where the laser is initialized - * - * if `initPlaneY == 0` than the absorber are disabled. - * if `initPlaneY > absorbercells negative Y` the negative absorber in y - * direction is enabled - * - * valid ranges: - * - initPlaneY == 0 - * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu - */ - static constexpr uint32_t initPlaneY = 0; - - /** laser phase shift (no shift: 0.0) - * - * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center - * - * unit: rad, periodic in 2*pi - */ - static constexpr float_X LASER_PHASE = 0.0; - - using LAGUERREMODES_t = gaussianBeam::LAGUERREMODES_t; - static constexpr uint32_t MODENUMBER = gaussianBeam::MODENUMBER; - - /** Available polarisation types - */ - enum PolarisationType + namespace laserProfiles { - LINEAR_X = 1u, - LINEAR_Z = 2u, - CIRCULAR = 4u, - }; - /** Polarization selection - */ - static constexpr PolarisationType Polarisation = CIRCULAR; - }; - - //! 
currently selected laser profile - using Selected = GaussianBeam< GaussianBeamParam >; - -} // namespace laserProfiles -} // namespace fields + namespace gaussianBeam + { + //! Use only the 0th Laguerremode for a standard Gaussian + static constexpr uint32_t MODENUMBER = 0; + PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, 1.0); + // This is just an example for a more complicated set of Laguerre modes + // constexpr uint32_t MODENUMBER = 12; + // PMACC_CONST_VECTOR(float_X, MODENUMBER + 1, LAGUERREMODES, -1.0, 0.0300519, 0.319461, -0.23783, + // 0.0954839, 0.0318653, -0.144547, 0.0249208, -0.111989, 0.0434385, -0.030038, -0.00896321, + // -0.0160788); + + } // namespace gaussianBeam + + struct GaussianBeamParam + { + /** unit: meter */ + static constexpr float_64 WAVE_LENGTH_SI = PARAM_WAVE_LENGTH_SI; + + /** Convert the normalized laser strength parameter a0 to Volt per meter */ + static constexpr float_64 UNITCONV_A0_to_Amplitude_SI = -2.0 * PI / WAVE_LENGTH_SI + * ::picongpu::SI::ELECTRON_MASS_SI * ::picongpu::SI::SPEED_OF_LIGHT_SI + * ::picongpu::SI::SPEED_OF_LIGHT_SI / ::picongpu::SI::ELECTRON_CHARGE_SI; + + /** unit: W / m^2 */ + // calculate: _A0 = 8.549297e-6 * sqrt( Intensity[W/m^2] ) * wavelength[m] (linearly polarized) + + /** unit: none */ + static constexpr float_64 _A0 = PARAM_A0; + + /** unit: Volt / meter */ + static constexpr float_64 AMPLITUDE_SI = _A0 * UNITCONV_A0_to_Amplitude_SI; + + /** unit: Volt / meter */ + // static constexpr float_64 AMPLITUDE_SI = 1.738e13; + + /** Pulse length: sigma of std. 
gauss for intensity (E^2) + * PULSE_LENGTH_SI = FWHM_of_Intensity / [ 2*sqrt{ 2* ln(2) } ] + * [ 2.354820045 ] + * Info: FWHM_of_Intensity = FWHM_Illumination + * = what a experimentalist calls "pulse duration" + * + * unit: seconds (1 sigma) */ + static constexpr float_64 PULSE_LENGTH_SI = PARAM_PULSE_LENGTH_SI; + + /** beam waist: distance from the axis where the pulse intensity (E^2) + * decreases to its 1/e^2-th part, + * at the focus position of the laser + * W0_SI = FWHM_of_Intensity / sqrt{ 2* ln(2) } + * [ 1.17741 ] + * + * unit: meter */ + static constexpr float_64 W0_SI = 5.0e-6 / 1.17741; + /** the distance to the laser focus in y-direction + * unit: meter */ + static constexpr float_64 FOCUS_POS_SI = 4.62e-5; + + /** The laser pulse will be initialized PULSE_INIT times of the PULSE_LENGTH + * + * unit: none */ + static constexpr float_64 PULSE_INIT = 15.0; + + /** cell from top where the laser is initialized + * + * if `initPlaneY == 0` than the absorber are disabled. + * if `initPlaneY > absorbercells negative Y` the negative absorber in y + * direction is enabled + * + * valid ranges: + * - initPlaneY == 0 + * - absorber cells negative Y < initPlaneY < cells in y direction of the top gpu + */ + static constexpr uint32_t initPlaneY = 0; + + /** laser phase shift (no shift: 0.0) + * + * sin(omega*time + laser_phase): starts with phase=0 at center --> E-field=0 at center + * + * unit: rad, periodic in 2*pi + */ + static constexpr float_X LASER_PHASE = 0.0; + + using LAGUERREMODES_t = gaussianBeam::LAGUERREMODES_t; + static constexpr uint32_t MODENUMBER = gaussianBeam::MODENUMBER; + + /** Available polarisation types + */ + enum PolarisationType + { + LINEAR_X = 1u, + LINEAR_Z = 2u, + CIRCULAR = 4u, + }; + /** Polarization selection + */ + static constexpr PolarisationType Polarisation = CIRCULAR; + }; + + //! 
currently selected laser profile + using Selected = GaussianBeam; + + } // namespace laserProfiles + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/particle.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/particle.param index 5048da8db7..2c92549011 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/particle.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Marco Garten, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Marco Garten, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. @@ -28,53 +28,49 @@ namespace picongpu { -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ - constexpr float_X MIN_WEIGHTING = 10.0; - -namespace startPosition -{ - - struct RandomParameter2ppc + namespace particles { - /** Count of particles per cell at initial state + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted * unit: none */ - static constexpr uint32_t numParticlesPerCell = 2u; - }; - using Random2ppc = RandomImpl< RandomParameter2ppc >; + constexpr float_X MIN_WEIGHTING = 10.0; -} // namespace startPosition + namespace startPosition + { + struct RandomParameter2ppc + { + /** Count of particles per cell at initial state + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 2u; + }; + using Random2ppc = RandomImpl; - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. 
- */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = - startPosition::RandomParameter2ppc::numParticlesPerCell; + } // namespace startPosition -namespace manipulators -{ + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. + */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = startPosition::RandomParameter2ppc::numParticlesPerCell; - struct SetIonToNeutral - { - template< typename T_Particle > - DINLINE void operator()( T_Particle & particle ) + namespace manipulators { - using Particle = T_Particle; + struct SetIonToNeutral + { + template + DINLINE void operator()(T_Particle& particle) + { + using Particle = T_Particle; - // number of bound electrons at initialization state of the neutral atom - float_X const protonNumber = traits::GetAtomicNumbers< T_Particle >::type::numberOfProtons; + // number of bound electrons at initialization state of the neutral atom + float_X const protonNumber = traits::GetAtomicNumbers::type::numberOfProtons; - particle[ boundElectrons_ ] = protonNumber; - } - }; - using SetBoundElectrons = generic::Free< SetIonToNeutral >; -} // namespace manipulators -} // namespace particles + particle[boundElectrons_] = protonNumber; + } + }; + using SetBoundElectrons = generic::Free; + } // namespace manipulators + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/png.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/png.param index 445b8bd10f..65620389a6 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/png.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/png.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -24,17 +24,17 @@ namespace picongpu { -/*scale image before write to file, only scale if value is not 1.0 - */ -constexpr float_64 scale_image = 1.0; + /*scale image before write to file, only scale if value is not 1.0 + */ + constexpr float_64 scale_image = 1.0; -/*if true image is scaled if cellsize is not quadratic, else no scale*/ -constexpr bool scale_to_cellsize = true; + /*if true image is scaled if cellsize is not quadratic, else no scale*/ + constexpr bool scale_to_cellsize = true; -constexpr bool white_box_per_GPU = false; + constexpr bool white_box_per_GPU = false; -namespace visPreview -{ + namespace visPreview + { // normalize EM fields to typical laser or plasma quantities //-1: Auto: enable adaptive scaling for each output // 1: Laser: typical fields calculated out of the laser amplitude @@ -49,33 +49,32 @@ namespace visPreview #define EM_FIELD_SCALE_CHANNEL2 -1 #define EM_FIELD_SCALE_CHANNEL3 -1 -// multiply highest undisturbed particle density with factor -constexpr float_X preParticleDens_opacity = 0.25; -constexpr float_X preChannel1_opacity = 1.0; -constexpr float_X preChannel2_opacity = 1.0; -constexpr float_X preChannel3_opacity = 1.0; - -// specify color scales for each channel -namespace preParticleDensCol = colorScales::grayInv; -namespace preChannel1Col = colorScales::green; -namespace preChannel2Col = colorScales::none; -namespace preChannel3Col = colorScales::none; + // multiply highest undisturbed particle density with factor + constexpr float_X preParticleDens_opacity = 0.25; + constexpr float_X preChannel1_opacity = 1.0; + constexpr float_X preChannel2_opacity = 1.0; + constexpr float_X preChannel3_opacity = 1.0; -/* png preview settings for each channel */ -DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return field_E.x() * field_E.x(); -} + // specify color scales for each channel + namespace preParticleDensCol = colorScales::grayInv; + namespace preChannel1Col = 
colorScales::green; + namespace preChannel2Col = colorScales::none; + namespace preChannel3Col = colorScales::none; -DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return field_E.y(); -} + /* png preview settings for each channel */ + DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return field_E.x() * field_E.x(); + } -DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return -1.0_X * field_E.y(); -} -} -} + DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return field_E.y(); + } + DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return -1.0_X * field_E.y(); + } + } // namespace visPreview +} // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/precision.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/precision.param deleted file mode 100644 index 2ab132f083..0000000000 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/precision.param +++ /dev/null @@ -1,60 +0,0 @@ -/* Copyright 2013-2020 Rene Widera - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -/** @file - * - * Define the precision of typically used floating point types in the - * simulation. - * - * PIConGPU normalizes input automatically, allowing to use single-precision by - * default for the core algorithms. Note that implementations of various - * algorithms (usually plugins or non-core components) might still decide to - * hard-code a different (mixed) precision for some critical operations. - */ - -#pragma once - - -namespace picongpu -{ - -/*! Select a precision for the simulation data - * - precision32Bit : use 32Bit floating point numbers - * [significant digits 7 to 8] - * - precision64Bit : use 64Bit floating point numbers - * [significant digits 15 to 16] - */ -#ifndef PARAM_PRECISION -# define PARAM_PRECISION precision32Bit -#endif -namespace precisionPIConGPU = PARAM_PRECISION; - -/*! Select a precision special operations (can be different from simulation precision) - * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) - * - precision32Bit : use 32Bit floating point numbers - * - precision64Bit : use 64Bit floating point numbers - */ -namespace precisionSqrt = precisionPIConGPU; -namespace precisionExp = precisionPIConGPU; -namespace precisionTrigonometric = precisionPIConGPU; - - -} // namespace picongpu - -#include "picongpu/unitless/precision.unitless" diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/species.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/species.param deleted file mode 100644 index fa31b0192a..0000000000 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/species.param +++ /dev/null @@ -1,87 +0,0 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch - * - * This file is part of PIConGPU. 
- * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . - */ - -#pragma once - -#include "picongpu/particles/shapes.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" -#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" - -#include "picongpu/particles/flylite/NonLTE.def" -#include "picongpu/fields/currentDeposition/Solver.def" - - -namespace picongpu -{ -/*---------------------------- generic solver---------------------------------*/ - -/*! Particle Shape definitions ------------------------------------------------- - * - particles::shapes::CIC : 1st order - * - particles::shapes::TSC : 2nd order - * - particles::shapes::PCS : 3rd order - * - particles::shapes::P4S : 4th order - * - * example: using UsedParticleShape = particles::shapes::CIC; - */ -#ifndef PARAM_PARTICLESHAPE -#define PARAM_PARTICLESHAPE TSC -#endif -using UsedParticleShape = particles::shapes::PARAM_PARTICLESHAPE; - -/* define which interpolation method is used to interpolate fields to particle*/ -using UsedField2Particle = FieldToParticleInterpolation< UsedParticleShape, AssignedTrilinearInterpolation >; - -/*! 
select current solver method ----------------------------------------------- - * - currentSolver::Esirkepov : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - currentSolver::VillaBune<> : particle shapes - CIC (1st order) only - * - currentSolver::EmZ : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - * For development purposes: --------------------------------------------------- - * - currentSolver::EsirkepovNative : generic version of currentSolverEsirkepov - * without optimization (~4x slower and needs more shared memory) - */ -#ifndef PARAM_CURRENTSOLVER -#define PARAM_CURRENTSOLVER Esirkepov -#endif -using UsedParticleCurrentSolver = currentSolver::PARAM_CURRENTSOLVER< UsedParticleShape >; - -/*! particle pusher configuration ---------------------------------------------- - * - * Defining a pusher is optional for particles - * - * - particles::pusher::Vay : better suited relativistic boris pusher - * - particles::pusher::Boris : standard boris pusher - * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher - * with classical radiation reaction - * - * For diagnostics & modeling: ------------------------------------------------ - * - particles::pusher::Free : free propagation, ignore fields - * (= free stream model) - * - particles::pusher::Photon : propagate with c in direction of normalized mom. 
- * - particles::pusher::Probe : Probe particles that interpolate E & B - * For development purposes: -------------------------------------------------- - * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) - */ -#ifndef PARAM_PARTICLEPUSHER -#define PARAM_PARTICLEPUSHER Boris -#endif -using UsedParticlePusher = particles::pusher::PARAM_PARTICLEPUSHER; - -}//namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesDefinition.param index 5a1a26407d..3966dfb21e 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Marco Garten, Richard Pausch, +/* Copyright 2013-2021 Rene Widera, Marco Garten, Richard Pausch, * Benjamin Worpitz, Axel Huebl * * This file is part of PIConGPU. 
@@ -32,96 +32,80 @@ namespace picongpu { + /*########################### define particle attributes #####################*/ -/*########################### define particle attributes #####################*/ + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position, - momentum, - weighting ->; - -/* attribute sequence for species: ions */ -using AttributeSeqIons = MakeSeq_t< - DefaultParticleAttributes -#if( PARAM_IONIZATION == 1 ) - , boundElectrons + /* attribute sequence for species: ions */ + using AttributeSeqIons = MakeSeq_t< + DefaultParticleAttributes +#if(PARAM_IONIZATION == 1) + , + boundElectrons #endif ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - - -/*--------------------------- electrons --------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > ->; - -/* define species: electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; - -/*--------------------------- ions -------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioIons, 1836.152672 ); -value_identifier( float_X, ChargeRatioIons, -1.0 ); - -using ParticleFlagsIons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< 
UsedField2Particle >, - current< UsedParticleCurrentSolver >, - massRatio< MassRatioIons >, - chargeRatio< ChargeRatioIons >, -#if( PARAM_IONIZATION == 1 ) - ionizers< - MakeSeq_t< - particles::ionization::BSIEffectiveZ< PIC_Electrons >, - particles::ionization::ADKCircPol< PIC_Electrons > - > - >, - ionizationEnergies< ionization::energies::AU::Hydrogen_t >, - effectiveNuclearCharge< ionization::effectiveNuclearCharge::Hydrogen_t >, + >; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species: electrons */ + using PIC_Electrons = Particles; + + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioIons, 1836.152672); + value_identifier(float_X, ChargeRatioIons, -1.0); + + using ParticleFlagsIons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, +#if(PARAM_IONIZATION == 1) + ionizers, + particles::ionization::ADKCircPol>>, + ionizationEnergies, + effectiveNuclearCharge, #endif - atomicNumbers< ionization::atomicNumbers::Hydrogen_t > ->; + atomicNumbers>; -/* define species: ions */ -using PIC_Ions = Particles< - PMACC_CSTRING( "i" ), - ParticleFlagsIons, - AttributeSeqIons ->; + /* define species: ions */ + using PIC_Ions = Particles; /*########################### end species ####################################*/ /*enable (1) or disable (0) ions*/ #ifndef PARAM_IONS -# define PARAM_IONS 0 +# 
define PARAM_IONS 0 #endif -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons -#if( PARAM_IONS == 1) - ,PIC_Ions + using VectorAllSpecies = MakeSeq_t< + PIC_Electrons +#if(PARAM_IONS == 1) + , + PIC_Ions #endif ->; + >; -} //namespace picongpu +} // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesInitialization.param index 59f9c9cead..705193e900 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,39 +33,25 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< -#if( PARAM_IONIZATION == 0 ) - CreateDensity< - densityProfiles::Gaussian, - startPosition::Random2ppc, - PIC_Electrons - > -# if( PARAM_IONS == 1 ) - , - Derive< - PIC_Electrons, - PIC_Ions - > -# endif + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< +#if(PARAM_IONIZATION == 0) + CreateDensity +# if(PARAM_IONS == 1) + , + Derive +# endif #else - CreateDensity< - densityProfiles::Gaussian, - startPosition::Random2ppc, - PIC_Ions - >, - Manipulate< - manipulators::SetBoundElectrons, - PIC_Ions - > + CreateDensity, + Manipulate #endif - >; + >; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/include/picongpu/param/starter.param 
b/share/picongpu/examples/LaserWakefield/include/picongpu/param/starter.param index 5e6c700755..a7ca54ee55 100644 --- a/share/picongpu/examples/LaserWakefield/include/picongpu/param/starter.param +++ b/share/picongpu/examples/LaserWakefield/include/picongpu/param/starter.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PIConGPU. * @@ -18,7 +18,6 @@ */ - #pragma once @@ -26,9 +25,5 @@ namespace picongpu { namespace defaultPIConGPU { - } -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/LaserWakefield/lib/python/picongpu/params.py b/share/picongpu/examples/LaserWakefield/lib/python/picongpu/params.py index ab7360a3b6..5aa6d890da 100644 --- a/share/picongpu/examples/LaserWakefield/lib/python/picongpu/params.py +++ b/share/picongpu/examples/LaserWakefield/lib/python/picongpu/params.py @@ -1,7 +1,7 @@ """ This file is part of PIConGPU. -Copyright 2017-2020 PIConGPU contributors +Copyright 2017-2021 PIConGPU contributors Authors: Sebastian Starke, Jeffrey Kelling License: GPLv3+ diff --git a/share/picongpu/examples/SingleParticleTest/cmakeFlags b/share/picongpu/examples/SingleParticleTest/cmakeFlags index c9a87d031d..935db2cb84 100755 --- a/share/picongpu/examples/SingleParticleTest/cmakeFlags +++ b/share/picongpu/examples/SingleParticleTest/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/SingleParticleTest/etc/picongpu/1.cfg b/share/picongpu/examples/SingleParticleTest/etc/picongpu/1.cfg index d32aba5e9f..ebb7806628 100644 --- a/share/picongpu/examples/SingleParticleTest/etc/picongpu/1.cfg +++ b/share/picongpu/examples/SingleParticleTest/etc/picongpu/1.cfg @@ -1,4 +1,5 @@ -# Copyright 2013-2020 Heiko Burau, Rene Widera, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Heiko Burau, Rene Widera, Felix Schmitt, Axel Huebl, +# Franz Poeschel # # This file is part of PIConGPU. # @@ -49,7 +50,8 @@ TBG_periodic="--periodic 1 1 1" # write position to stdout (messy): # --e_position.period 1 -TBG_plugins="--hdf5.period 1 --hdf5.file simData \ +TBG_openPMD="openPMD.period 1 --openPMD.file simData --openPMD.ext bp" +TBG_plugins="!TBG_openPMD \ --e_macroParticlesCount.period 100" @@ -59,7 +61,7 @@ TBG_plugins="--hdf5.period 1 --hdf5.file simData \ TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" -TBG_programParams="-d !TBG_deviceDist \ +TBG_programParams="!TBG_deviceDist \ -g !TBG_gridSize \ -s !TBG_steps \ !TBG_periodic \ diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/density.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/density.param index 80733e2766..d957544767 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/density.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -27,65 +27,58 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. 
- * - * unit: ELEMENTS/m^3 - * - * One particle per cell with weighting 1.0: - */ - constexpr float_64 BASE_DENSITY_SI = - 1.0 / - ( CELL_WIDTH_SI * CELL_HEIGHT_SI * CELL_DEPTH_SI ); - -} - -namespace densityProfiles -{ - - struct FreeFormulaFunctor + namespace SI { - - /** - * This formula uses SI quantities only - * The profile will be multiplied by BASE_DENSITY_SI. + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. * - * @param position_SI total offset including all slides [in meter] - * @param cellSize_SI cell sizes [in meter] + * unit: ELEMENTS/m^3 * - * @return float_X density [normalized to 1.0] + * One particle per cell with weighting 1.0: */ - HDINLINE float_X operator()( - const floatD_64& position_SI, - const float3_64& cellSize_SI - ) + constexpr float_64 BASE_DENSITY_SI = 1.0 / (CELL_WIDTH_SI * CELL_HEIGHT_SI * CELL_DEPTH_SI); + + } // namespace SI + + namespace densityProfiles + { + struct FreeFormulaFunctor { - const pmacc::math::UInt64< simDim > cell_id( position_SI / cellSize_SI.shrink< simDim >() ); + /** + * This formula uses SI quantities only + * The profile will be multiplied by BASE_DENSITY_SI. 
+ * + * @param position_SI total offset including all slides [in meter] + * @param cellSize_SI cell sizes [in meter] + * + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + const pmacc::math::UInt64 cell_id(position_SI / cellSize_SI.shrink()); - // add particle in cell in at [ 32 5 16 ] - // X=32: middle of X plane (gyro-motion in X-Y) - // Y=5: do not start fully at border, e.g., if someone wants to increase E, and so mass over time - // Z=16: middle of box in Z, move slowly in positive Z as E-field drift - const pmacc::math::UInt64< DIM3 > cell_start( 32u, 5u, 16u ); + // add particle in cell in at [ 32 5 16 ] + // X=32: middle of X plane (gyro-motion in X-Y) + // Y=5: do not start fully at border, e.g., if someone wants to increase E, and so mass over time + // Z=16: middle of box in Z, move slowly in positive Z as E-field drift + const pmacc::math::UInt64 cell_start(32u, 5u, 16u); - bool isStartCell = true; - for( uint64_t d = 0; d < simDim; ++d ) - if( cell_id[d] != cell_start[d] ) - isStartCell = false; + bool isStartCell = true; + for(uint64_t d = 0; d < simDim; ++d) + if(cell_id[d] != cell_start[d]) + isStartCell = false; - if( isStartCell ) - return 1.0; + if(isStartCell) + return 1.0; - return 0.0; - } - }; + return 0.0; + } + }; - /* definition of free formula profile */ - using FreeFormula = FreeFormulaImpl< FreeFormulaFunctor >; -} -} + /* definition of free formula profile */ + using FreeFormula = FreeFormulaImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/dimension.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/dimension.param index 0881e9884b..efb7c42757 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/dimension.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/dimension.param 
@@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Rene Widera, Richard Pausch +/* Copyright 2014-2021 Axel Huebl, Rene Widera, Richard Pausch * * This file is part of PIConGPU. * @@ -20,7 +20,7 @@ #pragma once #ifndef PARAM_DIMENSION -#define PARAM_DIMENSION DIM3 +# define PARAM_DIMENSION DIM3 #endif #define SIMDIM PARAM_DIMENSION diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fieldBackground.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fieldBackground.param index 984b495661..61acac9eb4 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fieldBackground.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fieldBackground.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Axel Huebl, Alexander Debus +/* Copyright 2014-2021 Axel Huebl, Alexander Debus * * This file is part of PIConGPU. * @@ -31,31 +31,23 @@ namespace picongpu static constexpr bool InfluenceParticlePusher = true; /* We use this to calculate your SI input back to our unit system */ - PMACC_ALIGN( - m_unitField, - const float3_64 - ); + PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundE( const float3_64 unitField ) : - m_unitField( unitField ) - {} + HDINLINE FieldBackgroundE(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field E(r,t) here * * \param cellIdx The total cell id counted from the start at t = 0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( + HDINLINE float3_X operator()( const DataSpace& /*cellIdx*/, const uint32_t /*currentStep*/ ) const { /* specify your E-Field in V/m and convert to PIConGPU units */ - return float3_X( - 0.0, - 0.0, - -10.0e6 / m_unitField[1] - ); + return float3_X(0.0, 0.0, -10.0e6 / m_unitField[1]); } }; @@ -66,31 +58,23 @@ namespace picongpu static constexpr bool InfluenceParticlePusher = true; /* We use this to calculate your SI input back to our unit system */ - 
PMACC_ALIGN( - m_unitField, - const float3_64 - ); + PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundB( const float3_64 unitField ) : - m_unitField( unitField ) - {} + HDINLINE FieldBackgroundB(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field B(r,t) here * * \param cellIdx The total cell id counted from the start at t=0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( + HDINLINE float3_X operator()( const DataSpace& /*cellIdx*/, const uint32_t /*currentStep*/ ) const { /* specify your B-Field in T and convert to PIConGPU units */ - return float3_X( - 0.0, - 0.0, - 50.0 / m_unitField[1] - ); + return float3_X(0.0, 0.0, 50.0 / m_unitField[1]); } }; @@ -101,31 +85,23 @@ namespace picongpu static constexpr bool activated = false; /* We use this to calculate your SI input back to our unit system */ - PMACC_ALIGN( - m_unitField, - const float3_64 - ); + PMACC_ALIGN(m_unitField, const float3_64); - HDINLINE FieldBackgroundJ( const float3_64 unitField ) : - m_unitField(unitField) - {} + HDINLINE FieldBackgroundJ(const float3_64 unitField) : m_unitField(unitField) + { + } /** Specify your background field J(r,t) here * * \param cellIdx The total cell id counted from the start at t=0 * \param currentStep The current time step */ - HDINLINE float3_X - operator()( + HDINLINE float3_X operator()( const DataSpace& /*cellIdx*/, const uint32_t /*currentStep*/ ) const { /* specify your J-Field in A/m^2 and convert to PIConGPU units */ - return float3_X( - 0.0, - 0.0, - 0.0 - ); + return float3_X(0.0, 0.0, 0.0); } }; diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fileOutput.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fileOutput.param index 1763bc18e6..64da1739de 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fileOutput.param +++ 
b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/fileOutput.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, * Benjamin Worpitz, Richard Pausch * * This file is part of PIConGPU. @@ -63,32 +63,23 @@ namespace picongpu namespace deriveField = particles::particleToGrid; /* ChargeDensity section */ - using ChargeDensity_Seq = deriveField::CreateEligible_t< - VectorAllSpecies, - deriveField::derivedAttributes::ChargeDensity - >; + using ChargeDensity_Seq + = deriveField::CreateEligible_t; /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** * * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size */ - using FieldTmpSolvers = MakeSeq_t< - ChargeDensity_Seq - >; + using FieldTmpSolvers = MakeSeq_t; /** FileOutputFields: Groups all Fields that shall be dumped *************/ /** Possible native fields: FieldE, FieldB, FieldJ */ - using NativeFileOutputFields = MakeSeq_t< - FieldJ - >; + using NativeFileOutputFields = MakeSeq_t; - using FileOutputFields = MakeSeq_t< - NativeFileOutputFields, - FieldTmpSolvers - >; + using FileOutputFields = MakeSeq_t; /** FileOutputParticles: Groups all Species that shall be dumped ********** @@ -98,4 +89,4 @@ namespace picongpu */ using FileOutputParticles = VectorAllSpecies; -} +} // namespace picongpu diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/grid.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/grid.param index 8b66ec083d..162db76a96 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/grid.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. 
* @@ -18,12 +18,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Period of a gyro-motion in s for an electron with beta=0.5 in B=50T @@ -57,21 +55,21 @@ namespace picongpu * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. * An initial pseudo particle, flying with the speed of light, @@ -90,7 +88,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/particle.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/particle.param index 7cc28fbf6d..bfa944c9f3 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/particle.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. 
@@ -31,73 +31,65 @@ namespace picongpu { - -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * note: this specific setting allows all kinds of weightings > 0.0 - * unit: none - */ - constexpr float_X MIN_WEIGHTING = std::numeric_limits< float_X >::min(); - -namespace manipulators -{ - - // Parameters for a particle drift in X - CONST_VECTOR( - float_X, - 3, - DriftParam_direction, - // unit vector for direction of drift: x, y, z - 1.0, - 0.0, - 0.0 - ); - struct DriftParam + namespace particles { - static constexpr float_64 gamma = 1.1547; // beta: 0.5 - const DriftParam_direction_t direction; - }; - using AssignYDrift = unary::Drift< - DriftParam, - nvidia::functors::Assign - >; - -} // namespace manipulators - - -namespace startPosition -{ - // sit directly in lower corner of the cell - CONST_VECTOR( - float_X, - 3, - InCellOffset, - // each x, y, z in-cell position component in range [0.0, 1.0) - 0.0, - 0.0, - 0.0 - ); - struct OnePositionParameter - { - /** Count of particles per cell at initial state + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * note: this specific setting allows all kinds of weightings > 0.0 * unit: none */ - static constexpr uint32_t numParticlesPerCell = 1u; - - const InCellOffset_t inCellOffset; - }; - using OnePosition = OnePositionImpl< OnePositionParameter >; - -} // namespace startPosition - - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. 
- */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 1u; + constexpr float_X MIN_WEIGHTING = std::numeric_limits::min(); + + namespace manipulators + { + // Parameters for a particle drift in X + CONST_VECTOR( + float_X, + 3, + DriftParam_direction, + // unit vector for direction of drift: x, y, z + 1.0, + 0.0, + 0.0); + struct DriftParam + { + static constexpr float_64 gamma = 1.1547; // beta: 0.5 + const DriftParam_direction_t direction; + }; + using AssignYDrift = unary::Drift; + + } // namespace manipulators + + + namespace startPosition + { + // sit directly in lower corner of the cell + CONST_VECTOR( + float_X, + 3, + InCellOffset, + // each x, y, z in-cell position component in range [0.0, 1.0) + 0.0, + 0.0, + 0.0); + struct OnePositionParameter + { + /** Count of particles per cell at initial state + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 1u; + + const InCellOffset_t inCellOffset; + }; + using OnePosition = OnePositionImpl; + + } // namespace startPosition + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. + */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 1u; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/species.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/species.param index 48de646b1a..81ef7e445a 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/species.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/species.param @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch +/* Copyright 2014-2021 Rene Widera, Richard Pausch, Annegret Roeszler, Klaus Steiniger * * This file is part of PIConGPU. * @@ -17,55 +17,83 @@ * If not, see . 
*/ +/** @file + * + * Particle shape, field to particle interpolation, current solver, and particle pusher + * can be declared here for usage in `speciesDefinition.param`. + * + * @see + * **MODELS / Hierarchy of Charge Assignment Schemes** + * in the online documentation for information on particle shapes. + * + * + * \attention + * The higher order shape names are redefined with release 0.6.0 in order to provide a consistent naming: + * * PQS is the name of the 3rd order assignment function (instead of PCS) + * * PCS is the name of the 4th order assignment function (instead of P4S) + * * P4S does not exist anymore + */ + #pragma once #include "picongpu/particles/shapes.hpp" #include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" #include "picongpu/algorithms/FieldToParticleInterpolation.hpp" #include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" - #include "picongpu/particles/flylite/NonLTE.def" #include "picongpu/fields/currentDeposition/Solver.def" namespace picongpu { -/*---------------------------- generic solver---------------------------------*/ - -/*! Particle Shape definitions ------------------------------------------------- - * - particles::shapes::CIC : 1st order - * - particles::shapes::TSC : 2nd order - * - particles::shapes::PCS : 3rd order - * - particles::shapes::P4S : 4th order - * - * example: using UsedParticleShape = particles::shapes::CIC; - */ -using UsedParticleShape = particles::shapes::CIC; + /** select macroparticle shape + * + * **WARNING** the shape names are redefined and diverge from PIConGPU versions before 0.6.0. 
+ * + * - particles::shapes::CIC : Assignment function is a piecewise linear spline + * - particles::shapes::TSC : Assignment function is a piecewise quadratic spline + * - particles::shapes::PQS : Assignment function is a piecewise cubic spline + * - particles::shapes::PCS : Assignment function is a piecewise quartic spline + */ + using UsedParticleShape = particles::shapes::CIC; -/* define which interpolation method is used to interpolate fields to particle*/ -using UsedField2Particle = FieldToParticleInterpolation; + /** select interpolation method to be used for interpolation of grid-based field values to particle positions + */ + using UsedField2Particle = FieldToParticleInterpolation; -/*! select current solver method ----------------------------------------------- - * - currentSolver::Esirkepov : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - currentSolver::VillaBune<> : particle shapes - CIC (1st order) only - * - currentSolver::EmZ : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - * For development purposes: --------------------------------------------------- - * - currentSolver::EsirkepovNative : generic version of currentSolverEsirkepov - * without optimization (~4x slower and needs more shared memory) - */ -using UsedParticleCurrentSolver = currentSolver::Esirkepov; + /*! 
select current solver method + * - currentSolver::Esirkepov< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * - currentSolver::VillaBune< SHAPE, STRATEGY > : particle shapes - CIC (1st order) only + * - currentSolver::EmZ< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * + * For development purposes: + * - currentSolver::EsirkepovNative< SHAPE, STRATEGY > : generic version of currentSolverEsirkepov + * without optimization (~4x slower and needs more shared memory) + * + * STRATEGY (optional): + * - currentSolver::strategy::StridedCachedSupercells + * - currentSolver::strategy::StridedCachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::CachedSupercells + * - currentSolver::strategy::CachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::NonCachedSupercells + * - currentSolver::strategy::NonCachedSupercellsScaled with N >= 1 + */ + using UsedParticleCurrentSolver = currentSolver::Esirkepov; -/*! particle pusher configuration ---------------------------------------------- +/** particle pusher configuration * * Defining a pusher is optional for particles * - * - particles::pusher::Vay : better suited relativistic boris pusher - * - particles::pusher::Boris : standard boris pusher + * - particles::pusher::HigueraCary : Higuera & Cary's relativistic pusher preserving both volume and ExB velocity + * - particles::pusher::Vay : Vay's relativistic pusher preserving ExB velocity + * - particles::pusher::Boris : Boris' relativistic pusher preserving volume * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher * with classical radiation reaction + * - particles::pusher::Composite : composite of two given pushers, + * switches between using one (or none) of those * * For diagnostics & modeling: ------------------------------------------------ + * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field * - particles::pusher::Free : 
free propagation, ignore fields * (= free stream model) * - particles::pusher::Photon : propagate with c in direction of normalized mom. @@ -74,8 +102,8 @@ using UsedParticleCurrentSolver = currentSolver::Esirkepov; * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) */ #ifndef PARAM_PARTICLEPUSHER -#define PARAM_PARTICLEPUSHER Boris +# define PARAM_PARTICLEPUSHER Boris #endif -using UsedParticlePusher = particles::pusher::PARAM_PARTICLEPUSHER; + using UsedParticlePusher = particles::pusher::PARAM_PARTICLEPUSHER; } // namespace picongpu diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesDefinition.param index 5eb194b45b..1dcb7a048c 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. 
* @@ -18,7 +18,6 @@ */ - #pragma once #include "picongpu/simulation_defines.hpp" @@ -33,15 +32,10 @@ namespace picongpu { + /*########################### define particle attributes #####################*/ -/*########################### define particle attributes #####################*/ - -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position, - momentum, - weighting ->; + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; /*########################### end particle attributes ########################*/ @@ -49,39 +43,32 @@ using DefaultParticleAttributes = MakeSeq_t< /* enable pusher by default if `PARAM_ENABLEPUSHER` is not defined in `cmakeFlags` */ #ifndef PARAM_ENABLEPUSHER -# define PARAM_ENABLEPUSHER 1 +# define PARAM_ENABLEPUSHER 1 #endif -/*--------------------------- electrons --------------------------------------*/ + /*--------------------------- electrons --------------------------------------*/ -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier(float_X, MassRatioElectrons, 1.0); -value_identifier(float_X, ChargeRatioElectrons, 1.0); + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); -using ParticleFlagsElectrons = MakeSeq_t< + using ParticleFlagsElectrons = MakeSeq_t< /* enable the pusher only if PARAM_ENABLEPUSHER is defined as one `1` */ -#if( PARAM_ENABLEPUSHER == 1 ) - particlePusher, +#if(PARAM_ENABLEPUSHER == 1) + particlePusher, #endif - shape, - interpolation, - current, - massRatio, - chargeRatio ->; + shape, + interpolation, + current, + massRatio, + chargeRatio>; -/* define species electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; + /* define species electrons */ + using PIC_Electrons = Particles; -/*########################### end species 
####################################*/ + /*########################### end species ####################################*/ -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons ->; + using VectorAllSpecies = MakeSeq_t; -} //namespace picongpu +} // namespace picongpu diff --git a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesInitialization.param index b198424a0b..2e56c9aa9f 100644 --- a/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/SingleParticleTest/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,23 +33,15 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::FreeFormula, - startPosition::OnePosition, - PIC_Electrons - >, - Manipulate< - manipulators::AssignYDrift, - PIC_Electrons - > - >; + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/etc/picongpu/1.cfg b/share/picongpu/examples/ThermalTest/etc/picongpu/1.cfg index 3ba5f63a26..430c41402a 100644 --- a/share/picongpu/examples/ThermalTest/etc/picongpu/1.cfg +++ b/share/picongpu/examples/ThermalTest/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Heiko Burau, 
Rene Widera, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/ThermalTest/etc/picongpu/32.cfg b/share/picongpu/examples/ThermalTest/etc/picongpu/32.cfg index 3f2122b339..1bb2b889cc 100644 --- a/share/picongpu/examples/ThermalTest/etc/picongpu/32.cfg +++ b/share/picongpu/examples/ThermalTest/etc/picongpu/32.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Heiko Burau, Felix Schmitt, Axel Huebl +# Copyright 2013-2021 Heiko Burau, Felix Schmitt, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/ThermalTest/etc/picongpu/4.cfg b/share/picongpu/examples/ThermalTest/etc/picongpu/4.cfg index 3d7fe0524c..a03b65ccec 100644 --- a/share/picongpu/examples/ThermalTest/etc/picongpu/4.cfg +++ b/share/picongpu/examples/ThermalTest/etc/picongpu/4.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/ThermalTest/etc/picongpu/64.cfg b/share/picongpu/examples/ThermalTest/etc/picongpu/64.cfg index dc6cc96410..fc3220f2cc 100644 --- a/share/picongpu/examples/ThermalTest/etc/picongpu/64.cfg +++ b/share/picongpu/examples/ThermalTest/etc/picongpu/64.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/ThermalTest/etc/picongpu/8.cfg b/share/picongpu/examples/ThermalTest/etc/picongpu/8.cfg index fcf46b2f5f..d567d35426 100644 --- a/share/picongpu/examples/ThermalTest/etc/picongpu/8.cfg +++ b/share/picongpu/examples/ThermalTest/etc/picongpu/8.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt +# Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt # # This file is part of PIConGPU. 
# diff --git a/share/picongpu/examples/ThermalTest/executeOnClone b/share/picongpu/examples/ThermalTest/executeOnClone index d379900356..52c66ce269 100755 --- a/share/picongpu/examples/ThermalTest/executeOnClone +++ b/share/picongpu/examples/ThermalTest/executeOnClone @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Heiko Burau +# Copyright 2013-2021 Axel Huebl, Rene Widera, Heiko Burau # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/ThermalTestSimulation.hpp b/share/picongpu/examples/ThermalTest/include/picongpu/ThermalTestSimulation.hpp index 5e26ebbec8..93027cc778 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/ThermalTestSimulation.hpp +++ b/share/picongpu/examples/ThermalTest/include/picongpu/ThermalTestSimulation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Axel Huebl +/* Copyright 2013-2021 Heiko Burau, Axel Huebl * * This file is part of PIConGPU. * @@ -22,7 +22,7 @@ #include "picongpu/simulation_defines.hpp" #include -#include "picongpu/simulation/control/MySimulation.hpp" +#include "picongpu/simulation/control/Simulation.hpp" #include @@ -34,9 +34,6 @@ #include #include #include "picongpu/ArgsParser.hpp" - -#include - #include "picongpu/plugins/PluginController.hpp" #include @@ -51,166 +48,155 @@ #include #include -#include -#include -#include -#include #include #include #include -namespace picongpu -{ - -using namespace pmacc; +#include +#include -class ThermalTestSimulation : public MySimulation +namespace picongpu { -public: + using namespace pmacc; - ThermalTestSimulation() - : MySimulation() + class ThermalTestSimulation : public Simulation { - } - - void init() - { - MySimulation::init(); - - using namespace ::pmacc::math; - - DataConnector &dc = Environment<>::get().DataConnector(); - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); + public: + ThermalTestSimulation() : Simulation() + { + } - auto fieldE_coreBorder = - 
fieldE->getGridBuffer().getDeviceBuffer().cartBuffer().view( - precisionCast(GuardDim().toRT()), -precisionCast(GuardDim().toRT())); + void init() + { + Simulation::init(); - this->eField_zt[0] = new container::HostBuffer (Size_t < 2 > (fieldE_coreBorder.size().z(), this->collectTimesteps)); - this->eField_zt[1] = new container::HostBuffer(this->eField_zt[0]->size()); + using namespace ::pmacc::math; - dc.releaseData( FieldE::getName() ); - } + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldE = dc.get(FieldE::getName(), true); - void pluginRegisterHelp(po::options_description& desc) - { - MySimulation::pluginRegisterHelp(desc); - } + auto fieldE_coreBorder = fieldE->getGridBuffer().getDeviceBuffer().cartBuffer().view( + precisionCast(GuardDim().toRT()), + -precisionCast(GuardDim().toRT())); - void pluginLoad() - { - MySimulation::pluginLoad(); - } + this->eField_zt[0] = std::make_unique>( + Size_t<2>(fieldE_coreBorder.size().z(), this->collectTimesteps)); + this->eField_zt[1] = std::make_unique>(this->eField_zt[0]->size()); - virtual ~ThermalTestSimulation() - { - __delete(eField_zt[0]); - __delete(eField_zt[1]); - } + dc.releaseData(FieldE::getName()); + } - void writeOutput() - { - using namespace ::pmacc::math; + void pluginRegisterHelp(po::options_description& desc) + { + Simulation::pluginRegisterHelp(desc); + } - auto& con = Environment::get().GridController(); - Size_t gpuDim = (Size_t)con.getGpuNodes(); - Int<3> gpuPos = (Int<3>)con.getPosition(); - zone::SphericZone gpuGatheringZone(Size_t (1, 1, gpuDim.z())); - algorithm::mpi::Gather gather(gpuGatheringZone); + void pluginLoad() + { + Simulation::pluginLoad(); + } - container::HostBuffer eField_zt_reduced(eField_zt[0]->size()); + virtual ~ThermalTestSimulation() = default; - for (int i = 0; i < 2; i++) + void writeOutput() { - bool reduceRoot = (gpuPos.x() == 0) && (gpuPos.y() == 0); - for(int gpuPos_z = 0; gpuPos_z < (int)gpuDim.z(); gpuPos_z++) - { - zone::SphericZone<3> 
gpuReducingZone( - Size_t<3>(gpuDim.x(), gpuDim.y(), 1), - Int<3>(0, 0, gpuPos_z)); + using namespace ::pmacc::math; - algorithm::mpi::Reduce<3> reduce(gpuReducingZone, reduceRoot); + auto& con = Environment::get().GridController(); + Size_t gpuDim = (Size_t) con.getGpuNodes(); + Int<3> gpuPos = (Int<3>) con.getPosition(); + zone::SphericZone gpuGatheringZone(Size_t(1, 1, gpuDim.z())); + algorithm::mpi::Gather gather(gpuGatheringZone); - reduce(eField_zt_reduced, *(eField_zt[i]), pmacc::algorithm::functor::Add()); - } - if(!reduceRoot) continue; + container::HostBuffer eField_zt_reduced(eField_zt[0]->size()); - container::HostBuffer global_eField_zt( - gpuDim.z() * eField_zt_reduced.size().x(), eField_zt_reduced.size().y()); - - gather(global_eField_zt, eField_zt_reduced, 1); - if (gather.root()) + for(int i = 0; i < 2; i++) { - std::string filename; - if (i == 0) - filename = "eField_zt_trans.dat"; - else - filename = "eField_zt_long.dat"; - std::ofstream eField_zt_dat(filename.data()); - eField_zt_dat << global_eField_zt; - eField_zt_dat.close(); + bool reduceRoot = (gpuPos.x() == 0) && (gpuPos.y() == 0); + for(int gpuPos_z = 0; gpuPos_z < (int) gpuDim.z(); gpuPos_z++) + { + zone::SphericZone<3> gpuReducingZone(Size_t<3>(gpuDim.x(), gpuDim.y(), 1), Int<3>(0, 0, gpuPos_z)); + + algorithm::mpi::Reduce<3> reduce(gpuReducingZone, reduceRoot); + + reduce(eField_zt_reduced, *(eField_zt[i]), pmacc::algorithm::functor::Add()); + } + if(!reduceRoot) + continue; + + container::HostBuffer global_eField_zt( + gpuDim.z() * eField_zt_reduced.size().x(), + eField_zt_reduced.size().y()); + + gather(global_eField_zt, eField_zt_reduced, 1); + if(gather.root()) + { + std::string filename; + if(i == 0) + filename = "eField_zt_trans.dat"; + else + filename = "eField_zt_long.dat"; + std::ofstream eField_zt_dat(filename.data()); + eField_zt_dat << global_eField_zt; + eField_zt_dat.close(); + } } } - } - - /** - * Run one simulation step. 
- * - * @param currentStep iteration number of the current step - */ - void runOneStep(uint32_t currentStep) - { - MySimulation::runOneStep(currentStep); + /** + * Run one simulation step. + * + * @param currentStep iteration number of the current step + */ + void runOneStep(uint32_t currentStep) + { + Simulation::runOneStep(currentStep); - if (currentStep > this->collectTimesteps + firstTimestep) - return; - if (currentStep < firstTimestep) - return; + if(currentStep > this->collectTimesteps + firstTimestep) + return; + if(currentStep < firstTimestep) + return; - using namespace math; + using namespace math; - DataConnector &dc = Environment<>::get().DataConnector(); - auto fieldE = dc.get< FieldE >( FieldE::getName(), true ); + DataConnector& dc = Environment<>::get().DataConnector(); + auto fieldE = dc.get(FieldE::getName(), true); - auto fieldE_coreBorder = - fieldE->getGridBuffer().getDeviceBuffer().cartBuffer().view( - precisionCast(GuardDim().toRT()), -precisionCast(GuardDim().toRT())); + auto fieldE_coreBorder = fieldE->getGridBuffer().getDeviceBuffer().cartBuffer().view( + precisionCast(GuardDim().toRT()), + -precisionCast(GuardDim().toRT())); - for (size_t z = 0; z < eField_zt[0]->size().x(); z++) - { - zone::SphericZone < 2 > reduceZone(fieldE_coreBorder.size().shrink<2>()); - for (int i = 0; i < 2; i++) + for(size_t z = 0; z < eField_zt[0]->size().x(); z++) { - *(eField_zt[i]->origin()(z, currentStep - firstTimestep)) = - algorithm::kernel::Reduce() - (cursor::make_FunctorCursor( + zone::SphericZone<2> reduceZone(fieldE_coreBorder.size().shrink<2>()); + for(int i = 0; i < 2; i++) + { + *(eField_zt[i]->origin()(z, currentStep - firstTimestep)) = algorithm::kernel::Reduce()( + cursor::make_FunctorCursor( cursor::tools::slice(fieldE_coreBorder.origin()(0, 0, z)), - pmacc::algorithm::functor::GetComponent(i == 0 ? 0 : 2) - ), + pmacc::algorithm::functor::GetComponent(i == 0 ? 
0 : 2)), reduceZone, nvidia::functors::Add()); + } } - } - dc.releaseData( FieldE::getName() ); + dc.releaseData(FieldE::getName()); - if (currentStep == this->collectTimesteps + firstTimestep) - writeOutput(); - } + if(currentStep == this->collectTimesteps + firstTimestep) + writeOutput(); + } -private: - // number of timesteps which collect the data - static constexpr uint32_t collectTimesteps = 512; - // first timestep which collects data - // you may like to let the plasma develope/thermalize a little bit - static constexpr uint32_t firstTimestep = 1024; + private: + // number of timesteps which collect the data + static constexpr uint32_t collectTimesteps = 512; + // first timestep which collects data + // you may like to let the plasma develope/thermalize a little bit + static constexpr uint32_t firstTimestep = 1024; - container::HostBuffer* eField_zt[2]; + std::array>, 2> eField_zt; - using BlockDim = pmacc::math::CT::Size_t < 16, 16, 1 >; - using GuardDim = SuperCellSize; -}; + using BlockDim = pmacc::math::CT::Size_t<16, 16, 1>; + using GuardDim = SuperCellSize; + }; } // namespace picongpu - diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/components.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/components.param index 12f2026dc3..807544ea5b 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/components.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/components.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Anton Helm, Rene Widera, * Richard Pausch * * This file is part of PIConGPU. @@ -30,9 +30,9 @@ namespace picongpu { -/*! Simulation Starter --------------------------------------------------- - * - thermalTestStarter : starter for thermal test - */ -namespace simulation_starter = thermalTestStarter; + /*! 
Simulation Starter --------------------------------------------------- + * - thermalTestStarter : starter for thermal test + */ + namespace simulation_starter = thermalTestStarter; } // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/density.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/density.param index bbd1034ff2..eb6aa2b5a7 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/density.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -25,22 +25,22 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - */ - constexpr float_64 BASE_DENSITY_SI = 1.571e24; -} + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1.571e24; + } // namespace SI -namespace densityProfiles -{ - /* definition of homogenous density profile */ - using Homogenous = HomogenousImpl; -} -} + namespace densityProfiles + { + /* definition of homogenous density profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/grid.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/grid.param index af220aab28..f244b1e011 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/grid.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -18,12 +18,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Duration of one timestep @@ -52,21 +50,21 @@ namespace picongpu * behave like the interaction of infinite "wire particles" * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI - //! Defines the size of the absorbing zone (in cells) + //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {0, 0}, /*x direction [negative,positive]*/ - {0, 0}, /*y direction [negative,positive]*/ - {0, 0} /*z direction [negative,positive]*/ - }; //unit: number of cells + {0, 0}, /*x direction [negative,positive]*/ + {0, 0}, /*y direction [negative,positive]*/ + {0, 0} /*z direction [negative,positive]*/ + }; // unit: number of cells //! 
Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. * An initial pseudo particle, flying with the speed of light, @@ -85,7 +83,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/memory.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/memory.param index 2fee1b1993..26ac7159d0 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/memory.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/memory.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -31,78 +31,87 @@ #include #include +#include namespace picongpu { + /* We have to hold back 350MiB for gpu-internal operations: + * - random number generator + * - reduces + * - ... + */ + constexpr size_t reservedGpuMemorySize = 350 * 1024 * 1024; -/* We have to hold back 350MiB for gpu-internal operations: - * - random number generator - * - reduces - * - ... 
- */ -constexpr size_t reservedGpuMemorySize = 350 *1024*1024; + /* short namespace*/ + namespace mCT = pmacc::math::CT; + /** size of a superCell + * + * volume of a superCell must be <= 1024 + */ + using SuperCellSize = typename mCT::shrinkTo, simDim>::type; -/* short namespace*/ -namespace mCT = pmacc::math::CT; -/** size of a superCell - * - * volume of a superCell must be <= 1024 - */ -using SuperCellSize = typename mCT::shrinkTo< - mCT::Int< 8, 8, 4 >, - simDim ->::type; + /** define the object for mapping superCells to cells*/ + using MappingDesc = MappingDescription; -/** define the object for mapping superCells to cells*/ -using MappingDesc = MappingDescription; + /** define the size of the core, border and guard area + * + * PIConGPU uses spatial domain-decomposition for parallelization + * over multiple devices with non-shared memory architecture. + * The global spatial domain is organized per device in three + * sections: the GUARD area contains copies of neighboring + * devices (also known as "halo"/"ghost"). + * The BORDER area is the outermost layer of cells of a device, + * equally to what neighboring devices see as GUARD area. + * The CORE area is the innermost area of a device. In union with + * the BORDER area it defines the "active" spatial domain on a device. + * + * GuardSize is defined in units of SuperCellSize per dimension. + */ + using GuardSize = typename mCT::shrinkTo, simDim>::type; -/** define the size of the core, border and guard area - * - * PIConGPU uses spatial domain-decomposition for parallelization - * over multiple devices with non-shared memory architecture. - * The global spatial domain is organized per device in three - * sections: the GUARD area contains copies of neighboring - * devices (also known as "halo"/"ghost"). - * The BORDER area is the outermost layer of cells of a device, - * equally to what neighboring devices see as GUARD area. - * The CORE area is the innermost area of a device. 
In union with - * the BORDER area it defines the "active" spatial domain on a device. - * - * GuardSize is defined in units of SuperCellSize per dimension. - */ -using GuardSize = typename mCT::shrinkTo< - mCT::Int< 1, 1, 1 >, - simDim ->::type; + /** bytes reserved for species exchange buffer + * + * This is the default configuration for species exchanges buffer sizes. + * The default exchange buffer sizes can be changed per species by adding + * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg + * to its flag list. + */ + struct DefaultExchangeMemCfg + { + // memory used for a direction + static constexpr uint32_t BYTES_EXCHANGE_X = 40 * 1024 * 1024; // 40 MiB + static constexpr uint32_t BYTES_EXCHANGE_Y = 40 * 1024 * 1024; // 40 MiB + static constexpr uint32_t BYTES_EXCHANGE_Z = 40 * 1024 * 1024; // 40 MiB + static constexpr uint32_t BYTES_EDGES = 3 * 1024 * 1024; // 3 MiB + static constexpr uint32_t BYTES_CORNER = 800 * 1024; // 800 kiB -/** bytes reserved for species exchange buffer - * - * This is the default configuration for species exchanges buffer sizes. - * The default exchange buffer sizes can be changed per species by adding - * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg - * to its flag list. - */ -struct DefaultExchangeMemCfg -{ - // memory used for a direction - static constexpr uint32_t BYTES_EXCHANGE_X = 40 * 1024 * 1024; // 40 MiB - static constexpr uint32_t BYTES_EXCHANGE_Y = 40 * 1024 * 1024; // 40 MiB - static constexpr uint32_t BYTES_EXCHANGE_Z = 40 * 1024 * 1024; // 40 MiB - static constexpr uint32_t BYTES_EDGES = 3 * 1024 * 1024; // 3 MiB - static constexpr uint32_t BYTES_CORNER = 800 * 1024; // 800 kiB -}; + /** Reference local domain size + * + * The size of the local domain for which the exchange sizes `BYTES_*` are configured for. + * The required size of each exchange will be calculated at runtime based on the local domain size and the + * reference size. 
The exchange size will be scaled only up and not down. Zero means that there is no reference + * domain size, exchanges will not be scaled. + */ + using REF_LOCAL_DOM_SIZE = mCT::Int<0, 0, 0>; + /** Scaling rate per direction. + * + * 1.0 means it scales linear with the ratio between the local domain size at runtime and the reference local + * domain size. + */ + const std::array DIR_SCALING_FACTOR = {{0.0, 0.0, 0.0}}; + }; -/** number of scalar fields that are reserved as temporary fields */ -constexpr uint32_t fieldTmpNumSlots = 1; + /** number of scalar fields that are reserved as temporary fields */ + constexpr uint32_t fieldTmpNumSlots = 1; -/** can `FieldTmp` gather neighbor information - * - * If `true` it is possible to call the method `asyncCommunicationGather()` - * to copy data from the border of neighboring GPU into the local guard. - * This is also known as building up a "ghost" or "halo" region in domain - * decomposition and only necessary for specific algorithms that extend - * the basic PIC cycle, e.g. with dependence on derived density or energy fields. - */ -constexpr bool fieldTmpSupportGatherCommunication = true; + /** can `FieldTmp` gather neighbor information + * + * If `true` it is possible to call the method `asyncCommunicationGather()` + * to copy data from the border of neighboring GPU into the local guard. + * This is also known as building up a "ghost" or "halo" region in domain + * decomposition and only necessary for specific algorithms that extend + * the basic PIC cycle, e.g. with dependence on derived density or energy fields. 
+ */ + constexpr bool fieldTmpSupportGatherCommunication = true; } // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/particle.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/particle.param index 636607e9af..0d2e2bc9c4 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/particle.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -27,49 +27,45 @@ namespace picongpu { - -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ - constexpr float_X MIN_WEIGHTING = 10.0; - -namespace manipulators -{ - struct TemperatureParam + namespace particles { - /** Initial temperature - * unit: keV + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * unit: none */ - static constexpr float_64 temperature = 51.16; - }; - using AddTemperature = unary::Temperature< TemperatureParam > ; -} // namespace manipulators + constexpr float_X MIN_WEIGHTING = 10.0; -namespace startPosition -{ + namespace manipulators + { + struct TemperatureParam + { + /** Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 51.16; + }; + using AddTemperature = unary::Temperature; + } // namespace manipulators - struct RandomParameter16ppc - { - /** Count of particles per cell at initial state - * unit: none - */ - static constexpr uint32_t numParticlesPerCell = 16u; - }; - // definition of random particle start - using Random16ppc = RandomImpl< RandomParameter16ppc >; + namespace startPosition + { + struct RandomParameter16ppc + { + /** Count of particles per cell at initial state + * unit: none + */ + static constexpr uint32_t numParticlesPerCell = 16u; + }; + // 
definition of random particle start + using Random16ppc = RandomImpl; -} // namespace startPosition + } // namespace startPosition - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. - */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = - startPosition::RandomParameter16ppc::numParticlesPerCell; + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. + */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = startPosition::RandomParameter16ppc::numParticlesPerCell; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/speciesInitialization.param index f7cee2f02f..cccfc7f4d2 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. 
* @@ -33,32 +33,17 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::Homogenous, - startPosition::Random16ppc, - PIC_Ions - >, - ManipulateDerive< - manipulators::binary::ProtonTimesWeighting, - PIC_Ions, - PIC_Electrons - >, - Manipulate< - manipulators::AddTemperature, - PIC_Electrons - >, - Manipulate< - manipulators::AddTemperature, - PIC_Ions - > - >; + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + ManipulateDerive, + Manipulate, + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/param/starter.param b/share/picongpu/examples/ThermalTest/include/picongpu/param/starter.param index 7630489bfa..f876fcf30c 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/param/starter.param +++ b/share/picongpu/examples/ThermalTest/include/picongpu/param/starter.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. 
* @@ -18,18 +18,12 @@ */ - #pragma once namespace picongpu { - namespace thermalTestStarter { - } -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/include/picongpu/unitless/starter.unitless b/share/picongpu/examples/ThermalTest/include/picongpu/unitless/starter.unitless index e3966aa9d7..55d6d9ee53 100644 --- a/share/picongpu/examples/ThermalTest/include/picongpu/unitless/starter.unitless +++ b/share/picongpu/examples/ThermalTest/include/picongpu/unitless/starter.unitless @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PIConGPU. * @@ -34,7 +34,6 @@ namespace picongpu using SimStarter = ::picongpu::SimulationStarter< ::picongpu::InitialiserController, ::picongpu::PluginController, - ::picongpu::ThermalTestSimulation - >; + ::picongpu::ThermalTestSimulation>; } -} +} // namespace picongpu diff --git a/share/picongpu/examples/ThermalTest/tools/dispersion.py b/share/picongpu/examples/ThermalTest/tools/dispersion.py index e1561264b9..6634b991e5 100644 --- a/share/picongpu/examples/ThermalTest/tools/dispersion.py +++ b/share/picongpu/examples/ThermalTest/tools/dispersion.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2013-2020 Heiko Burau, Axel Huebl +# Copyright 2013-2021 Heiko Burau, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/TransitionRadiation/etc/picongpu/1.cfg b/share/picongpu/examples/TransitionRadiation/etc/picongpu/1.cfg index 146623a785..761526431d 100644 --- a/share/picongpu/examples/TransitionRadiation/etc/picongpu/1.cfg +++ b/share/picongpu/examples/TransitionRadiation/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Richard Pausch, Felix Schmitt, Axel Huebl, Finn-Ole Carstens +# Copyright 2013-2021 Richard Pausch, Felix Schmitt, Axel Huebl, Finn-Ole Carstens # # This file is part of PIConGPU. 
# @@ -61,10 +61,6 @@ TBG_e_histogram="--e_energyHistogram.period 10 \ --e_energyHistogram.maxEnergy 500000 \ --e_energyHistogram.filter all" -# optional hdf5 output -TBG_hdf5="--hdf5.period 10\ - --hdf5.file pos" - # macroparticle count to see time consumption of transition radiation plugin TBG_e_macroParticleCount="--e_macroParticlesCount.period 10" diff --git a/share/picongpu/examples/TransitionRadiation/etc/picongpu/16.cfg b/share/picongpu/examples/TransitionRadiation/etc/picongpu/16.cfg index 1303fd0223..ddc561a99a 100644 --- a/share/picongpu/examples/TransitionRadiation/etc/picongpu/16.cfg +++ b/share/picongpu/examples/TransitionRadiation/etc/picongpu/16.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Richard Pausch, Felix Schmitt, Axel Huebl, Finn-Ole Carstens +# Copyright 2013-2021 Richard Pausch, Felix Schmitt, Axel Huebl, Finn-Ole Carstens # # This file is part of PIConGPU. # @@ -61,10 +61,6 @@ TBG_e_histogram="--e_energyHistogram.period 10 \ --e_energyHistogram.maxEnergy 500000 \ --e_energyHistogram.filter all" -# optional hdf5 output -TBG_hdf5="--hdf5.period 10\ - --hdf5.file pos" - # macroparticle count to see time consumption of transition radiation plugin TBG_e_macroParticleCount="--e_macroParticlesCount.period 10" diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/density.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/density.param index be19511511..f1a5bda1f4 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/density.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. 
@@ -40,18 +40,17 @@ namespace picongpu */ constexpr float_64 BASE_DENSITY_SI = 1.0e22; - } + } // namespace SI namespace densityProfiles { - - PMACC_STRUCT(GaussianCloudParam, + PMACC_STRUCT( + GaussianCloudParam, /** Profile Formula: * exponent = |globalCellPos - center| / sigma * density = e^[ gasFactor * exponent^gasPower ] */ - (PMACC_C_VALUE(float_X, gasFactor, -0.5)) - (PMACC_C_VALUE(float_X, gasPower, 2.0)) + (PMACC_C_VALUE(float_X, gasFactor, -0.5))(PMACC_C_VALUE(float_X, gasPower, 2.0)) /** height of vacuum area on top border * @@ -64,14 +63,13 @@ namespace picongpu /** The central position of the density distribution * unit: meter */ - (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 128 * 0.16e-6 / 2.0, 0.912e-5 , 128 * 0.16e-6 / 2.0)) + (PMACC_C_VECTOR_DIM(float_64, simDim, center_SI, 128 * 0.16e-6 / 2.0, 0.912e-5, 128 * 0.16e-6 / 2.0)) /** the distance from gasCenter_SI until the density decreases to its 1/e-th part * unit: meter */ - (PMACC_C_VECTOR_DIM(float_64, simDim, sigma_SI, 3.5e-6, 3.0e-6, 3.5e-6)) - ); /* struct GaussianCloudParam */ + (PMACC_C_VECTOR_DIM(float_64, simDim, sigma_SI, 3.5e-6, 3.0e-6, 3.5e-6))); /* struct GaussianCloudParam */ /* definition of cloud profile */ - using GaussianCloud = GaussianCloudImpl< GaussianCloudParam >; - } -} + using GaussianCloud = GaussianCloudImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/fieldSolver.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/fieldSolver.param index a30867816b..41c9586801 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/fieldSolver.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/fieldSolver.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov, Klaus Steiniger * * This file is part of PIConGPU. 
* @@ -25,6 +25,11 @@ * * Also allows to configure ad hoc mitigations for high frequency * noise in some setups via current smoothing. + * + * \attention + * Currently, the laser initialization in PIConGPU is implemented to work with the standard Yee solver. + * Using a solver of higher order will result in a slightly increased laser amplitude and energy than expected. + * */ #pragma once @@ -35,37 +40,43 @@ namespace picongpu { -namespace fields -{ - - /** Current Interpolation - * - * CurrentInterpolation is used to set a method performing the - * interpolate/assign operation from the generated currents of particle - * species to the electro-magnetic fields. - * - * Allowed values are: - * - None: - * - default for staggered grids/Yee-scheme - * - updates E - * - Binomial: 2nd order Binomial filter - * - smooths the current before assignment in staggered grid - * - updates E & breaks local charge conservation slightly - * - NoneDS: - * - experimental assignment for all-centered/directional splitting - * - updates E & B at the same time - */ - using CurrentInterpolation = currentInterpolation::None; + namespace fields + { + /** Current Interpolation + * + * CurrentInterpolation is used to set a method performing the + * interpolate/assign operation from the generated currents of particle + * species to the electro-magnetic fields. + * + * Allowed values are: + * - None: + * - default for staggered grids/Yee-scheme + * - updates E + * - Binomial: 2nd order Binomial filter + * - smooths the current before assignment in staggered grid + * - updates E & breaks local charge conservation slightly + */ + using CurrentInterpolation = currentInterpolation::None; - /** FieldSolver - * - * Field Solver Selection: - * - Yee< CurrentInterpolation > : standard Yee solver - * - Lehe< CurrentInterpolation >: Num. 
Cherenkov free field solver in a chosen direction - * - DirSplitting< CurrentInterpolation >: Sentoku's Directional Splitting Method - * - None< CurrentInterpolation >: disable the vacuum update of E and B - */ - using Solver = maxwellSolver::None< CurrentInterpolation >; + /** FieldSolver + * + * Field Solver Selection: + * - Yee< CurrentInterpolation > : Standard Yee solver approximating derivatives with respect to time and + * space by second order finite differences. + * - YeePML< CurrentInterpolation >: Standard Yee solver using Perfectly Matched Layer Absorbing Boundary + * Conditions (PML) + * - Lehe< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * - LehePML< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * using Perfectly Matched Layer Absorbing Boundary Conditions (PML) + * - ArbitraryOrderFDTD< 4, CurrentInterpolation >: Solver using 4 neighbors to each direction to approximate + * *spatial* derivatives by finite differences. The number of neighbors can be changed from 4 to any positive, + * integer number. The order of the solver will be twice the number of neighbors in each direction. Yee's + * method is a special case of this using one neighbor to each direction. 
+ * - ArbitraryOrderFDTDPML< 4, CurrentInterpolation >: ArbitraryOrderFDTD solver using Perfectly Matched Layer + * Absorbing Boundary Conditions (PML) + * - None< CurrentInterpolation >: disable the vacuum update of E and B + */ + using Solver = maxwellSolver::None; -} // namespace fields + } // namespace fields } // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/grid.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/grid.param index effd12601c..5ddfa2cb36 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/grid.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -18,12 +18,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Duration of one timestep @@ -52,21 +50,21 @@ namespace picongpu * behave like the interaction of infinite "wire particles" * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! 
Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none constexpr uint32_t ABSORBER_FADE_IN_STEPS = 16; @@ -82,7 +80,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/particle.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/particle.param index f9eec216ba..eea756f23c 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/particle.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Axel Huebl +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Axel Huebl * * This file is part of PIConGPU. 
* @@ -32,19 +32,17 @@ namespace picongpu { namespace particles { - /* a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ + * be created / will be deleted + * unit: none + */ constexpr float_X MIN_WEIGHTING = 1.0; constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 50; namespace manipulators { - - CONST_VECTOR( float_X, 3, DriftParamNegative_direction, 1.0, 1.0, 0.0 ); + CONST_VECTOR(float_X, 3, DriftParamNegative_direction, 1.0, 1.0, 0.0); struct DriftParamNegative { /** Initial particle drift velocity for electrons and ions @@ -56,10 +54,7 @@ namespace picongpu const DriftParamNegative_direction_t direction; }; // definition of SetDrift start - using AssignYDriftNegative = unary::Drift< - DriftParamNegative, - nvidia::functors::Assign - >; + using AssignYDriftNegative = unary::Drift; } // namespace manipulators @@ -73,7 +68,7 @@ namespace picongpu */ static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; }; - using Random = RandomImpl< RandomParameter >; + using Random = RandomImpl; } // namespace startPosition } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/png.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/png.param index 788e0121ae..4ed07c907e 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/png.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/png.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Richard Pausch +/* Copyright 2013-2021 Heiko Burau, Richard Pausch * * This file is part of PIConGPU. 
* @@ -24,17 +24,17 @@ namespace picongpu { -/*scale image before write to file, only scale if value is not 1.0 - */ -constexpr float_64 scale_image = 1.0; + /*scale image before write to file, only scale if value is not 1.0 + */ + constexpr float_64 scale_image = 1.0; -/*if true image is scaled if cellsize is not quadratic, else no scale*/ -constexpr bool scale_to_cellsize = true; + /*if true image is scaled if cellsize is not quadratic, else no scale*/ + constexpr bool scale_to_cellsize = true; -constexpr bool white_box_per_GPU = true; + constexpr bool white_box_per_GPU = true; -namespace visPreview -{ + namespace visPreview + { // normalize EM fields to typical laser or plasma quantities //-1: Auto: enable adaptive scaling for each output // 1: Laser: typical fields calculated out of the laser amplitude @@ -49,33 +49,32 @@ namespace visPreview #define EM_FIELD_SCALE_CHANNEL2 -1 #define EM_FIELD_SCALE_CHANNEL3 -1 -// multiply highest undisturbed particle density with factor -constexpr float_X preParticleDens_opacity = 0.25; -constexpr float_X preChannel1_opacity = 1.0; -constexpr float_X preChannel2_opacity = 1.0; -constexpr float_X preChannel3_opacity = 1.0; - -// specify color scales for each channel -namespace preParticleDensCol = colorScales::red; -namespace preChannel1Col = colorScales::blue; -namespace preChannel2Col = colorScales::green; -namespace preChannel3Col = colorScales::none; + // multiply highest undisturbed particle density with factor + constexpr float_X preParticleDens_opacity = 0.25; + constexpr float_X preChannel1_opacity = 1.0; + constexpr float_X preChannel2_opacity = 1.0; + constexpr float_X preChannel3_opacity = 1.0; -/* png preview settings for each channel */ -DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return math::abs2(field_J); -} + // specify color scales for each channel + namespace preParticleDensCol = colorScales::red; + namespace preChannel1Col = colorScales::blue; + 
namespace preChannel2Col = colorScales::green; + namespace preChannel3Col = colorScales::none; -DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return field_E.x() * field_E.x(); -} + /* png preview settings for each channel */ + DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return pmacc::math::abs2(field_J); + } -DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) -{ - return -1.0_X * field_E.y(); -} -} -} + DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return field_E.x() * field_E.x(); + } + DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) + { + return -1.0_X * field_E.y(); + } + } // namespace visPreview +} // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/species.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/species.param deleted file mode 100644 index df5d1a5664..0000000000 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/species.param +++ /dev/null @@ -1,78 +0,0 @@ -/* Copyright 2014-2020 Rene Widera, Richard Pausch - * - * This file is part of PIConGPU. - * - * PIConGPU is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * PIConGPU is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with PIConGPU. - * If not, see . 
- */ - -#pragma once - -#include "picongpu/particles/shapes.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" -#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" -#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" - -#include "picongpu/particles/flylite/NonLTE.def" -#include "picongpu/fields/currentDeposition/Solver.def" - - -namespace picongpu -{ -/*---------------------------- generic solver---------------------------------*/ - -/*! Particle Shape definitions ------------------------------------------------- - * - particles::shapes::CIC : 1st order - * - particles::shapes::TSC : 2nd order - * - particles::shapes::PCS : 3rd order - * - particles::shapes::P4S : 4th order - * - * example: using UsedParticleShape = particles::shapes::CIC; - */ -using UsedParticleShape = particles::shapes::CIC; - -/* define which interpolation method is used to interpolate fields to particle*/ -using UsedField2Particle = FieldToParticleInterpolation< UsedParticleShape, AssignedTrilinearInterpolation >; - -/*! select current solver method ----------------------------------------------- - * - currentSolver::Esirkepov : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - currentSolver::VillaBune<> : particle shapes - CIC (1st order) only - * - currentSolver::EmZ : particle shapes - CIC, TSC, PCS, P4S (1st to 4th order) - * - * For development purposes: --------------------------------------------------- - * - currentSolver::EsirkepovNative : generic version of currentSolverEsirkepov - * without optimization (~4x slower and needs more shared memory) - */ -using UsedParticleCurrentSolver = currentSolver::Esirkepov< UsedParticleShape >; - -/*! 
particle pusher configuration ---------------------------------------------- - * - * Defining a pusher is optional for particles - * - * - particles::pusher::Vay : better suited relativistic boris pusher - * - particles::pusher::Boris : standard boris pusher - * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher - * with classical radiation reaction - * - * For diagnostics & modeling: ------------------------------------------------ - * - particles::pusher::Free : free propagation, ignore fields - * (= free stream model) - * - particles::pusher::Photon : propagate with c in direction of normalized mom. - * - particles::pusher::Probe : Probe particles that interpolate E & B - * For development purposes: -------------------------------------------------- - * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) - */ -using UsedParticlePusher = particles::pusher::Boris; - -} // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesDefinition.param index 5fb514f525..c723423895 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau, +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau, * Richard Pausch * * This file is part of PIConGPU. 
@@ -32,16 +32,10 @@ namespace picongpu { - /*########################### define particle attributes #####################*/ /** describe attributes of a particle*/ - using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting, - transitionRadiationMask - >; + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting, transitionRadiationMask>; /*########################### end particle attributes ########################*/ @@ -50,28 +44,21 @@ namespace picongpu /*--------------------------- electrons --------------------------------------*/ /* ratio relative to BASE_CHARGE and BASE_MASS */ - value_identifier( float_X, MassRatioElectrons, 1.0 ); - value_identifier( float_X, ChargeRatioElectrons, 1.0 ); + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); using ParticleFlagsElectrons = MakeSeq_t< - particlePusher< UsedParticlePusher >, - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > - >; + particlePusher, + shape, + interpolation, + massRatio, + chargeRatio>; /* define species electrons */ - using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes - >; + using PIC_Electrons = Particles; /*########################### end species ####################################*/ - using VectorAllSpecies = MakeSeq_t< - PIC_Electrons - >; + using VectorAllSpecies = MakeSeq_t; -} //namespace picongpu +} // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesInitialization.param index 1fee353a50..bdb6445015 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesInitialization.param +++ 
b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,23 +33,15 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::GaussianCloud, - startPosition::Random, - PIC_Electrons - >, - Manipulate< - manipulators::AssignYDriftNegative, - PIC_Electrons - > - >; + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/transitionRadiation.param b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/transitionRadiation.param index 7294e90e14..c9954201ed 100644 --- a/share/picongpu/examples/TransitionRadiation/include/picongpu/param/transitionRadiation.param +++ b/share/picongpu/examples/TransitionRadiation/include/picongpu/param/transitionRadiation.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Richard Pausch, Finn-Ole Carstens +/* Copyright 2013-2021 Rene Widera, Richard Pausch, Finn-Ole Carstens * * This file is part of PIConGPU. 
* @@ -43,225 +43,237 @@ namespace picongpu { -namespace plugins -{ -// initiate the formfactor namespaces from the radiation plugin -namespace radiation -{ - namespace radFormFactor_CIC_3D { } - namespace radFormFactor_TSC_3D { } - namespace radFormFactor_PCS_3D { } - namespace radFormFactor_CIC_1Dy { } - namespace radFormFactor_Gauss_spherical { } - namespace radFormFactor_Gauss_cell { } - namespace radFormFactor_incoherent { } - namespace radFormFactor_coherent { } -} // namespace radiation - -namespace transitionRadiation -{ -namespace linearFrequencies -{ - namespace SI - { - //! mimimum frequency of the linear frequency scale in units of [1/s] - constexpr float_64 omegaMin = 0.0; - //! maximum frequency of the linear frequency scale in units of [1/s] - constexpr float_64 omegaMax = 1.06e16; - } - - //! number of frequency values to compute in the linear frequency [unitless] - constexpr unsigned int nOmega = 512; - -} // namespace linearFrequencies - -namespace logFrequencies -{ - namespace SI - { - //! mimimum frequency of the logarithmic frequency scale in units of [1/s] - constexpr float_64 omegaMin = 1.0e13; - //! maximum frequency of the logarithmic frequency scale in units of [1/s] - constexpr float_64 omegaMax = 1.0e17; - } - - //! number of frequency values to compute in the logarithmic frequency [unitless] - constexpr unsigned int nOmega = 256; - -} // namespace logFrequencies - - -namespace listFrequencies -{ - //! path to text file with frequencies - constexpr char listLocation[] = "/path/to/frequency_list"; - //! 
number of frequency values to compute if frequencies are given in a file [unitless] - constexpr unsigned int nOmega = 512; - -} // namespace listFrequencies - - - /** selected mode of frequency scaling: - * - * options: - * - linearFrequencies - * - logFrequencies - * - listFrequencies - */ - namespace frequencies = logFrequencies; - - /////////////////////////////////////////////////// - - - /** correct treatment of coherent radiation from macro particles - * - * These formfactors are the same as in the radiation plugin! - * Choose different form factors in order to consider different particle shapes for radiation - * - ::picongpu::plugins::radiation::radFormFactor_CIC_3D ... CIC charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_TSC_3D ... TSC charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_PCS_3D ... PCS charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_CIC_1Dy ... only CIC charge distribution in y - * - ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution - * - ::picongpu::plugins::radiation::radFormFactor_Gauss_cell ... Gauss charge distribution according to cell size - * - ::picongpu::plugins::radiation::radFormFactor_incoherent ... only incoherent radiation - * - ::picongpu::plugins::radiation::radFormFactor_coherent ... 
only coherent radiation - */ - namespace macroParticleFormFactor = ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical; - - /////////////////////////////////////////////////////////// - - namespace parameters + namespace plugins { - // number of observation directions - constexpr unsigned int nPhi = 128; - constexpr unsigned int nTheta = 128; - constexpr unsigned int nObserver = nPhi * nTheta; - - // theta goes from 0 to pi - constexpr float_64 thetaMin = 0.0; - constexpr float_64 thetaMax = picongpu::PI; - - // phi goes from 0 to 2*pi - constexpr float_64 phiMin = 0.0; - constexpr float_64 phiMax = 2 * picongpu::PI; - - namespace SI + // initiate the formfactor namespaces from the radiation plugin + namespace radiation { - // z position of the foil to calculate transition radiation at - // leave at 0 for no virtual particle propagation - constexpr float_64 foilPosition = 0.0; - } - - } // end namespace parameters - - - //! example of a filter for the relativistic Lorentz factor gamma - struct GammaFilterFunctor - { - //! 
Gamma value above which the radiation is calculated - static constexpr float_X filterGamma = 5.0; - - template< typename T_Particle > - HDINLINE void operator()( T_Particle& particle ) + namespace radFormFactor_CIC_3D + { + } + namespace radFormFactor_TSC_3D + { + } + namespace radFormFactor_PCS_3D + { + } + namespace radFormFactor_CIC_1Dy + { + } + namespace radFormFactor_Gauss_spherical + { + } + namespace radFormFactor_Gauss_cell + { + } + namespace radFormFactor_incoherent + { + } + namespace radFormFactor_coherent + { + } + } // namespace radiation + + namespace transitionRadiation { - if( - picongpu::gamma( - particle[ picongpu::momentum_ ], - picongpu::traits::attribute::getMass( - particle[ picongpu::weighting_ ], - particle - ) - ) >= filterGamma - ) - particle[ picongpu::transitionRadiationMask_ ] = true; - } - }; - - /** filter to (de)select particles for the radiation calculation - * - * to activate the filter: - * - goto file `speciesDefinition.param` - * - add the attribute `transitionRadiationMask` to the particle species - */ - using GammaFilter = picongpu::particles::manipulators::generic::Free< - GammaFilterFunctor - >; - - /** Compute observation angles - * - * This function is used in the transition radiation plugin kernel to compute - * the observation directions given as a unit vector pointing - * towards a 'virtual' detector - * - * This default setup is an example of a 2D detector array. It computes - * observation directions for 2D virtual detector field - * with its center pointing toward the +y direction (for theta=0, phi=0) - * with observation angles ranging from - * theta = [angle_theta_start : angle_theta_end] - * phi = [angle_phi_start : angle_phi_end ] - * Every observation_id_extern index moves the phi angle from its - * start value toward its end value until the observation_id_extern - * reaches N_split. 
After that the theta angle moves further from its - * start value towards its end value while phi is reset to its start - * value. - * - * The unit vector pointing towards the observing virtual detector - * can be described using theta and phi by: - * x_value = sin(theta) * cos(phi) - * y_value = cos(theta) - * z_value = sin(theta) * sin(phi) - * These are the standard spherical coordinates. - * - * The example setup describes an detector array of - * 128X128 detectors ranging from 0 to pi for the azimuth angle - * theta and from 0 to 2 pi for the polar angle phi. - * - * @param observation_id_extern - * int index that identifies each block on the GPU - * to compute the observation direction - * - * @return unit vector pointing in observation direction - * type: float3_X - */ - HDINLINE float3_X observationDirection(const int observation_id_extern) - { - /* generate two indices from single block index */ - /** split distance of given index - * pseudo-code: - * index_a = index / split_distance - * index_b = index % split_distance - */ - /** get index for computing angle theta: */ - const int indexTheta = observation_id_extern / parameters::nPhi; - - /** step width angle theta, set it to 0 if nTheta = 1 */ - const picongpu::float_64 deltaTheta = ( parameters::nTheta > 1 ) ? - ( parameters::thetaMax - parameters::thetaMin ) / ( parameters::nTheta - 1.0 ) : 0.0; - - /** compute observation angles theta */ - const picongpu::float_64 theta = indexTheta * deltaTheta + parameters::thetaMin; - - /** get index for computing angle phi: */ - const int indexPhi = observation_id_extern % parameters::nPhi; - - /** step width angle phi, set it to 0 if nPhi = 1 */ - const picongpu::float_64 deltaPhi = ( parameters::nPhi > 1 ) ? 
- ( parameters::phiMax - parameters::phiMin ) / ( parameters::nPhi - 1.0 ) : 0.0; - - /** compute observation angles phi */ - const picongpu::float_64 phi = indexPhi * deltaPhi - parameters::phiMin; - - /* helper functions for efficient trigonometric calculations */ - picongpu::float_32 sinPhi; - picongpu::float_32 cosPhi; - picongpu::float_32 sinTheta; - picongpu::float_32 cosTheta; - math::sincos( precisionCast< picongpu::float_32 >( phi ), sinPhi, cosPhi ); - math::sincos( precisionCast< picongpu::float_32 >( theta ), sinTheta, cosTheta ); - /** compute observation unit vector */ - return float3_X( sinTheta * cosPhi , cosTheta, sinTheta * sinPhi ); - } - -} // namespace transitionRadiation -} // namespace plugins + namespace linearFrequencies + { + namespace SI + { + //! mimimum frequency of the linear frequency scale in units of [1/s] + constexpr float_64 omegaMin = 0.0; + //! maximum frequency of the linear frequency scale in units of [1/s] + constexpr float_64 omegaMax = 1.06e16; + } // namespace SI + + //! number of frequency values to compute in the linear frequency [unitless] + constexpr unsigned int nOmega = 512; + + } // namespace linearFrequencies + + namespace logFrequencies + { + namespace SI + { + //! mimimum frequency of the logarithmic frequency scale in units of [1/s] + constexpr float_64 omegaMin = 1.0e13; + //! maximum frequency of the logarithmic frequency scale in units of [1/s] + constexpr float_64 omegaMax = 1.0e17; + } // namespace SI + + //! number of frequency values to compute in the logarithmic frequency [unitless] + constexpr unsigned int nOmega = 256; + + } // namespace logFrequencies + + + namespace listFrequencies + { + //! path to text file with frequencies + constexpr char listLocation[] = "/path/to/frequency_list"; + //! 
number of frequency values to compute if frequencies are given in a file [unitless] + constexpr unsigned int nOmega = 512; + + } // namespace listFrequencies + + + /** selected mode of frequency scaling: + * + * options: + * - linearFrequencies + * - logFrequencies + * - listFrequencies + */ + namespace frequencies = logFrequencies; + + /////////////////////////////////////////////////// + + + /** correct treatment of coherent radiation from macro particles + * + * These formfactors are the same as in the radiation plugin! + * Choose different form factors in order to consider different particle shapes for radiation + * - ::picongpu::plugins::radiation::radFormFactor_CIC_3D ... CIC charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_TSC_3D ... TSC charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_PCS_3D ... PCS charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_CIC_1Dy ... only CIC charge distribution in y + * - ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical ... symmetric Gauss charge distribution + * - ::picongpu::plugins::radiation::radFormFactor_Gauss_cell ... Gauss charge distribution according to + * cell size + * - ::picongpu::plugins::radiation::radFormFactor_incoherent ... only incoherent radiation + * - ::picongpu::plugins::radiation::radFormFactor_coherent ... 
only coherent radiation + */ + namespace macroParticleFormFactor = ::picongpu::plugins::radiation::radFormFactor_Gauss_spherical; + + /////////////////////////////////////////////////////////// + + namespace parameters + { + // number of observation directions + constexpr unsigned int nPhi = 128; + constexpr unsigned int nTheta = 128; + constexpr unsigned int nObserver = nPhi * nTheta; + + // theta goes from 0 to pi + constexpr float_64 thetaMin = 0.0; + constexpr float_64 thetaMax = picongpu::PI; + + // phi goes from 0 to 2*pi + constexpr float_64 phiMin = 0.0; + constexpr float_64 phiMax = 2 * picongpu::PI; + + namespace SI + { + // z position of the foil to calculate transition radiation at + // leave at 0 for no virtual particle propagation + constexpr float_64 foilPosition = 0.0; + } // namespace SI + + } // end namespace parameters + + + //! example of a filter for the relativistic Lorentz factor gamma + struct GammaFilterFunctor + { + //! Gamma value above which the radiation is calculated + static constexpr float_X filterGamma = 5.0; + + template + HDINLINE void operator()(T_Particle& particle) + { + if(picongpu::gamma( + particle[picongpu::momentum_], + picongpu::traits::attribute::getMass(particle[picongpu::weighting_], particle)) + >= filterGamma) + particle[picongpu::transitionRadiationMask_] = true; + } + }; + + /** filter to (de)select particles for the radiation calculation + * + * to activate the filter: + * - goto file `speciesDefinition.param` + * - add the attribute `transitionRadiationMask` to the particle species + */ + using GammaFilter = picongpu::particles::manipulators::generic::Free; + + /** Compute observation angles + * + * This function is used in the transition radiation plugin kernel to compute + * the observation directions given as a unit vector pointing + * towards a 'virtual' detector + * + * This default setup is an example of a 2D detector array. 
It computes + * observation directions for 2D virtual detector field + * with its center pointing toward the +y direction (for theta=0, phi=0) + * with observation angles ranging from + * theta = [angle_theta_start : angle_theta_end] + * phi = [angle_phi_start : angle_phi_end ] + * Every observation_id_extern index moves the phi angle from its + * start value toward its end value until the observation_id_extern + * reaches N_split. After that the theta angle moves further from its + * start value towards its end value while phi is reset to its start + * value. + * + * The unit vector pointing towards the observing virtual detector + * can be described using theta and phi by: + * x_value = sin(theta) * cos(phi) + * y_value = cos(theta) + * z_value = sin(theta) * sin(phi) + * These are the standard spherical coordinates. + * + * The example setup describes an detector array of + * 128X128 detectors ranging from 0 to pi for the azimuth angle + * theta and from 0 to 2 pi for the polar angle phi. + * + * @param observation_id_extern + * int index that identifies each block on the GPU + * to compute the observation direction + * + * @return unit vector pointing in observation direction + * type: float3_X + */ + HDINLINE float3_X observationDirection(const int observation_id_extern) + { + /* generate two indices from single block index */ + /** split distance of given index + * pseudo-code: + * index_a = index / split_distance + * index_b = index % split_distance + */ + /** get index for computing angle theta: */ + const int indexTheta = observation_id_extern / parameters::nPhi; + + /** step width angle theta, set it to 0 if nTheta = 1 */ + const picongpu::float_64 deltaTheta = (parameters::nTheta > 1) + ? 
(parameters::thetaMax - parameters::thetaMin) / (parameters::nTheta - 1.0) + : 0.0; + + /** compute observation angles theta */ + const picongpu::float_64 theta = indexTheta * deltaTheta + parameters::thetaMin; + + /** get index for computing angle phi: */ + const int indexPhi = observation_id_extern % parameters::nPhi; + + /** step width angle phi, set it to 0 if nPhi = 1 */ + const picongpu::float_64 deltaPhi = (parameters::nPhi > 1) + ? (parameters::phiMax - parameters::phiMin) / (parameters::nPhi - 1.0) + : 0.0; + + /** compute observation angles phi */ + const picongpu::float_64 phi = indexPhi * deltaPhi - parameters::phiMin; + + /* helper functions for efficient trigonometric calculations */ + picongpu::float_32 sinPhi; + picongpu::float_32 cosPhi; + picongpu::float_32 sinTheta; + picongpu::float_32 cosTheta; + pmacc::math::sincos(precisionCast(phi), sinPhi, cosPhi); + pmacc::math::sincos(precisionCast(theta), sinTheta, cosTheta); + /** compute observation unit vector */ + return float3_X(sinTheta * cosPhi, cosTheta, sinTheta * sinPhi); + } + + } // namespace transitionRadiation + } // namespace plugins } // namespace picongpu diff --git a/share/picongpu/examples/WarmCopper/cmakeFlags b/share/picongpu/examples/WarmCopper/cmakeFlags index c8f3e2f1d0..e9d333d096 100755 --- a/share/picongpu/examples/WarmCopper/cmakeFlags +++ b/share/picongpu/examples/WarmCopper/cmakeFlags @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/WarmCopper/etc/picongpu/1.cfg b/share/picongpu/examples/WarmCopper/etc/picongpu/1.cfg index 0432cf0a3f..901e2042f6 100644 --- a/share/picongpu/examples/WarmCopper/etc/picongpu/1.cfg +++ b/share/picongpu/examples/WarmCopper/etc/picongpu/1.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl, Franz Poeschel # # This file is part of PIConGPU. 
# @@ -52,10 +52,15 @@ TBG_ehot_histogram="--ehot_energyHistogram.period 100 --ehot_energyHistogram.fil --ehot_energyHistogram.minEnergy 0 --ehot_energyHistogram.maxEnergy 250" # file I/O -TBG_hdf5="--hdf5.period 100 --hdf5.file simData" - -TBG_plugins="!TBG_eth_histogram !TBG_ehot_histogram \ - !TBG_hdf5" +TBG_openPMD="--openPMD.period 100 \ + --openPMD.file simData \ + --openPMD.ext bp \ + --checkpoint.period 100 \ + --checkpoint.backend openPMD" + +TBG_plugins="!TBG_eth_histogram \ + !TBG_ehot_histogram \ + !TBG_openPMD" ################################# diff --git a/share/picongpu/examples/WarmCopper/include/picongpu/param/density.param b/share/picongpu/examples/WarmCopper/include/picongpu/param/density.param index 77e5e6b040..190b88ce56 100644 --- a/share/picongpu/examples/WarmCopper/include/picongpu/param/density.param +++ b/share/picongpu/examples/WarmCopper/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -27,22 +27,22 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - */ - constexpr float_64 BASE_DENSITY_SI = 8.49e28; // copper ion density -} + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 8.49e28; // copper ion density + } // namespace SI -namespace densityProfiles -{ - /* definition of homogenous profile */ - using Homogenous = HomogenousImpl; -} -} + namespace densityProfiles + { + /* definition of homogenous profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/WarmCopper/include/picongpu/param/grid.param b/share/picongpu/examples/WarmCopper/include/picongpu/param/grid.param index 279dcb4912..b02e025651 100644 --- a/share/picongpu/examples/WarmCopper/include/picongpu/param/grid.param +++ b/share/picongpu/examples/WarmCopper/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -22,7 +22,6 @@ namespace picongpu { - namespace SI { /** Duration of one timestep @@ -63,21 +62,21 @@ namespace picongpu * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {32, 32}, /*x direction [negative,positive]*/ - {32, 32}, /*y direction [negative,positive]*/ - {32, 32} /*z direction [negative,positive]*/ - }; //unit: number of cells + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells //! Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. 
* An initial pseudo particle, flying with the speed of light, @@ -96,4 +95,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} +} // namespace picongpu diff --git a/share/picongpu/examples/WarmCopper/include/picongpu/param/particle.param b/share/picongpu/examples/WarmCopper/include/picongpu/param/particle.param index f6495f508a..a3659cb23c 100644 --- a/share/picongpu/examples/WarmCopper/include/picongpu/param/particle.param +++ b/share/picongpu/examples/WarmCopper/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. @@ -29,81 +29,72 @@ namespace picongpu { - -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ - constexpr float_X MIN_WEIGHTING = 10.0; - -namespace manipulators -{ - // define a drift in X equal to 200 keV for electrons - CONST_VECTOR(float_X, 3, DriftParam_direction, 1.0, 0.0, 0.0); - struct Drift200keVParam + namespace particles { - static constexpr float_64 gamma = 1.39139; - const DriftParam_direction_t direction; - }; - using Assign200keVDrift = unary::Drift< Drift200keVParam, nvidia::functors::Assign >; - - struct TemperatureParam - { - /** Initial temperature - * unit: keV + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * unit: none */ - static constexpr float_64 temperature = 0.1; - }; - using AddTemperature = unary::Temperature< TemperatureParam >; + constexpr float_X MIN_WEIGHTING = 10.0; - struct OnceIonizedImpl - { - template< typename T_Particle > - DINLINE void operator()( T_Particle& particle ) + namespace manipulators { - constexpr float_X ion1plus = - GetAtomicNumbers< T_Particle >::type::numberOfProtons - - 1._X; - - // set (Z - 1) bound electrons - particle[boundElectrons_] = ion1plus; - } - }; - 
// definition of SetDrift start - using OnceIonized = generic::Free< OnceIonizedImpl >; - -} // namespace manipulators - - -namespace startPosition -{ - - struct QuietParam2ppc - { - /** Count of particles per cell per direction at initial state - * unit: none + // define a drift in X equal to 200 keV for electrons + CONST_VECTOR(float_X, 3, DriftParam_direction, 1.0, 0.0, 0.0); + struct Drift200keVParam + { + static constexpr float_64 gamma = 1.39139; + const DriftParam_direction_t direction; + }; + using Assign200keVDrift = unary::Drift; + + struct TemperatureParam + { + /** Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.1; + }; + using AddTemperature = unary::Temperature; + + struct OnceIonizedImpl + { + template + DINLINE void operator()(T_Particle& particle) + { + constexpr float_X ion1plus = GetAtomicNumbers::type::numberOfProtons - 1._X; + + // set (Z - 1) bound electrons + particle[boundElectrons_] = ion1plus; + } + }; + // definition of SetDrift start + using OnceIonized = generic::Free; + + } // namespace manipulators + + + namespace startPosition + { + struct QuietParam2ppc + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = typename mCT::shrinkTo, simDim>::type; + }; + + // definition of quiet particle start + using Quiet2ppc = QuietImpl; + + } // namespace startPosition + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. */ - using numParticlesPerDimension = typename mCT::shrinkTo< - mCT::Int< 1, 2, 1 >, - simDim - >::type; - }; - - // definition of quiet particle start - using Quiet2ppc = QuietImpl< QuietParam2ppc >; - -} // namespace startPosition - - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. 
- */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = mCT::volume< - startPosition::QuietParam2ppc::numParticlesPerDimension - >::type::value; + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL + = mCT::volume::type::value; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesDefinition.param index f15be239c2..e57e0791bb 100644 --- a/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz, Heiko Burau +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau * * This file is part of PIConGPU. * @@ -31,16 +31,10 @@ namespace picongpu { + /*########################### define particle attributes #####################*/ -/*########################### define particle attributes #####################*/ - -/** describe attributes of a particle*/ -using DefaultParticleAttributes = MakeSeq_t< - position< position_pic >, - momentum, - weighting, - particleId ->; + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting, particleId>; /** The default example keeps particles in place and does not create a current */ @@ -51,134 +45,101 @@ using DefaultParticleAttributes = MakeSeq_t< # define PARAM_ENABLE_CURRENT 0 #endif -/*########################### end particle attributes ########################*/ + /*########################### end particle attributes ########################*/ -/*########################### define species #################################*/ + /*########################### define species #################################*/ -/*--------------------------- photons -------------------------------------------*/ + /*--------------------------- 
photons -------------------------------------------*/ -value_identifier( float_X, MassRatioPhotons, 0.0 ); -value_identifier( float_X, ChargeRatioPhotons, 0.0 ); + value_identifier(float_X, MassRatioPhotons, 0.0); + value_identifier(float_X, ChargeRatioPhotons, 0.0); -using ParticleFlagsPhotons = MakeSeq_t< -#if( PARAM_ENABLE_PUSHER == 1 ) - particlePusher< particles::pusher::Photon >, + using ParticleFlagsPhotons = MakeSeq_t< +#if(PARAM_ENABLE_PUSHER == 1) + particlePusher, #endif - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, - massRatio< MassRatioPhotons >, - chargeRatio< ChargeRatioPhotons > ->; - -/* define species photons */ -using Photons = Particles< - PMACC_CSTRING( "ph" ), - ParticleFlagsPhotons, - DefaultParticleAttributes ->; - -/*--------------------------- electrons --------------------------------------*/ -/* thermal bulk electrons: 10, 100, 1000 eV - * and - * non-thermal "hot"/prompt electrons: 200 keV - */ + shape, + interpolation, + massRatio, + chargeRatio>; + + /* define species photons */ + using Photons = Particles; + + /*--------------------------- electrons --------------------------------------*/ + /* thermal bulk electrons: 10, 100, 1000 eV + * and + * non-thermal "hot"/prompt electrons: 200 keV + */ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + /* ratio relative to BASE_DENSITY + * thermal "bulk": 1x ionized n_Cu + * non-thermal "hot"/prompt: 0.1% ne_bulk = 0.001 * n_Cu ~ 1e20 / cm3 + */ + value_identifier(float_X, DensityRatioBulkElectrons, 0.999); + value_identifier(float_X, DensityRatioPromptElectrons, 0.001); + + using ParticleFlagsElectrons = MakeSeq_t< +#if(PARAM_ENABLE_PUSHER == 1) + particlePusher, +#endif + shape, + interpolation, +#if(PARAM_ENABLE_CURRENT == 1) + current, +#endif + massRatio, + chargeRatio>; -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, 
MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); + /* thermal bulk electrons */ + using BulkElectrons = Particles< + PMACC_CSTRING("eth"), + MakeSeq_t>, + DefaultParticleAttributes>; -/* ratio relative to BASE_DENSITY - * thermal "bulk": 1x ionized n_Cu - * non-thermal "hot"/prompt: 0.1% ne_bulk = 0.001 * n_Cu ~ 1e20 / cm3 - */ -value_identifier( float_X, DensityRatioBulkElectrons, 0.999 ); -value_identifier( float_X, DensityRatioPromptElectrons, 0.001 ); + /* non-thermal "hot"/prompt electrons */ + using PromptElectrons = Particles< + PMACC_CSTRING("ehot"), + MakeSeq_t>, + DefaultParticleAttributes>; -using ParticleFlagsElectrons = MakeSeq_t< -#if( PARAM_ENABLE_PUSHER == 1 ) - particlePusher< UsedParticlePusher >, -#endif - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, -#if( PARAM_ENABLE_CURRENT == 1 ) - current< UsedParticleCurrentSolver >, -#endif - massRatio< MassRatioElectrons >, - chargeRatio< ChargeRatioElectrons > ->; - -/* thermal bulk electrons */ -using BulkElectrons = Particles< - PMACC_CSTRING( "eth" ), - MakeSeq_t< - ParticleFlagsElectrons, - densityRatio< DensityRatioBulkElectrons > - >, - DefaultParticleAttributes ->; - -/* non-thermal "hot"/prompt electrons */ -using PromptElectrons = Particles< - PMACC_CSTRING( "ehot" ), - MakeSeq_t< - ParticleFlagsElectrons, - densityRatio< DensityRatioPromptElectrons > - >, - DefaultParticleAttributes ->; - -/*--------------------------- ions -------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioCopper, 115840. 
); -value_identifier( float_X, ChargeRatioCopper, -29.0 ); - -/* ratio relative to BASE_DENSITY */ -value_identifier( float_X, DensityRatioCopper, 1.0 ); - -using ParticleFlagsCopper = MakeSeq_t< -#if( PARAM_ENABLE_PUSHER == 1 ) - particlePusher< UsedParticlePusher >, + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioCopper, 115840.); + value_identifier(float_X, ChargeRatioCopper, -29.0); + + /* ratio relative to BASE_DENSITY */ + value_identifier(float_X, DensityRatioCopper, 1.0); + + using ParticleFlagsCopper = MakeSeq_t< +#if(PARAM_ENABLE_PUSHER == 1) + particlePusher, #endif - shape< UsedParticleShape >, - interpolation< UsedField2Particle >, -#if( PARAM_ENABLE_CURRENT == 1 ) - current< UsedParticleCurrentSolver >, + shape, + interpolation, +#if(PARAM_ENABLE_CURRENT == 1) + current, #endif - massRatio< MassRatioCopper >, - chargeRatio< ChargeRatioCopper >, - densityRatio< DensityRatioCopper >, - atomicNumbers< ionization::atomicNumbers::Copper_t >, - // note: this method is not yet fully implemented - populationKinetics< - particles::flylite::NonLTE< - MakeSeq_t< - BulkElectrons, - PromptElectrons - >, - MakeSeq_t< Photons > - > - > ->; - -/* define species ions */ -using CopperIons = Particles< - PMACC_CSTRING( "Cu" ), - ParticleFlagsCopper, - MakeSeq_t< - DefaultParticleAttributes, - boundElectrons, - superconfig - > ->; - -/*########################### end species ####################################*/ - -using VectorAllSpecies = MakeSeq_t< - Photons, - BulkElectrons, - PromptElectrons, - CopperIons ->; - -} + massRatio, + chargeRatio, + densityRatio, + atomicNumbers, + // note: this method is not yet fully implemented + populationKinetics, MakeSeq_t>>>; + + /* define species ions */ + using CopperIons = Particles< + PMACC_CSTRING("Cu"), + ParticleFlagsCopper, + MakeSeq_t>; + + /*########################### end species 
####################################*/ + + using VectorAllSpecies = MakeSeq_t; + +} // namespace picongpu diff --git a/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesInitialization.param index 02312f6426..ea6109fc67 100644 --- a/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/WarmCopper/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. * @@ -33,48 +33,25 @@ namespace picongpu { -namespace particles -{ - - /** InitPipeline defines in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - // Generate Densities - CreateDensity< - densityProfiles::Homogenous, - startPosition::Quiet2ppc, - CopperIons - >, - ManipulateDerive< - manipulators::binary::DensityWeighting, - CopperIons, - BulkElectrons - >, - ManipulateDerive< - manipulators::binary::DensityWeighting, - CopperIons, - PromptElectrons - >, - // Set the Cu ions to Cu_1+ - Manipulate< - manipulators::OnceIonized, - CopperIons - >, - // Set initial temperature of bulk electrons - Manipulate< - manipulators::AddTemperature, - BulkElectrons - >, - /* Set initial drift (directed in this case) of delta-distributed 200 keV - * prompt electrons + namespace particles + { + /** InitPipeline defines in which order species are initialized + * + * the functors are called in order (from first to last functor) */ - Manipulate< - manipulators::Assign200keVDrift, - PromptElectrons - > - >; + using InitPipeline = bmpl::vector< + // Generate Densities + CreateDensity, + ManipulateDerive, + ManipulateDerive, + // Set the Cu ions to Cu_1+ + Manipulate, + // Set initial temperature of bulk electrons + Manipulate, + /* Set 
initial drift (directed in this case) of delta-distributed 200 keV + * prompt electrons + */ + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/etc/picongpu/4.cfg b/share/picongpu/examples/WeibelTransverse/etc/picongpu/4.cfg index 2cafc7a7f9..3b48e09712 100644 --- a/share/picongpu/examples/WeibelTransverse/etc/picongpu/4.cfg +++ b/share/picongpu/examples/WeibelTransverse/etc/picongpu/4.cfg @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Axel Huebl +# Copyright 2013-2021 Rene Widera, Axel Huebl # # This file is part of PIConGPU. # diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/density.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/density.param index 9c45927d94..5ab7ed52c3 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/density.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/density.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, * Richard Pausch * * This file is part of PIConGPU. @@ -25,22 +25,22 @@ namespace picongpu { -namespace SI -{ - /** Base density in particles per m^3 in the density profiles. - * - * This is often taken as reference maximum density in normalized profiles. - * Individual particle species can define a `densityRatio` flag relative - * to this value. - * - * unit: ELEMENTS/m^3 - */ - constexpr float_64 BASE_DENSITY_SI = 1.e25; -} + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1.e25; + } // namespace SI -namespace densityProfiles -{ - /* definition of homogenous density profile */ - using Homogenous = HomogenousImpl; -} -} + namespace densityProfiles + { + /* definition of homogenous density profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/grid.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/grid.param index 7cbc0a468d..dbd829621c 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/grid.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/grid.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -18,12 +18,10 @@ */ - #pragma once namespace picongpu { - namespace SI { /** Duration of one timestep @@ -52,21 +50,21 @@ namespace picongpu * behave like the interaction of infinite "wire particles" * in fields with perfect symmetry in Z. */ - } //namespace SI + } // namespace SI - //! Defines the size of the absorbing zone (in cells) + //! Defines the size of the absorbing zone (in cells) constexpr uint32_t ABSORBER_CELLS[3][2] = { - {0, 0}, /*x direction [negative,positive]*/ - {0, 0}, /*y direction [negative,positive]*/ - {0, 0} /*z direction [negative,positive]*/ - }; //unit: number of cells + {0, 0}, /*x direction [negative,positive]*/ + {0, 0}, /*y direction [negative,positive]*/ + {0, 0} /*z direction [negative,positive]*/ + }; // unit: number of cells //! 
Define the strength of the absorber for any direction constexpr float_X ABSORBER_STRENGTH[3][2] = { {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ - {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ - }; //unit: none + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none /** When to move the co-moving window. * An initial pseudo particle, flying with the speed of light, @@ -85,7 +83,4 @@ namespace picongpu */ constexpr float_64 movePoint = 0.90; -} - - - +} // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param index ec6c2d79ab..82b4a6cea8 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/memory.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz * * This file is part of PIConGPU. * @@ -31,77 +31,87 @@ #include #include +#include + namespace picongpu { + /* We have to hold back 350MiB for gpu-internal operations: + * - random number generator + * - reduces + * - ... + */ + constexpr size_t reservedGpuMemorySize = 400 * 1024 * 1024; -/* We have to hold back 350MiB for gpu-internal operations: - * - random number generator - * - reduces - * - ... 
- */ -constexpr size_t reservedGpuMemorySize = 400 *1024*1024; + /* short namespace*/ + namespace mCT = pmacc::math::CT; + /** size of a superCell + * + * volume of a superCell must be <= 1024 + */ + using SuperCellSize = typename mCT::shrinkTo, simDim>::type; -/* short namespace*/ -namespace mCT = pmacc::math::CT; -/** size of a superCell - * - * volume of a superCell must be <= 1024 - */ -using SuperCellSize = typename mCT::shrinkTo< - mCT::Int< 8, 8, 4 >, - simDim ->::type; + /** define the object for mapping superCells to cells*/ + using MappingDesc = MappingDescription; -/** define the object for mapping superCells to cells*/ -using MappingDesc = MappingDescription< simDim, SuperCellSize >; + /** define the size of the core, border and guard area + * + * PIConGPU uses spatial domain-decomposition for parallelization + * over multiple devices with non-shared memory architecture. + * The global spatial domain is organized per device in three + * sections: the GUARD area contains copies of neighboring + * devices (also known as "halo"/"ghost"). + * The BORDER area is the outermost layer of cells of a device, + * equally to what neighboring devices see as GUARD area. + * The CORE area is the innermost area of a device. In union with + * the BORDER area it defines the "active" spatial domain on a device. + * + * GuardSize is defined in units of SuperCellSize per dimension. + */ + using GuardSize = typename mCT::shrinkTo, simDim>::type; -/** define the size of the core, border and guard area - * - * PIConGPU uses spatial domain-decomposition for parallelization - * over multiple devices with non-shared memory architecture. - * The global spatial domain is organized per device in three - * sections: the GUARD area contains copies of neighboring - * devices (also known as "halo"/"ghost"). - * The BORDER area is the outermost layer of cells of a device, - * equally to what neighboring devices see as GUARD area. - * The CORE area is the innermost area of a device. 
In union with - * the BORDER area it defines the "active" spatial domain on a device. - * - * GuardSize is defined in units of SuperCellSize per dimension. - */ -using GuardSize = typename mCT::shrinkTo< - mCT::Int< 1, 1, 1 >, - simDim ->::type; + /** bytes reserved for species exchange buffer + * + * This is the default configuration for species exchanges buffer sizes. + * The default exchange buffer sizes can be changed per species by adding + * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg + * to its flag list. + */ + struct DefaultExchangeMemCfg + { + // memory used for a direction + static constexpr uint32_t BYTES_EXCHANGE_X = 4 * 1024 * 1024; // 4 MiB + static constexpr uint32_t BYTES_EXCHANGE_Y = 6 * 1024 * 1024; // 6 MiB + static constexpr uint32_t BYTES_EXCHANGE_Z = 64 * 1024 * 1024; // 64 MiB + static constexpr uint32_t BYTES_EDGES = 2 * 1024 * 1024; // 2 MiB + static constexpr uint32_t BYTES_CORNER = 512 * 1024; // 512 kiB -/** bytes reserved for species exchange buffer - * - * This is the default configuration for species exchanges buffer sizes. - * The default exchange buffer sizes can be changed per species by adding - * the alias exchangeMemCfg with similar members like in DefaultExchangeMemCfg - * to its flag list. - */ -struct DefaultExchangeMemCfg -{ - // memory used for a direction - static constexpr uint32_t BYTES_EXCHANGE_X = 4 * 1024 * 1024; // 4 MiB - static constexpr uint32_t BYTES_EXCHANGE_Y = 6 * 1024 * 1024; // 6 MiB - static constexpr uint32_t BYTES_EXCHANGE_Z = 64 * 1024 * 1024; // 64 MiB - static constexpr uint32_t BYTES_EDGES = 2 * 1024 * 1024; // 2 MiB - static constexpr uint32_t BYTES_CORNER = 512 * 1024; // 512 kiB -}; + /** Reference local domain size + * + * The size of the local domain for which the exchange sizes `BYTES_*` are configured for. + * The required size of each exchange will be calculated at runtime based on the local domain size and the + * reference size. 
The exchange size will be scaled only up and not down. Zero means that there is no reference + * domain size, exchanges will not be scaled. + */ + using REF_LOCAL_DOM_SIZE = mCT::Int<0, 0, 0>; + /** Scaling rate per direction. + * + * 1.0 means it scales linear with the ratio between the local domain size at runtime and the reference local + * domain size. + */ + const std::array DIR_SCALING_FACTOR = {{0.0, 0.0, 0.0}}; + }; -/** number of scalar fields that are reserved as temporary fields */ -constexpr uint32_t fieldTmpNumSlots = 1; + /** number of scalar fields that are reserved as temporary fields */ + constexpr uint32_t fieldTmpNumSlots = 1; -/** can `FieldTmp` gather neighbor information - * - * If `true` it is possible to call the method `asyncCommunicationGather()` - * to copy data from the border of neighboring GPU into the local guard. - * This is also known as building up a "ghost" or "halo" region in domain - * decomposition and only necessary for specific algorithms that extend - * the basic PIC cycle, e.g. with dependence on derived density or energy fields. - */ -constexpr bool fieldTmpSupportGatherCommunication = true; + /** can `FieldTmp` gather neighbor information + * + * If `true` it is possible to call the method `asyncCommunicationGather()` + * to copy data from the border of neighboring GPU into the local guard. + * This is also known as building up a "ghost" or "halo" region in domain + * decomposition and only necessary for specific algorithms that extend + * the basic PIC cycle, e.g. with dependence on derived density or energy fields. 
+ */ + constexpr bool fieldTmpSupportGatherCommunication = true; } // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/particle.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/particle.param index 18eb6cd0b3..1675796115 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/particle.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/particle.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Rene Widera, Benjamin Worpitz, +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, * Richard Pausch * * This file is part of PIConGPU. @@ -29,83 +29,75 @@ namespace picongpu { - -namespace particles -{ - - /** a particle with a weighting below MIN_WEIGHTING will not - * be created / will be deleted - * unit: none - */ - constexpr float_X MIN_WEIGHTING = 10.0; - -namespace manipulators -{ - - CONST_VECTOR( float_X, 3, DriftParamElectrons_direction, 0.0, 0.0, 1.0 ); - struct DriftParamElectrons + namespace particles { - /** Initial particle drift velocity for electrons and ions - * Examples: - * - No drift is equal to 1.0 + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted * unit: none */ - static constexpr float_64 gamma = 1.021; - const DriftParamElectrons_direction_t direction; - }; - using AssignZDriftElectrons = unary::Drift< DriftParamElectrons, nvidia::functors::Assign >; - - CONST_VECTOR( float_X, 3, DriftParamIons_direction, 0.0, 0.0, -1.0 ); - struct DriftParamIons - { - /** Initial particle drift velocity for electrons and ions - * Examples: - * - No drift is equal to 1.0 - * unit: none - */ - static constexpr float_64 gamma = 1.021; - const DriftParamIons_direction_t direction; - }; - // definition of SetDrift start - using AssignZDriftIons = unary::Drift< DriftParamIons, nvidia::functors::Assign >; - - struct TemperatureParam - { - /** Initial temperature - * unit: keV - */ - static 
constexpr float_64 temperature = 0.005; - }; - using AddTemperature = unary::Temperature< TemperatureParam >; - -} // namespace manipulators - -namespace startPosition -{ - - struct QuietParam4ppc - { - /** Count of particles per cell per direction at initial state - * unit: none + constexpr float_X MIN_WEIGHTING = 10.0; + + namespace manipulators + { + CONST_VECTOR(float_X, 3, DriftParamElectrons_direction, 0.0, 0.0, 1.0); + struct DriftParamElectrons + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + const DriftParamElectrons_direction_t direction; + }; + using AssignZDriftElectrons = unary::Drift; + + CONST_VECTOR(float_X, 3, DriftParamIons_direction, 0.0, 0.0, -1.0); + struct DriftParamIons + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + const DriftParamIons_direction_t direction; + }; + // definition of SetDrift start + using AssignZDriftIons = unary::Drift; + + struct TemperatureParam + { + /** Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.005; + }; + using AddTemperature = unary::Temperature; + + } // namespace manipulators + + namespace startPosition + { + struct QuietParam4ppc + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = mCT::shrinkTo, simDim>::type; + }; + + // definition of quiet particle start + using Quiet4ppc = QuietImpl; + + } // namespace startPosition + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. 
*/ - using numParticlesPerDimension = mCT::shrinkTo< - mCT::Int< 2, 2, 1 >, - simDim - >::type; - }; - - // definition of quiet particle start - using Quiet4ppc = QuietImpl< QuietParam4ppc >; - -} // namespace startPosition - - /** During unit normalization, we assume this is a typical - * number of particles per cell for normalization of weighted - * particle attributes. - */ - constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = mCT::volume< - startPosition::QuietParam4ppc::numParticlesPerDimension - >::type::value; + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL + = mCT::volume::type::value; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/png.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/png.param index 3817d0df9e..a749261f55 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/png.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/png.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Richard Pausch * * This file is part of PIConGPU. 
* @@ -57,25 +57,24 @@ namespace picongpu // specify color scales for each channel namespace preParticleDensCol = colorScales::red; - namespace preChannel1Col = colorScales::blue; - namespace preChannel2Col = colorScales::green; - namespace preChannel3Col = colorScales::none; + namespace preChannel1Col = colorScales::blue; + namespace preChannel2Col = colorScales::green; + namespace preChannel3Col = colorScales::none; /* png preview settings for each channel */ - DINLINE float_X preChannel1 ( const float3_X& field_B, const float3_X& field_E, const float3_X& field_J ) + DINLINE float_X preChannel1(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) { - return field_B.x()*field_B.x() + field_B.y()*field_B.y(); + return field_B.x() * field_B.x() + field_B.y() * field_B.y(); } - DINLINE float_X preChannel2 ( const float3_X& field_B, const float3_X& field_E, const float3_X& field_J ) + DINLINE float_X preChannel2(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) { - return field_E.x()*field_E.x() + field_E.y()*field_E.y(); + return field_E.x() * field_E.x() + field_E.y() * field_E.y(); } - DINLINE float_X preChannel3 ( const float3_X& field_B, const float3_X& field_E, const float3_X& field_J ) + DINLINE float_X preChannel3(const float3_X& field_B, const float3_X& field_E, const float3_X& field_J) { return 1.0_X; } - } -} - + } // namespace visPreview +} // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesDefinition.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesDefinition.param index 00758a9b9f..25290c0e7e 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesDefinition.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesDefinition.param @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Benjamin Worpitz +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz * * This file is 
part of PIConGPU. * @@ -31,75 +31,57 @@ namespace picongpu { + /*########################### define particle attributes #####################*/ -/*########################### define particle attributes #####################*/ - -/** describe attributes of a particle */ -using DefaultParticleAttributes = MakeSeq_t< - position, - momentum, - weighting ->; - -/*########################### end particle attributes ########################*/ - -/*########################### define species #################################*/ - - -/*--------------------------- electrons --------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioElectrons, 1.0 ); -value_identifier( float_X, ChargeRatioElectrons, 1.0 ); - -using ParticleFlagsElectrons = MakeSeq_t< - particlePusher, - shape, - interpolation, - current, - massRatio, - chargeRatio ->; - -/* define species electrons */ -using PIC_Electrons = Particles< - PMACC_CSTRING( "e" ), - ParticleFlagsElectrons, - DefaultParticleAttributes ->; - -/*--------------------------- ions -------------------------------------------*/ - -/* ratio relative to BASE_CHARGE and BASE_MASS */ -value_identifier( float_X, MassRatioIons, 1.0 ); -value_identifier( float_X, ChargeRatioIons, -1.0 ); - -/* ratio relative to BASE_DENSITY */ -value_identifier( float_X, DensityRatioIons, 1.0 ); - -using ParticleFlagsIons = MakeSeq_t< - particlePusher, - shape, - interpolation, - current, - massRatio, - chargeRatio, - densityRatio, - atomicNumbers ->; - -/*define specie ions*/ -using PIC_Ions = Particles< - PMACC_CSTRING( "i" ), - ParticleFlagsIons, - DefaultParticleAttributes ->; - -/*########################### end species ####################################*/ - -using VectorAllSpecies = MakeSeq_t< - PIC_Electrons, - PIC_Ions ->; - -} //namespace picongpu + /** describe attributes of a particle */ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; + + 
/*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioIons, 1.0); + value_identifier(float_X, ChargeRatioIons, -1.0); + + /* ratio relative to BASE_DENSITY */ + value_identifier(float_X, DensityRatioIons, 1.0); + + using ParticleFlagsIons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio, + densityRatio, + atomicNumbers>; + + /*define specie ions*/ + using PIC_Ions = Particles; + + /*########################### end species ####################################*/ + + using VectorAllSpecies = MakeSeq_t; + +} // namespace picongpu diff --git a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesInitialization.param b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesInitialization.param index ea52788480..ca5ac89f0d 100644 --- a/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesInitialization.param +++ b/share/picongpu/examples/WeibelTransverse/include/picongpu/param/speciesInitialization.param @@ -1,4 +1,4 @@ -/* Copyright 2015-2020 Rene Widera, Axel Huebl +/* Copyright 2015-2021 Rene Widera, Axel Huebl * * This file is part of PIConGPU. 
* @@ -33,41 +33,26 @@ namespace picongpu { -namespace particles -{ - /** InitPipeline define in which order species are initialized - * - * the functors are called in order (from first to last functor) - */ - using InitPipeline = bmpl::vector< - CreateDensity< - densityProfiles::Homogenous, - startPosition::Quiet4ppc, - PIC_Ions - >, - ManipulateDerive< - /* make sure in speciesDefinition.param that - * densityRatio * chargeRatio - * of electrons and ions is quasi neutral! - * alternatively, use manipulators::ProtonTimesWeighting - */ - manipulators::binary::DensityWeighting, - PIC_Ions, - PIC_Electrons - >, - Manipulate< - manipulators::AssignZDriftIons, - PIC_Ions - >, - Manipulate< - manipulators::AssignZDriftElectrons, - PIC_Electrons - >, - Manipulate< - manipulators::AddTemperature, - PIC_Electrons - > - >; + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + ManipulateDerive< + /* make sure in speciesDefinition.param that + * densityRatio * chargeRatio + * of electrons and ions is quasi neutral! + * alternatively, use manipulators::ProtonTimesWeighting + */ + manipulators::binary::DensityWeighting, + PIC_Ions, + PIC_Electrons>, + Manipulate, + Manipulate, + Manipulate>; -} // namespace particles + } // namespace particles } // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/README.rst b/share/picongpu/tests/XrayScattering/README.rst new file mode 100644 index 0000000000..442a505f82 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/README.rst @@ -0,0 +1,7 @@ +XrayScattering: +=============== +This is a simulation with some simple density profiles (double slit, periodic grid, periodic stripes), no laser, no random species initialization. +It is meant as a functional test for the xrayScattering plugin. +Plugin output is validated by a comparison with an FFT result. 
+ +.. sectionauthor:: Pawel Ordyna diff --git a/share/picongpu/tests/XrayScattering/cmakeFlags b/share/picongpu/tests/XrayScattering/cmakeFlags new file mode 100755 index 0000000000..3a1e4aa1e2 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/cmakeFlags @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2021 Axel Huebl, Rene Widera, Pawel Ordyna +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . 
+# + +# +# generic compile options +# + +################################################################################ +# add presets here +# - default: index 0 +# - start with zero index +# - increase by 1, no gaps + +flags[0]="" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_DIMENSION=DIM3'" +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_PRECISION=precision64Bit'" +flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_PRECISION=precision64Bit;-DPARAM_DIMENSION=DIM3'" +flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_IONS=1'" +flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_ANGLES=1;-DPARAM_PRECISION=precision64Bit;-DPARAM_DIMENSION=DIM3'" + +################################################################################ +# execution + +case "$1" in + -l) echo ${#flags[@]} + ;; + -ll) for f in "${flags[@]}"; do echo $f; done + ;; + *) echo -n ${flags[$1]} + ;; +esac diff --git a/share/picongpu/tests/XrayScattering/etc/picongpu/1.cfg b/share/picongpu/tests/XrayScattering/etc/picongpu/1.cfg new file mode 100644 index 0000000000..57ee9bbd42 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/etc/picongpu/1.cfg @@ -0,0 +1,86 @@ +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, +# Pawel Ordyna +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +## +## This configuration file is used by PIConGPU's TBG tool to create a +## batch script for PIConGPU runs. 
For a detailed description of PIConGPU +## configuration files including all available variables, see +## +## docs/TBG_macros.cfg +## + + +################################# +## Section: Required Variables ## +################################# + + +TBG_wallTime="0:30:00" + +TBG_devices_x=1 +TBG_devices_y=1 +TBG_devices_z=1 + +TBG_gridSize="128 128 32" +TBG_steps="1" + +# leave TBG_movingWindow empty to disable moving window +TBG_movingWindow="" + + + +################################# +## Section: Optional Variables ## +################################# + +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 1 \ + --openPMD.file simData \ + --openPMD.source 'e_density' \ + --openPMD.ext h5" + +TBG_e_xrayScattering="--e_xrayScattering.period 1 \ + --e_xrayScattering.outputPeriod 1 \ + --e_xrayScattering.n_qx 128 --e_xrayScattering.n_qy 128 \ + --e_xrayScattering.qx_min -0.001 --e_xrayScattering.qx_max +0.001 \ + --e_xrayScattering.qy_min -0.001 --e_xrayScattering.qy_max +0.001 \ + --e_xrayScattering.memoryLayout distribute \ + --e_xrayScattering.ext h5" + +TBG_plugins="!TBG_e_xrayScattering !TBG_openPMD" + + +################################# +## Section: Program Parameters ## +################################# + +TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" + +TBG_programParams="-d !TBG_deviceDist \ + -g !TBG_gridSize \ + -s !TBG_steps \ + !TBG_movingWindow \ + !TBG_plugins \ + --versionOnce" + +# TOTAL number of devices +TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" + +"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/tests/XrayScattering/etc/picongpu/1_ions.cfg b/share/picongpu/tests/XrayScattering/etc/picongpu/1_ions.cfg new file mode 100644 index 0000000000..423dcd834e --- /dev/null +++ b/share/picongpu/tests/XrayScattering/etc/picongpu/1_ions.cfg @@ -0,0 +1,93 @@ +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, +# Pawel Ordyna +# +# This file is part of PIConGPU. 
+# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +## +## This configuration file is used by PIConGPU's TBG tool to create a +## batch script for PIConGPU runs. For a detailed description of PIConGPU +## configuration files including all available variables, see +## +## docs/TBG_macros.cfg +## + + +################################# +## Section: Required Variables ## +################################# + +TBG_wallTime="0:30:00" + +TBG_devices_x=1 +TBG_devices_y=1 +TBG_devices_z=1 + +TBG_gridSize="128 128 32" +TBG_steps="2" + +# leave TBG_movingWindow empty to disable moving window +TBG_movingWindow="" + + + +################################# +## Section: Optional Variables ## +################################# + +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 1 \ + --openPMD.file simData \ + --openPMD.source 'e_density' \ + --openPMD.ext h5" + +TBG_e_xrayScattering="--e_xrayScattering.period 1 \ + --e_xrayScattering.outputPeriod 1 \ + --e_xrayScattering.n_qx 128 --e_xrayScattering.n_qy 128 \ + --e_xrayScattering.qx_min -0.001 --e_xrayScattering.qx_max +0.001 \ + --e_xrayScattering.qy_min -0.001 --e_xrayScattering.qy_max +0.001 \ + --e_xrayScattering.memoryLayout distribute \ + --e_xrayScattering.ext h5" + +TBG_i_xrayScattering="--i_xrayScattering.period 1 \ + --i_xrayScattering.outputPeriod 1 \ + --i_xrayScattering.n_qx 120 --i_xrayScattering.n_qy 120 \ + --i_xrayScattering.qx_min -0.001 
--i_xrayScattering.qx_max +0.001 \ + --i_xrayScattering.qy_min -0.001 --i_xrayScattering.qy_max +0.001 \ + --i_xrayScattering.memoryLayout distribute \ + --i_xrayScattering.ext h5" + +TBG_plugins="!TBG_e_xrayScattering !TBG_i_xrayScattering !TBG_openPMD" + + +################################# +## Section: Program Parameters ## +################################# + +TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" + +TBG_programParams="-d !TBG_deviceDist \ + -g !TBG_gridSize \ + -s !TBG_steps \ + !TBG_movingWindow \ + !TBG_plugins \ + --versionOnce" + +# TOTAL number of devices +TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" + +"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/tests/XrayScattering/etc/picongpu/1_mirror.cfg b/share/picongpu/tests/XrayScattering/etc/picongpu/1_mirror.cfg new file mode 100644 index 0000000000..8cafb10812 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/etc/picongpu/1_mirror.cfg @@ -0,0 +1,84 @@ +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, +# Pawel Ordyna +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +## +## This configuration file is used by PIConGPU's TBG tool to create a +## batch script for PIConGPU runs. 
For a detailed description of PIConGPU +## configuration files including all available variables, see +## +## docs/TBG_macros.cfg +## + + +################################# +## Section: Required Variables ## +################################# + +TBG_wallTime="0:30:00" + +TBG_devices_x=1 +TBG_devices_y=1 +TBG_devices_z=1 + +TBG_gridSize="128 128 32" +TBG_steps="2" + +# leave TBG_movingWindow empty to disable moving window +TBG_movingWindow="" + + + +################################# +## Section: Optional Variables ## +################################# + +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 1 \ + --openPMD.file simData \ + --openPMD.source 'e_density' \ + --openPMD.ext h5" + +TBG_e_xrayScattering="--e_xrayScattering.period 1 \ + --e_xrayScattering.outputPeriod 1 \ + --e_xrayScattering.n_qx 128 --e_xrayScattering.n_qy 128 \ + --e_xrayScattering.qx_min -0.001 --e_xrayScattering.qx_max +0.001 \ + --e_xrayScattering.qy_min -0.001 --e_xrayScattering.qy_max +0.001 \ + --e_xrayScattering.memoryLayout mirror --e_xrayScattering.ext h5" + +TBG_plugins="!TBG_e_xrayScattering !TBG_openPMD" + + +################################# +## Section: Program Parameters ## +################################# + +TBG_deviceDist="!TBG_devices_x !TBG_devices_y !TBG_devices_z" + +TBG_programParams="-d !TBG_deviceDist \ + -g !TBG_gridSize \ + -s !TBG_steps \ + !TBG_movingWindow \ + !TBG_plugins \ + --versionOnce" + +# TOTAL number of devices +TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" + +"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/tests/XrayScattering/etc/picongpu/2.cfg b/share/picongpu/tests/XrayScattering/etc/picongpu/2.cfg new file mode 100644 index 0000000000..9dc22878a1 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/etc/picongpu/2.cfg @@ -0,0 +1,85 @@ +# Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, +# Pawel Ordyna +# +# This file is part of PIConGPU. 
+# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +## +## This configuration file is used by PIConGPU's TBG tool to create a +## batch script for PIConGPU runs. For a detailed description of PIConGPU +## configuration files including all available variables, see +## +## docs/TBG_macros.cfg +## + + +################################# +## Section: Required Variables ## +################################# + +TBG_wallTime="0:30:00" + +TBG_devices_x=1 +TBG_devices_y=1 +TBG_devices_z=1 + +TBG_gridSize="128 128 32" +TBG_steps="1" + +# leave TBG_movingWindow empty to disable moving window +TBG_movingWindow="" + + + +################################# +## Section: Optional Variables ## +################################# + +# file I/O with openPMD-HDF5 +TBG_openPMD="--openPMD.period 1 \ + --openPMD.file simData \ + --openPMD.source 'e_density' \ + --openPMD.ext h5" + +TBG_e_xrayScattering="--e_xrayScattering.period 1 \ + --e_xrayScattering.outputPeriod 1 \ + --e_xrayScattering.n_qx 128 --e_xrayScattering.n_qy 128 \ + --e_xrayScattering.qx_min -0.001 --e_xrayScattering.qx_max +0.001 \ + --e_xrayScattering.qy_min -0.001 --e_xrayScattering.qy_max +0.001 \ + --e_xrayScattering.memoryLayout distribute \ + --e_xrayScattering.ext h5" + +TBG_plugins="!TBG_e_xrayScattering !TBG_openPMD" + + +################################# +## Section: Program Parameters ## +################################# + +TBG_deviceDist="!TBG_devices_x 
!TBG_devices_y !TBG_devices_z" + +TBG_programParams="-d !TBG_deviceDist \ + -g !TBG_gridSize \ + -s !TBG_steps \ + !TBG_movingWindow \ + !TBG_plugins \ + --versionOnce" + +# TOTAL number of devices +TBG_tasks="$(( TBG_devices_x * TBG_devices_y * TBG_devices_z ))" + +"$TBG_cfgPath"/submitAction.sh diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/density.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/density.param new file mode 100644 index 0000000000..eb7ed10e6f --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/density.param @@ -0,0 +1,164 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, + * Richard Pausch, Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Configure existing or define new normalized density profiles here. + * During particle species creation in speciesInitialization.param, + * those profiles can be translated to spatial particle distributions. + */ + +#pragma once + +#include "picongpu/particles/densityProfiles/profiles.def" +/* preprocessor struct generator */ +#include + + +namespace picongpu +{ + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. 
+ * Individual particle species can define a `densityRatio` flag relative + * to this value. + * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1e25; + } // namespace SI + + namespace densityProfiles + { + struct DoubleSlitFunctor + { + /** This formula uses SI quantities only. + * The profile will be multiplied by BASE_DENSITY_SI. + * + * Two stripes, in x,y plane, with density 0, everywhere else density + * is 1. Translation invariant in z. + * + * @param position_SI total offset including all slides [meter] + * @param cellSize_SI cell sizes [meter] + * + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + const float_64 x(position_SI.x()); + const float_64 y(position_SI.y()); + const uint64_t xCellId(uint64_t(position_SI.x() / cellSize_SI[0])); + const uint64_t yCellId(uint64_t(position_SI.y() / cellSize_SI[1])); + constexpr uint32_t cellsY = 128; + constexpr uint32_t cellsX = 128; + constexpr uint32_t w = 8; + constexpr uint32_t d = 30; + constexpr uint32_t total = 2 * w + d; + constexpr uint32_t start = (cellsX - total) / 2; + constexpr uint32_t slitHalfHeight = 45; + float_X s = 1.0_X; + if(yCellId > cellsY / 2 - slitHalfHeight && yCellId <= cellsY / 2 + slitHalfHeight) + { + if((xCellId > start - 1 && xCellId < start + w) + || (xCellId >= start + w + d && xCellId < start + w + d + w)) + { + s = 0.0; + } + } + s *= float_X(s >= 0.0); + return s; + } + }; + + template + struct PeriodicGrid2DFunctor + { + /** This formula uses SI quantities only. + * The profile will be multiplied by BASE_DENSITY_SI. 
+ * density(x) = max{sgn(sin(2 * pi/ T * (x- T/4))), 0} + * + * identical for y + * + * @param position_SI total offset including all slides [meter] + * @param cellSize_SI cell sizes [meter] + * + * @tparam T period + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + // get cell number + const uint64_t xCellId = static_cast(position_SI.x() / cellSize_SI.x()); + const uint64_t yCellId = static_cast(position_SI.y() / cellSize_SI.y()); + + float_64 dens = 1.0; + if(((xCellId + T / 4) / (T / 2)) % 2 && ((yCellId + T / 4) / (T / 2)) % 2) + { + dens = 0.0; + } + + // safety check: all parts of the function MUST be > 0 + dens *= float_64(dens >= 0.0); + return dens; + } + }; + + template + struct PeriodicStripesFunctor + { + /** This formula uses SI quantities only. + * The profile will be multiplied by BASE_DENSITY_SI. + * density(y) = max{sgn(sin(2 * pi/ T * (x- T/4))), 0} + * + * + * + * @param position_SI total offset including all slides [meter] + * @param cellSize_SI cell sizes [meter] + * + * @tparam T period + * @return float_X density [normalized to 1.0] + */ + HDINLINE float_X operator()(const floatD_64& position_SI, const float3_64& cellSize_SI) + { + // get cell number + const uint64_t yCellId = static_cast(position_SI.y() / cellSize_SI.y()); + + float_64 dens = 1.0; + if(((yCellId + T / 4) / (T / 2)) % 2) + { + dens = 0.0; + } + + // safety check: all parts of the function MUST be > 0 + dens *= float_64(dens >= 0.0); + return dens; + } + }; + // definition of free formula profiles + using PeriodicGrid2D = FreeFormulaImpl>; + using PeriodicStripes = FreeFormulaImpl>; + using DoubleSlit = FreeFormulaImpl; + // definition of homogeneous profile + using Homogenous = HomogenousImpl; + using UsedDensity = DoubleSlit; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/dimension.param 
b/share/picongpu/tests/XrayScattering/include/picongpu/param/dimension.param new file mode 100644 index 0000000000..eb9dcd9c52 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/dimension.param @@ -0,0 +1,31 @@ +/* Copyright 2014-2021 Axel Huebl, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#ifndef PARAM_DIMENSION +# define PARAM_DIMENSION DIM2 +#endif + +#define SIMDIM PARAM_DIMENSION + +namespace picongpu +{ + constexpr uint32_t simDim = SIMDIM; +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/fileOutput.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/fileOutput.param new file mode 100644 index 0000000000..3f21f6faf0 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/fileOutput.param @@ -0,0 +1,96 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, + * Benjamin Worpitz, Richard Pausch, Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include + +/* some forward declarations we need */ +#include "picongpu/fields/Fields.def" +#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def" + +#include + + +namespace picongpu +{ + /** FieldTmp output (calculated at runtime) ******************************* + * + * Those operations derive scalar field quantities from particle species + * at runtime. Each value is mapped per cell. Some operations are identical + * up to a constant, so avoid writing those twice to save storage. + * + * you can choose any of these particle to grid projections: + * - Density: particle position + shape on the grid + * - BoundElectronDensity: density of bound electrons + * note: only makes sense for partially ionized ions + * - ChargeDensity: density * charge + * note: for species that do not change their charge state, this is + * the same as the density times a constant for the charge + * - Energy: sum of kinetic particle energy per cell with respect to shape + * - EnergyDensity: average kinetic particle energy per cell times the + * particle density + * note: this is the same as the sum of kinetic particle energy + * divided by a constant for the cell volume + * - MomentumComponent: ratio between a selected momentum component and + * the absolute momentum with respect to shape + * - LarmorPower: radiated Larmor power + * (species must contain the attribute `momentumPrev1`) + * + * for debugging: + * - MidCurrentDensityComponent: + * density * charge * velocity_component + * - Counter: counts point like particles per cell + * - MacroCounter: counts point like macro particles per 
cell + */ + namespace deriveField = particles::particleToGrid; + + /* ChargeDensity section */ + using ChargeDensity_Seq + = deriveField::CreateEligible_t; + + /* Density section */ + using Density_Seq = deriveField::CreateEligible_t; + + + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t; + + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + + /** Possible native fields: FieldE, FieldB, FieldJ + */ + using NativeFileOutputFields = MakeSeq_t; + + using FileOutputFields = MakeSeq_t; + + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = VectorAllSpecies; + +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/grid.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/grid.param new file mode 100644 index 0000000000..2cbf2018de --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/grid.param @@ -0,0 +1,105 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +/** @file + * + * Definition of cell sizes and time step. Our cells are defining a regular, + * cartesian grid. Our explicit FDTD field solvers define an upper bound for + * the time step value in relation to the cell size for convergence. Make + * sure to resolve important wavelengths of your simulation, e.g. shortest + * plasma wavelength and central laser wavelength both spatially and + * temporally. + * + * **Units in reduced dimensions** + * + * In 2D3V simulations, the CELL_DEPTH_SI (Z) cell length + * is still used for normalization of densities, etc. + * + * A 2D3V simulation in a cartesian PIC simulation such as + * ours only changes the degrees of freedom in motion for + * (macro) particles and all (field) information in z + * travels instantaneously, making the 2D3V simulation + * behave like the interaction of infinite "wire particles" + * in fields with perfect symmetry in Z. + * + */ + +#pragma once + + +namespace picongpu +{ + namespace SI + { + /** equals X + * unit: meter */ + // multiple of PI ensures nice q-space limits in the FFT + constexpr float_64 CELL_WIDTH_SI = PI * 0.1e-6; + /** equals Y + * unit: meter */ + constexpr float_64 CELL_HEIGHT_SI = CELL_WIDTH_SI; + /** equals Z + * unit: meter */ + constexpr float_64 CELL_DEPTH_SI = CELL_WIDTH_SI; + + /** Duration of one timestep + * unit: seconds */ + // Works for both 2D and 3D. 
+ constexpr float_64 DELTA_T_SI = CELL_WIDTH_SI / (1.734 * SPEED_OF_LIGHT_SI); + + } // namespace SI + + /** Defines the size of the absorbing zone (in cells) + * + * unit: none + */ + constexpr uint32_t ABSORBER_CELLS[3][2] = { + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; + + /** Define the strength of the absorber for any direction + * + * unit: none + */ + constexpr float_X ABSORBER_STRENGTH[3][2] = { + {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ + {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; + + /** When to move the co-moving window. + * An initial pseudo particle, flying with the speed of light, + * is fired at the begin of the simulation. + * When it reaches movePoint % of the absolute(*) simulation area, + * the co-moving window starts to move with the speed of light. + * + * (*) Note: beware, that there is one "hidden" row of gpus at the y-front, + * when you use the co-moving window + * 0.75 means only 75% of simulation area is used for real simulation + * + * Warning: this variable is deprecated, but currently still required for + * building purposes. Please keep the variable here. In case a moving window + * is enabled in your .cfg file, please set the move point using the + * 'windowMovePoint' parameter in that file, its default value is movePoint. + */ + constexpr float_64 movePoint = 0.9; + +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/particle.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/particle.param new file mode 100644 index 0000000000..692418a213 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/particle.param @@ -0,0 +1,94 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, + * Richard Pausch, Pawel Ordyna + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Configurations for particle manipulators. Set up and declare functors that + * can be used in speciesInitialization.param for particle species + * initialization and manipulation, such as temperature distributions, drifts, + * pre-ionization and in-cell position. + */ + +#pragma once + +#include "picongpu/particles/startPosition/functors.def" +#include "picongpu/particles/manipulators/manipulators.def" + +namespace picongpu +{ + namespace particles + { + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * + * unit: none */ + constexpr float_X MIN_WEIGHTING = 10.0; + + /** Number of maximum particles per cell during density profile evaluation. + * + * Determines the weighting of a macro particle and with it, the number of + * particles "sampling" dynamics in phase space. 
+ */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL = 1u; + + namespace manipulators + { + // ionize ions once by removing one bound electron + struct OnceIonizedImpl + { + template + DINLINE void operator()(T_Particle& particle) + { + constexpr float_X protonNumber = GetAtomicNumbers::type::numberOfProtons; + particle[boundElectrons_] = protonNumber - 1.0_X; + } + }; + using OnceIonized = generic::Free; + + + } // namespace manipulators + + namespace startPosition + { + /** sit directly in the middle of the cell */ + CONST_VECTOR( + float_X, + 3, + InCellOffset, + /* each x, y, z in-cell position component in range [0.0, 1.0) */ + 0.5, + 0.5, + 0.5); + struct OnePositionParameter + { + /** Count of particles per cell at initial state + * + * unit: none */ + static constexpr uint32_t numParticlesPerCell = TYPICAL_PARTICLES_PER_CELL; + + const InCellOffset_t inCellOffset; + }; + + /** definition of one specific position for particle start */ + using OnePosition = OnePositionImpl; + + } // namespace startPosition + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/precision.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/precision.param new file mode 100644 index 0000000000..162c25da0d --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/precision.param @@ -0,0 +1,59 @@ +/* Copyright 2013-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Define the precision of typically used floating point types in the + * simulation. + * + * PIConGPU normalizes input automatically, allowing to use single-precision by + * default for the core algorithms. Note that implementations of various + * algorithms (usually plugins or non-core components) might still decide to + * hard-code a different (mixed) precision for some critical operations. + */ + +#pragma once + + +namespace picongpu +{ +/*! Select a precision for the simulation data + * - precision32Bit : use 32Bit floating point numbers + * [significant digits 7 to 8] + * - precision64Bit : use 64Bit floating point numbers + * [significant digits 15 to 16] + */ +#ifndef PARAM_PRECISION +# define PARAM_PRECISION precision32Bit +#endif + namespace precisionPIConGPU = PARAM_PRECISION; + + /*! Select a precision special operations (can be different from simulation precision) + * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) + * - precision32Bit : use 32Bit floating point numbers + * - precision64Bit : use 64Bit floating point numbers + */ + namespace precisionSqrt = precisionPIConGPU; + namespace precisionExp = precisionPIConGPU; + namespace precisionTrigonometric = precisionPIConGPU; + + +} // namespace picongpu + +#include "picongpu/unitless/precision.unitless" diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/species.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/species.param new file mode 100644 index 0000000000..75f7785cd9 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/species.param @@ -0,0 +1,106 @@ +/* Copyright 2014-2021 Rene Widera, Richard Pausch, Annegret Roeszler, Klaus Steiniger, Pawel Ordyna + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Particle shape, field to particle interpolation, current solver, and particle pusher + * can be declared here for usage in `speciesDefinition.param`. + * + * @see + * **MODELS / Hierarchy of Charge Assignment Schemes** + * in the online documentation for information on particle shapes. + * + * + * \attention + * The higher order shape names are redefined with release 0.6.0 in order to provide a consistent naming: + * * PQS is the name of the 3rd order assignment function (instead of PCS) + * * PCS is the name of the 4th order assignment function (instead of P4S) + * * P4S does not exist anymore + */ + +#pragma once + +#include "picongpu/particles/shapes.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" +#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" +#include "picongpu/particles/flylite/NonLTE.def" +#include "picongpu/fields/currentDeposition/Solver.def" + + +namespace picongpu +{ + /** select macroparticle shape + * + * **WARNING** the shape names are redefined and diverge from PIConGPU versions before 0.6.0. 
+ * + * - particles::shapes::CIC : Assignment function is a piecewise linear spline + * - particles::shapes::TSC : Assignment function is a piecewise quadratic spline + * - particles::shapes::PQS : Assignment function is a piecewise cubic spline + * - particles::shapes::PCS : Assignment function is a piecewise quartic spline + */ + using UsedParticleShape = particles::shapes::Counter; + + /** select interpolation method to be used for interpolation of grid-based field values to particle positions + */ + using UsedField2Particle = FieldToParticleInterpolation; + + /*! select current solver method + * - currentSolver::Esirkepov< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * - currentSolver::VillaBune< SHAPE, STRATEGY > : particle shapes - CIC (1st order) only + * - currentSolver::EmZ< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * + * For development purposes: + * - currentSolver::EsirkepovNative< SHAPE, STRATEGY > : generic version of currentSolverEsirkepov + * without optimization (~4x slower and needs more shared memory) + * + * STRATEGY (optional): + * - currentSolver::strategy::StridedCachedSupercells + * - currentSolver::strategy::StridedCachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::CachedSupercells + * - currentSolver::strategy::CachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::NonCachedSupercells + * - currentSolver::strategy::NonCachedSupercellsScaled with N >= 1 + */ + using UsedParticleCurrentSolver = currentSolver::Esirkepov; + + /** particle pusher configuration + * + * Defining a pusher is optional for particles + * + * - particles::pusher::HigueraCary : Higuera & Cary's relativistic pusher preserving both volume and ExB velocity + * - particles::pusher::Vay : Vay's relativistic pusher preserving ExB velocity + * - particles::pusher::Boris : Boris' relativistic pusher preserving volume + * - particles::pusher::ReducedLandauLifshitz : 4th order 
RungeKutta pusher + * with classical radiation reaction + * - particles::pusher::Composite : composite of two given pushers, + * switches between using one (or none) of those + * + * For diagnostics & modeling: ------------------------------------------------ + * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field + * - particles::pusher::Free : free propagation, ignore fields + * (= free stream model) + * - particles::pusher::Photon : propagate with c in direction of normalized mom. + * - particles::pusher::Probe : Probe particles that interpolate E & B + * For development purposes: -------------------------------------------------- + * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) + */ + using UsedParticlePusher = particles::pusher::Boris; + +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/speciesDefinition.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/speciesDefinition.param new file mode 100644 index 0000000000..ffd9cf762b --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/speciesDefinition.param @@ -0,0 +1,120 @@ +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau, Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Define particle species. 
+ * + * This file collects all previous declarations of base (reference) quantities + * and configured solvers for species and defines particle species. This + * includes "attributes" (lvalues to store with each species) and "flags" + * (rvalues & aliases for solvers to perform with the species for each timestep + * and ratios to base quantities). With those information, a `Particles` class + * is defined for each species and then collected in the list + * `VectorAllSpecies`. + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/Particles.hpp" + +#include +#include +#include +#include +#include + +namespace picongpu +{ + /*########################### define particle attributes #####################*/ + + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting>; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio +#if(ENABLE_SYNCHROTRON_PHOTONS == 1) + , + synchrotronPhotons +#endif + >; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*--------------------------- ions -------------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioIons, 115837); + value_identifier(float_X, ChargeRatioIons, -29.0); + + /* ratio relative to BASE_DENSITY */ + value_identifier(float_X, DensityRatioIons, 1.0); + + using ParticleFlagsIons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + 
massRatio, + chargeRatio, + densityRatio, + atomicNumbers>; + + /* define species ions */ + using PIC_Ions + = Particles>; + +/*########################### end species ####################################*/ + +/*enable (1) or disable (0) ions*/ +#ifndef PARAM_IONS +# define PARAM_IONS 0 +#endif + + /** All known particle species of the simulation + * + * List all defined particle species from above in this list + * to make them available to the PIC algorithm. + */ + using VectorAllSpecies = MakeSeq_t< + PIC_Electrons +#if(PARAM_IONS == 1) + , + PIC_Ions +#endif + >; + +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/speciesInitialization.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/speciesInitialization.param new file mode 100644 index 0000000000..560b5f9cd6 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/speciesInitialization.param @@ -0,0 +1,52 @@ +/* Copyright 2015-2021 Rene Widera, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Initialize particles inside particle species. This is the final step in + * setting up particles (defined in `speciesDefinition.param`) via density + * profiles (defined in `density.param`). 
One can then further derive particles + * from one species to another and manipulate attributes with "manipulators" + * and "filters" (defined in `particle.param` and `particleFilters.param`). + */ + +#pragma once + +#include "picongpu/particles/InitFunctors.hpp" + + +namespace picongpu +{ + namespace particles + { + /** InitPipeline defines in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity +#if(PARAM_IONS == 1) + , + Derive, + Manipulate +#endif + >; + + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/include/picongpu/param/xrayScattering.param b/share/picongpu/tests/XrayScattering/include/picongpu/param/xrayScattering.param new file mode 100644 index 0000000000..27e35241b7 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/include/picongpu/param/xrayScattering.param @@ -0,0 +1,74 @@ +/* Copyright 2020-2021 Pawel Ordyna + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/plugins/xrayScattering/beam/Side.hpp" + +/* preprocessor struct generator */ +#include + +#ifndef PARAM_ANGLES +# define PARAM_ANGLES 0 +#endif + +namespace picongpu +{ + namespace plugins + { + namespace xrayScattering + { + namespace beam + { + using namespace picongpu::plugins::xrayScattering::beam; + /* Choose from: + * - ZSide + * - YSide + * - XSide + * - ZRSide + * - YRSide + * - XRSide + */ + using ProbingSide = ZSide; + + PMACC_STRUCT( + RotationParam, + (PMACC_C_VALUE( + float_X, + yawAngle, +#if PARAM_ANGLES == 1 + 20.0_X / 180.0_X * pmacc::math::Pi::value +#else + 0.0_X +#endif + ))( + PMACC_C_VALUE( + float_X, + pitchAngle, +#if PARAM_ANGLES == 1 + 42.0_X / 180.0_X * pmacc::math::Pi::value +#else + 0.0_X +#endif + ))); + } // namespace beam + } // namespace xrayScattering + } // namespace plugins +} // namespace picongpu diff --git a/share/picongpu/tests/XrayScattering/lib/python/picongpu/checks.py b/share/picongpu/tests/XrayScattering/lib/python/picongpu/checks.py new file mode 100644 index 0000000000..5a84fd9bd1 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/lib/python/picongpu/checks.py @@ -0,0 +1,81 @@ +from os.path import join +import numpy as np +import openpmd_api as api +from picongpu.plugins.data import XrayScatteringData +from is_close import is_close + + +def compare_with_fft(species, bound_electrons, rotation=None): + simulation_path = '../../../../' + + # Load pluginOutput + xray_scattering_data = XrayScatteringData(simulation_path, species, 'h5') + amplitude = xray_scattering_data.get(iteration=0) + del xray_scattering_data + + # Load density + internal_path = 'simOutput/h5' + file_name = 'simData_%T.h5' + path_output = join(simulation_path, internal_path, file_name) + series_output = api.Series(path_output, api.Access_Type.read_only) + i = series_output.iterations[0] + e_mesh = i.meshes['e_density'] + ed = 
e_mesh[api.Mesh_Record_Component.SCALAR] + electron_density = ed.load_chunk() + series_output.flush() # load_chunk() data is only valid after flush() + # ions have the same density in this setup + electron_density *= bound_electrons + + # Transform data + # (SideZ) + if electron_density.ndim == 3: + # zyx(openPMD) -> xyz(PIC) -> yxz(beam Side z) + electron_density = np.moveaxis(electron_density, (0, 1, 2), (2, 0, 1)) + # for dim == 2 nothing changes: x and y are swapped twice. + if rotation is not None: + electron_density = rotation(electron_density) + fft = np.fft.fftn(electron_density) + if electron_density.ndim == 3: + fft = fft[:, :, 0] # Take the z=0 slice. + fft = np.fft.fftshift(fft) + # Now some magic. Since x_beam = -1 * y_PIC (side z) we need to do the + # equivalent transformation q_x -> -q_x. The [1:,:] is necessary since the + # fft output has one extra, mismatching after reflection, frequency. It is + # left out of the comparison. + fft, amplitude = fft[1:, 1:], amplitude[1:, 1:] + fft = fft[::-1, :] + + fft = fft.astype(amplitude.dtype.type) + if amplitude.real.dtype.type is np.float32: + params = {"abs_tolerance": 1e-1, + "threshold": 1e-1, "rel_tolerance": 1e-1} + elif amplitude.real.dtype.type is np.float64: + params = {"abs_tolerance": 1e-8, + "threshold": 1e-8, "rel_tolerance": 1e-8} + else: + raise TypeError + + check_real = is_close(amplitude.real, fft.real, **params) + check_imag = is_close(amplitude.imag, fft.imag, **params) + return check_real and check_imag + + +def check_summation(): + simulation_path = '../../../../' + # Load pluginOutput + xray_scattering_data = XrayScatteringData(simulation_path, 'e', 'h5') + amplitude0 = xray_scattering_data.get(iteration=0) + amplitude1 = xray_scattering_data.get(iteration=1) + del xray_scattering_data + difference = amplitude1 - amplitude0 + if amplitude0.real.dtype.type is np.float32: + params = {"abs_tolerance": 1e-4, + "threshold": 1e-2, "rel_tolerance": 1e-3} + elif amplitude0.real.dtype.type is np.float64: + params = {"abs_tolerance": 1e-12, 
"threshold": 1e-11, "rel_tolerance": 1e-11} + else: + raise TypeError + real_check = is_close(difference.real, amplitude0.real, **params) + imag_check = is_close(difference.imag, amplitude0.imag, **params) + return real_check and imag_check diff --git a/share/picongpu/tests/XrayScattering/lib/python/picongpu/is_close.py b/share/picongpu/tests/XrayScattering/lib/python/picongpu/is_close.py new file mode 100644 index 0000000000..001f5dc6b9 --- /dev/null +++ b/share/picongpu/tests/XrayScattering/lib/python/picongpu/is_close.py @@ -0,0 +1,10 @@ +import numpy as np + + +def is_close(input1, input2, abs_tolerance, threshold, rel_tolerance): + assert input1.dtype.type is input2.dtype.type + diff = np.abs(input1 - input2) + check0 = np.minimum(np.abs(input1), np.abs(input2)) < threshold + check1 = diff < abs_tolerance + check2 = diff < rel_tolerance * np.maximum(np.abs(input1), np.abs(input2)) + return np.all(np.logical_or(np.logical_and(check0, check1), check2)) diff --git a/share/picongpu/tests/XrayScattering/lib/python/picongpu/test_1.py b/share/picongpu/tests/XrayScattering/lib/python/picongpu/test_1.py new file mode 100644 index 0000000000..52c92acc7a --- /dev/null +++ b/share/picongpu/tests/XrayScattering/lib/python/picongpu/test_1.py @@ -0,0 +1,23 @@ +from checks import compare_with_fft +from checks import check_summation + + +def main(): + + electrons_check = compare_with_fft('e', 1) + ions_check = compare_with_fft('i', 28) + summation_check = check_summation() + if summation_check and electrons_check and ions_check: + print("All tests passed.") + else: + print("Some tests didn't pass.") + print("electrons test {}" + "".format(electrons_check)) + print("ion test {}" + "".format(ions_check)) + print("check summation test {}" + "".format(summation_check)) + + +if __name__ == '__main__': + main() diff --git a/share/picongpu/tests/XrayScattering/lib/python/picongpu/test_2.py b/share/picongpu/tests/XrayScattering/lib/python/picongpu/test_2.py new file mode 100644 index 
0000000000..62ebc1531c --- /dev/null +++ b/share/picongpu/tests/XrayScattering/lib/python/picongpu/test_2.py @@ -0,0 +1,16 @@ +from checks import compare_with_fft + + +def main(): + + electrons_check = compare_with_fft('e', 1) + if electrons_check: + print("All tests passed.") + else: + print("Some tests didn't pass.") + print("electrons test {}" + "".format(electrons_check)) + + +if __name__ == '__main__': + main() diff --git a/share/picongpu/tests/compileCurrentSolver/README.rst b/share/picongpu/tests/compileCurrentSolver/README.rst new file mode 100644 index 0000000000..d1ffee0bd2 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/README.rst @@ -0,0 +1,5 @@ +Compile Test for Selected Species Solver +======================================== + +This test compiles the current solver for different particle shapes. +Particle pushers are checked in the example SingleParticleTest. diff --git a/share/picongpu/tests/compileCurrentSolver/cmakeFlags b/share/picongpu/tests/compileCurrentSolver/cmakeFlags new file mode 100755 index 0000000000..c0bb0d6f6e --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/cmakeFlags @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2021 Axel Huebl, Rene Widera +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . 
+# + +# +# generic compile options +# + +################################################################################ +# add presets here +# - default: index 0 +# - start with zero index +# - increase by 1, no gaps + +flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=EmZ;-DPARAM_PARTICLESHAPE=PQS'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=EmZ;-DPARAM_PARTICLESHAPE=PCS;-DPARAM_DIMENSION=DIM2'" +# Esirkepov and TSC is tested in most examples +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=Esirkepov;-DPARAM_PARTICLESHAPE=CIC'" +flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=Esirkepov;-DPARAM_PARTICLESHAPE=TSC;-DPARAM_CURRENTINTERPOLATION=Binomial'" +flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=Esirkepov;-DPARAM_PARTICLESHAPE=PQS;-DPARAM_DIMENSION=DIM2'" +flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_CURRENTSOLVER=VillaBune<>;-DPARAM_PARTICLESHAPE=CIC'" + + +################################################################################ +# execution + +case "$1" in + -l) echo ${#flags[@]} + ;; + -ll) for f in "${flags[@]}"; do echo $f; done + ;; + *) echo -n ${flags[$1]} + ;; +esac diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/density.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/density.param new file mode 100644 index 0000000000..bed7ea6308 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/density.param @@ -0,0 +1,46 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, + * Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/particles/densityProfiles/profiles.def" + + +namespace picongpu +{ + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. + * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1.e25; + } // namespace SI + + namespace densityProfiles + { + /* definition of homogenous profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/dimension.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/dimension.param new file mode 100644 index 0000000000..9cda9d9a01 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/dimension.param @@ -0,0 +1,31 @@ +/* Copyright 2014-2021 Axel Huebl, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#ifndef PARAM_DIMENSION +# define PARAM_DIMENSION DIM3 +#endif + +#define SIMDIM PARAM_DIMENSION + +namespace picongpu +{ + constexpr uint32_t simDim = SIMDIM; +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/fieldSolver.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/fieldSolver.param new file mode 100644 index 0000000000..c082c8add3 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/fieldSolver.param @@ -0,0 +1,85 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov, Klaus Steiniger + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Configure the field solver. + * + * Select the numerical Maxwell solver (e.g. Yee's method). + * + * Also allows to configure ad hoc mitigations for high frequency + * noise in some setups via current smoothing. + * + * \attention + * Currently, the laser initialization in PIConGPU is implemented to work with the standard Yee solver. + * Using a solver of higher order will result in a slightly increased laser amplitude and energy than expected. 
+ * + */ + +#pragma once + +#include "picongpu/fields/MaxwellSolver/Solvers.def" +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" + + +namespace picongpu +{ + namespace fields + { + /** Current Interpolation + * + * CurrentInterpolation is used to set a method performing the + * interpolate/assign operation from the generated currents of particle + * species to the electro-magnetic fields. + * + * Allowed values are: + * - None: + * - default for staggered grids/Yee-scheme + * - updates E + * - Binomial: 2nd order Binomial filter + * - smooths the current before assignment in staggered grid + * - updates E & breaks local charge conservation slightly + */ +#ifndef PARAM_CURRENTINTERPOLATION +# define PARAM_CURRENTINTERPOLATION None +#endif + using CurrentInterpolation = currentInterpolation::PARAM_CURRENTINTERPOLATION; + + /** FieldSolver + * + * Field Solver Selection: + * - Yee< CurrentInterpolation > : Standard Yee solver approximating derivatives with respect to time and + * space by second order finite differences. + * - YeePML< CurrentInterpolation >: Standard Yee solver using Perfectly Matched Layer Absorbing Boundary + * Conditions (PML) + * - Lehe< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * - LehePML< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * using Perfectly Matched Layer Absorbing Boundary Conditions (PML) + * - ArbitraryOrderFDTD< 4, CurrentInterpolation >: Solver using 4 neighbors to each direction to approximate + * *spatial* derivatives by finite differences. The number of neighbors can be changed from 4 to any positive, + * integer number. The order of the solver will be twice the number of neighbors in each direction. Yee's + * method is a special case of this using one neighbor to each direction. 
+ * - ArbitraryOrderFDTDPML< 4, CurrentInterpolation >: ArbitraryOrderFDTD solver using Perfectly Matched Layer + * Absorbing Boundary Conditions (PML) + * - None< CurrentInterpolation >: disable the vacuum update of E and B + */ + using Solver = maxwellSolver::Yee; + + } // namespace fields +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/fileOutput.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/fileOutput.param new file mode 100644 index 0000000000..18a50ebfa6 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/fileOutput.param @@ -0,0 +1,56 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, + * Benjamin Worpitz, Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include + +/* some forward declarations we need */ +#include "picongpu/fields/Fields.def" +#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def" + +#include + + +namespace picongpu +{ + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t<>; + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + + /** Possible native fields: FieldE, FieldB, FieldJ + */ + using NativeFileOutputFields = MakeSeq_t<>; + + using FileOutputFields = MakeSeq_t<>; + + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = MakeSeq_t<>; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/isaac.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/isaac.param new file mode 100644 index 0000000000..af98f960f6 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/isaac.param @@ -0,0 +1,57 @@ +/* Copyright 2016-2021 Alexander Matthes + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +/** @file + * + * Definition which native fields and density fields of particles will be + * visualizable with ISAAC. ISAAC is an in-situ visualization library with which + * the PIC simulation can be observed while it is running avoiding the time + * consuming writing and reading of simulation data for the classical post + * processing of data. + * + * ISAAC can directly visualize native fields like the E or B field, but + * density fields of particles need to be calculated from PIConGPU on the fly + * which slightly increases the runtime and the memory consumption. Every + * particle density field will reduce the amount of memory left for PIConGPU's + * particles and fields. + * + * To get best performance, ISAAC defines an exponential amount of different + * visualization kernels for every combination of (at runtime) activated + * fields. So furthermore a lot of fields will increase the compilation time. + * + */ + +#pragma once + +namespace picongpu +{ + namespace isaacP + { + /** Intermediate list of native particle species of PIConGPU which shall be + * visualized. */ + using Particle_Seq = MakeSeq_t<>; + + + /** Compile time sequence of all fields which shall be visualized. Basically + * the join of Native_Seq and Density_Seq. */ + using Fields_Seq = MakeSeq_t<>; + + + } // namespace isaacP +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/particle.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/particle.param new file mode 100644 index 0000000000..792e7acad0 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/particle.param @@ -0,0 +1,85 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, + * Richard Pausch + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/particles/startPosition/functors.def" +#include "picongpu/particles/manipulators/manipulators.def" +#include "picongpu/particles/filter/filter.def" + +#include + +namespace picongpu +{ + namespace particles + { + namespace startPosition + { + struct QuietParam25ppc + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = typename mCT::shrinkTo, simDim>::type; + }; + using Quiet25ppc = QuietImpl; + + } // namespace startPosition + + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * unit: none + */ + constexpr float_X MIN_WEIGHTING = 10.0; + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. 
+ */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL + = mCT::volume::type::value; + + namespace manipulators + { + CONST_VECTOR(float_X, 3, DriftParamPositive_direction, 1.0, 0.0, 0.0); + struct DriftParamPositive + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + const DriftParamPositive_direction_t direction; + }; + using AssignXDriftPositive = unary::Drift; + + struct TemperatureParam + { + /* Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.0005; + }; + using AddTemperature = unary::Temperature; + + } // namespace manipulators + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/particleFilters.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/particleFilters.param new file mode 100644 index 0000000000..5d6ad5c91b --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/particleFilters.param @@ -0,0 +1,83 @@ +/* Copyright 2013-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * A common task in both modeling and in situ processing (output) is the + * selection of particles of a particle species by attributes. 
Users can + * define such selections as particle filters in this file. + * + * Particle filters are simple mappings assigning each particle of a species + * either `true` or `false` (ignore / filter out). + * + * All active filters need to be listed in `AllParticleFilters`. They are then + * combined with `VectorAllSpecies` at compile-time, e.g. for plugins. + */ + +#pragma once + +#include "picongpu/particles/filter/filter.def" +#include "picongpu/particles/traits/SpeciesEligibleForSolver.hpp" + +#include +#include + + +namespace picongpu +{ + namespace particles + { + namespace filter + { + struct IfRelativeGlobalPositionParamLowQuarterPosition + { + /* lowerBound is included in the range */ + static constexpr float_X lowerBound = 0.0; + /* upperBound is excluded in the range */ + static constexpr float_X upperBound = 0.25; + /* dimension for the filter + * x = 0; y= 1; z = 2 + */ + static constexpr uint32_t dimension = 1u; + + // filter name + static constexpr char const* name = "lowerQuarterYPosition"; + }; + + using LowerQuarterYPosition + = filter::RelativeGlobalDomainPosition; + + /** Plugins: collection of all available particle filters + * + * Create a list of all filters here that you want to use in plugins. 
+ * + * Note: filter All is defined in picongpu/particles/filter/filter.def + */ + using AllParticleFilters = MakeSeq_t; + + } // namespace filter + + namespace traits + { + /* if needed for generic "free" filters, + * place `SpeciesEligibleForSolver` traits for filters here + */ + } // namespace traits + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/precision.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/precision.param new file mode 100644 index 0000000000..162c25da0d --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/precision.param @@ -0,0 +1,59 @@ +/* Copyright 2013-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Define the precision of typically used floating point types in the + * simulation. + * + * PIConGPU normalizes input automatically, allowing to use single-precision by + * default for the core algorithms. Note that implementations of various + * algorithms (usually plugins or non-core components) might still decide to + * hard-code a different (mixed) precision for some critical operations. + */ + +#pragma once + + +namespace picongpu +{ +/*! 
Select a precision for the simulation data + * - precision32Bit : use 32Bit floating point numbers + * [significant digits 7 to 8] + * - precision64Bit : use 64Bit floating point numbers + * [significant digits 15 to 16] + */ +#ifndef PARAM_PRECISION +# define PARAM_PRECISION precision32Bit +#endif + namespace precisionPIConGPU = PARAM_PRECISION; + + /*! Select a precision for special operations (can be different from simulation precision) + * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) + * - precision32Bit : use 32Bit floating point numbers + * - precision64Bit : use 64Bit floating point numbers + */ + namespace precisionSqrt = precisionPIConGPU; + namespace precisionExp = precisionPIConGPU; + namespace precisionTrigonometric = precisionPIConGPU; + + +} // namespace picongpu + +#include "picongpu/unitless/precision.unitless" diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/species.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/species.param new file mode 100644 index 0000000000..c25d0d838a --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/species.param @@ -0,0 +1,112 @@ +/* Copyright 2014-2021 Rene Widera, Richard Pausch, Annegret Roeszler, Klaus Steiniger + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +/** @file + * + * Particle shape, field to particle interpolation, current solver, and particle pusher + * can be declared here for usage in `speciesDefinition.param`. + * + * @see + * **MODELS / Hierarchy of Charge Assignment Schemes** + * in the online documentation for information on particle shapes. + * + * + * \attention + * The higher order shape names are redefined with release 0.6.0 in order to provide a consistent naming: + * * PQS is the name of the 3rd order assignment function (instead of PCS) + * * PCS is the name of the 4th order assignment function (instead of P4S) + * * P4S does not exist anymore + */ + +#pragma once + +#include "picongpu/particles/shapes.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" +#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" +#include "picongpu/particles/flylite/NonLTE.def" +#include "picongpu/fields/currentDeposition/Solver.def" + + +namespace picongpu +{ +/** select macroparticle shape + * + * **WARNING** the shape names are redefined and diverge from PIConGPU versions before 0.6.0. + * + * - particles::shapes::CIC : Assignment function is a piecewise linear spline + * - particles::shapes::TSC : Assignment function is a piecewise quadratic spline + * - particles::shapes::PQS : Assignment function is a piecewise cubic spline + * - particles::shapes::PCS : Assignment function is a piecewise quartic spline + */ +#ifndef PARAM_PARTICLESHAPE +# define PARAM_PARTICLESHAPE TSC +#endif + using UsedParticleShape = particles::shapes::PARAM_PARTICLESHAPE; + + /** select interpolation method to be used for interpolation of grid-based field values to particle positions + */ + using UsedField2Particle = FieldToParticleInterpolation; + + /*! 
select current solver method + * - currentSolver::Esirkepov< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * - currentSolver::VillaBune< SHAPE, STRATEGY > : particle shapes - CIC (1st order) only + * - currentSolver::EmZ< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * + * For development purposes: + * - currentSolver::EsirkepovNative< SHAPE, STRATEGY > : generic version of currentSolverEsirkepov + * without optimization (~4x slower and needs more shared memory) + * + * STRATEGY (optional): + * - currentSolver::strategy::StridedCachedSupercells + * - currentSolver::strategy::StridedCachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::CachedSupercells + * - currentSolver::strategy::CachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::NonCachedSupercells + * - currentSolver::strategy::NonCachedSupercellsScaled with N >= 1 + */ +#ifndef PARAM_CURRENTSOLVER +# define PARAM_CURRENTSOLVER Esirkepov +#endif + using UsedParticleCurrentSolver = currentSolver::PARAM_CURRENTSOLVER; + + /** particle pusher configuration + * + * Defining a pusher is optional for particles + * + * - particles::pusher::HigueraCary : Higuera & Cary's relativistic pusher preserving both volume and ExB velocity + * - particles::pusher::Vay : Vay's relativistic pusher preserving ExB velocity + * - particles::pusher::Boris : Boris' relativistic pusher preserving volume + * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher + * with classical radiation reaction + * - particles::pusher::Composite : composite of two given pushers, + * switches between using one (or none) of those + * + * For diagnostics & modeling: ------------------------------------------------ + * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field + * - particles::pusher::Free : free propagation, ignore fields + * (= free stream model) + * - particles::pusher::Photon : propagate 
with c in direction of normalized mom. + * - particles::pusher::Probe : Probe particles that interpolate E & B + * For development purposes: -------------------------------------------------- + * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) + */ + using UsedParticlePusher = particles::pusher::Boris; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/speciesDefinition.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/speciesDefinition.param new file mode 100644 index 0000000000..81954926d5 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/speciesDefinition.param @@ -0,0 +1,83 @@ +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/Particles.hpp" + +#include +#include +#include +#include +#include + + +#ifndef PARAM_RADIATION +/* disable radiation calculation */ +# define PARAM_RADIATION 0 +#endif + + +namespace picongpu +{ + /*########################### define particle attributes #####################*/ + + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t< + position, + momentum, + weighting +#if(PARAM_RADIATION == 1) + , + momentumPrev1 +#endif + >; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*########################### end species ####################################*/ + + /** All known particle species of the simulation + * + * List all defined particle species from above in this list + * to make them available to the PIC algorithm. + */ + using VectorAllSpecies = MakeSeq_t; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/speciesInitialization.param b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/speciesInitialization.param new file mode 100644 index 0000000000..065fe94be8 --- /dev/null +++ b/share/picongpu/tests/compileCurrentSolver/include/picongpu/param/speciesInitialization.param @@ -0,0 +1,48 @@ +/* Copyright 2015-2021 Rene Widera, Axel Huebl + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Initialize particles inside particle species. This is the final step in + * setting up particles (defined in `speciesDefinition.param`) via density + * profiles (defined in `density.param`). One can then further derive particles + * from one species to another and manipulate attributes with "manipulators" + * and "filters" (defined in `particle.param` and `particleFilters.param`). + */ + +#pragma once + +#include "picongpu/particles/InitFunctors.hpp" + + +namespace picongpu +{ + namespace particles + { + /** InitPipeline defines in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + Manipulate, + Manipulate>; + + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/tests/compileFieldSolver/README.rst b/share/picongpu/tests/compileFieldSolver/README.rst new file mode 100644 index 0000000000..2d44f77f07 --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/README.rst @@ -0,0 +1,5 @@ +Compile Test for Field Solver +============================= + +This test compiles the field solver for two and three dimensions. +One electron species is required to test the current interpolation algorithms. 
diff --git a/share/picongpu/tests/compileFieldSolver/cmakeFlags b/share/picongpu/tests/compileFieldSolver/cmakeFlags new file mode 100755 index 0000000000..014ad8597d --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/cmakeFlags @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2021 Axel Huebl, Rene Widera +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +# +# generic compile options +# + +################################################################################ +# add presets here +# - default: index 0 +# - start with zero index +# - increase by 1, no gaps + +# Yee solver is tested in the example FieldAbsorberTest +flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=LehePML;-DPARAM_PRECISION=precision64Bit'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=LehePML;-DPARAM_DIMENSION=DIM2'" +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_FIELDSOLVER=Lehe;-DPARAM_DIMENSION=DIM2'" + + +################################################################################ +# execution + +case "$1" in + -l) echo ${#flags[@]} + ;; + -ll) for f in "${flags[@]}"; do echo $f; done + ;; + *) echo -n ${flags[$1]} + ;; +esac diff --git a/share/picongpu/tests/compileFieldSolver/include/picongpu/param/dimension.param b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/dimension.param new file mode 100644 index 0000000000..9cda9d9a01 --- /dev/null +++ 
b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/dimension.param @@ -0,0 +1,31 @@ +/* Copyright 2014-2021 Axel Huebl, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#ifndef PARAM_DIMENSION +# define PARAM_DIMENSION DIM3 +#endif + +#define SIMDIM PARAM_DIMENSION + +namespace picongpu +{ + constexpr uint32_t simDim = SIMDIM; +} // namespace picongpu diff --git a/share/picongpu/tests/compileFieldSolver/include/picongpu/param/fieldSolver.param b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/fieldSolver.param new file mode 100644 index 0000000000..74e9a79ce2 --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/fieldSolver.param @@ -0,0 +1,82 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Sergei Bastrakov, Klaus Steiniger + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Configure the field solver. + * + * Select the numerical Maxwell solver (e.g. Yee's method). + * + * Also allows to configure ad hoc mitigations for high frequency + * noise in some setups via current smoothing. + * + * \attention + * Currently, the laser initialization in PIConGPU is implemented to work with the standard Yee solver. + * Using a solver of higher order will result in a slightly increased laser amplitude and energy than expected. + * + */ + +#pragma once + +#include "picongpu/fields/MaxwellSolver/Solvers.def" +#include "picongpu/fields/currentInterpolation/CurrentInterpolation.def" + + +namespace picongpu +{ + namespace fields + { + /** Current Interpolation + * + * CurrentInterpolation is used to set a method performing the + * interpolate/assign operation from the generated currents of particle + * species to the electro-magnetic fields. + * + * Allowed values are: + * - None: + * - default for staggered grids/Yee-scheme + * - updates E + * - Binomial: 2nd order Binomial filter + * - smooths the current before assignment in staggered grid + * - updates E & breaks local charge conservation slightly + */ + using CurrentInterpolation = currentInterpolation::None; + + /** FieldSolver + * + * Field Solver Selection: + * - Yee< CurrentInterpolation > : Standard Yee solver approximating derivatives with respect to time and + * space by second order finite differences. + * - YeePML< CurrentInterpolation >: Standard Yee solver using Perfectly Matched Layer Absorbing Boundary + * Conditions (PML) + * - Lehe< CurrentInterpolation >: Num. Cherenkov free field solver in a chosen direction + * - LehePML< CurrentInterpolation >: Num. 
Cherenkov free field solver in a chosen direction + * using Perfectly Matched Layer Absorbing Boundary Conditions (PML) + * - ArbitraryOrderFDTD< 4, CurrentInterpolation >: Solver using 4 neighbors to each direction to approximate + * *spatial* derivatives by finite differences. The number of neighbors can be changed from 4 to any positive, + * integer number. The order of the solver will be twice the number of neighbors in each direction. Yee's + * method is a special case of this using one neighbor to each direction. + * - ArbitraryOrderFDTDPML< 4, CurrentInterpolation >: ArbitraryOrderFDTD solver using Perfectly Matched Layer + * Absorbing Boundary Conditions (PML) + * - None< CurrentInterpolation >: disable the vacuum update of E and B + */ + using Solver = maxwellSolver::PARAM_FIELDSOLVER; + + } // namespace fields +} // namespace picongpu diff --git a/share/picongpu/tests/compileFieldSolver/include/picongpu/param/fileOutput.param b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/fileOutput.param new file mode 100644 index 0000000000..18a50ebfa6 --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/fileOutput.param @@ -0,0 +1,56 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, + * Benjamin Worpitz, Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include + +/* some forward declarations we need */ +#include "picongpu/fields/Fields.def" +#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def" + +#include + + +namespace picongpu +{ + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t<>; + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + + /** Possible native fields: FieldE, FieldB, FieldJ + */ + using NativeFileOutputFields = MakeSeq_t<>; + + using FileOutputFields = MakeSeq_t<>; + + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = MakeSeq_t<>; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileFieldSolver/include/picongpu/param/grid.param b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/grid.param new file mode 100644 index 0000000000..ffefe1be6c --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/grid.param @@ -0,0 +1,87 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch, + * Benjamin Worpitz + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + + +#pragma once + +namespace picongpu +{ + namespace SI + { + /** Duration of one timestep + * unit: seconds */ + constexpr float_64 DELTA_T_SI = 1.79e-16; + + /** equals X + * unit: meter */ + constexpr float_64 CELL_WIDTH_SI = 9.34635e-8; + /** equals Y + * unit: meter */ + constexpr float_64 CELL_HEIGHT_SI = CELL_WIDTH_SI; + /** equals Z + * unit: meter */ + constexpr float_64 CELL_DEPTH_SI = CELL_WIDTH_SI; + + /** Note on units in reduced dimensions + * + * In 2D3V simulations, the CELL_DEPTH_SI (Z) cell length + * is still used for normalization of densities, etc. + * + * A 2D3V simulation in a cartesian PIC simulation such as + * ours only changes the degrees of freedom in motion for + * (macro) particles and all (field) information in z + * travels instantaneous, making the 2D3V simulation + * behave like the interaction of infinite "wire particles" + * in fields with perfect symmetry in Z. + */ + } // namespace SI + + //! Defines the size of the absorbing zone (in cells) + constexpr uint32_t ABSORBER_CELLS[3][2] = { + {32, 32}, /*x direction [negative,positive]*/ + {32, 32}, /*y direction [negative,positive]*/ + {32, 32} /*z direction [negative,positive]*/ + }; // unit: number of cells + + //! Define the strength of the absorber for any direction + constexpr float_X ABSORBER_STRENGTH[3][2] = { + {1.0e-3, 1.0e-3}, /*x direction [negative,positive]*/ + {1.0e-3, 1.0e-3}, /*y direction [negative,positive]*/ + {1.0e-3, 1.0e-3} /*z direction [negative,positive]*/ + }; // unit: none + + /** When to move the co-moving window. + * An initial pseudo particle, flying with the speed of light, + * is fired at the begin of the simulation. + * When it reaches movePoint % of the absolute(*) simulation area, + * the co-moving window starts to move with the speed of light. 
+ * + * (*) Note: beware, that there is one "hidden" row of gpus at the y-front, + * when you use the co-moving window + * 0.75 means only 75% of simulation area is used for real simulation + * + * Warning: this variable is deprecated, but currently still required for + * building purposes. Please keep the variable here. In case a moving window + * is enabled in your .cfg file, please set the move point using the + * 'windowMovePoint' parameter in that file, its default value is movePoint. + */ + constexpr float_64 movePoint = 0.90; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileFieldSolver/include/picongpu/param/isaac.param b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/isaac.param new file mode 100644 index 0000000000..af98f960f6 --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/isaac.param @@ -0,0 +1,57 @@ +/* Copyright 2016-2021 Alexander Matthes + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Definition which native fields and density fields of particles will be + * visualizable with ISAAC. ISAAC is an in-situ visualization library with which + * the PIC simulation can be observed while it is running avoiding the time + * consuming writing and reading of simulation data for the classical post + * processing of data. 
+ * + * ISAAC can directly visualize natives fields like the E or B field, but + * density fields of particles need to be calculated from PIConGPU on the fly + * which slightly increases the runtime and the memory consumption. Every + * particle density field will reduce the amount of memory left for PIConGPUs + * particles and fields. + * + * To get best performance, ISAAC defines an exponential amount of different + * visualization kernels for every combination of (at runtime) activated + * fields. So furthermore a lot of fields will increase the compilation time. + * + */ + +#pragma once + +namespace picongpu +{ + namespace isaacP + { + /** Intermediate list of native particle species of PIConGPU which shall be + * visualized. */ + using Particle_Seq = MakeSeq_t<>; + + + /** Compile time sequence of all fields which shall be visualized. Basically + * the join of Native_Seq and Density_Seq. */ + using Fields_Seq = MakeSeq_t<>; + + + } // namespace isaacP +} // namespace picongpu diff --git a/share/picongpu/tests/compileFieldSolver/include/picongpu/param/precision.param b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/precision.param new file mode 100644 index 0000000000..162c25da0d --- /dev/null +++ b/share/picongpu/tests/compileFieldSolver/include/picongpu/param/precision.param @@ -0,0 +1,59 @@ +/* Copyright 2013-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Define the precision of typically used floating point types in the + * simulation. + * + * PIConGPU normalizes input automatically, allowing to use single-precision by + * default for the core algorithms. Note that implementations of various + * algorithms (usually plugins or non-core components) might still decide to + * hard-code a different (mixed) precision for some critical operations. + */ + +#pragma once + + +namespace picongpu +{ +/*! Select a precision for the simulation data + * - precision32Bit : use 32Bit floating point numbers + * [significant digits 7 to 8] + * - precision64Bit : use 64Bit floating point numbers + * [significant digits 15 to 16] + */ +#ifndef PARAM_PRECISION +# define PARAM_PRECISION precision32Bit +#endif + namespace precisionPIConGPU = PARAM_PRECISION; + + /*! Select a precision special operations (can be different from simulation precision) + * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) + * - precision32Bit : use 32Bit floating point numbers + * - precision64Bit : use 64Bit floating point numbers + */ + namespace precisionSqrt = precisionPIConGPU; + namespace precisionExp = precisionPIConGPU; + namespace precisionTrigonometric = precisionPIConGPU; + + +} // namespace picongpu + +#include "picongpu/unitless/precision.unitless" diff --git a/share/picongpu/tests/compileParticlePusher/README.rst b/share/picongpu/tests/compileParticlePusher/README.rst new file mode 100644 index 0000000000..70fbfc1770 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/README.rst @@ -0,0 +1,4 @@ +Compile Test for Particle Pushers +================================= + +This test compiles all particle pushers, each for one particle shape. 
diff --git a/share/picongpu/tests/compileParticlePusher/cmakeFlags b/share/picongpu/tests/compileParticlePusher/cmakeFlags new file mode 100755 index 0000000000..431e8b47a1 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/cmakeFlags @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# +# Copyright 2013-2021 Axel Huebl, Rene Widera, Sergei Bastrakov +# +# This file is part of PIConGPU. +# +# PIConGPU is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# PIConGPU is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with PIConGPU. +# If not, see . +# + +# +# generic compile options +# + +################################################################################ +# add presets here +# - default: index 0 +# - start with zero index +# - increase by 1, no gaps + +# Pushers are generally independent from particle shapes, so do not attempt to +# test all possible combinations, just all pushers except Boris (tested in examples) +flags[0]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=HigueraCary;-DPARAM_PARTICLESHAPE=NGP'" +flags[1]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=Vay;-DPARAM_PARTICLESHAPE=CIC'" +flags[2]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=ReducedLandauLifshitz;-DPARAM_PARTICLESHAPE=TSC'" +flags[3]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=Boris;-DPARAM_COMPOSITEPUSHER=1;-DPARAM_PARTICLESHAPE=PQS'" +flags[4]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=HigueraCary;-DPARAM_COMPOSITEPUSHER=1;-DPARAM_PARTICLESHAPE=PCS'" 
+flags[5]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=Free;-DPARAM_PARTICLESHAPE=CIC'" +flags[6]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=Photon;-DPARAM_PARTICLESHAPE=TSC'" +flags[7]="-DPARAM_OVERWRITES:LIST='-DPARAM_PARTICLEPUSHER=Probe;-DPARAM_PARTICLESHAPE=PQS'" + +################################################################################ +# execution + +case "$1" in + -l) echo ${#flags[@]} + ;; + -ll) for f in "${flags[@]}"; do echo $f; done + ;; + *) echo -n ${flags[$1]} + ;; +esac diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/density.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/density.param new file mode 100644 index 0000000000..bed7ea6308 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/density.param @@ -0,0 +1,46 @@ +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, Felix Schmitt, + * Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/particles/densityProfiles/profiles.def" + + +namespace picongpu +{ + namespace SI + { + /** Base density in particles per m^3 in the density profiles. + * + * This is often taken as reference maximum density in normalized profiles. + * Individual particle species can define a `densityRatio` flag relative + * to this value. 
+ * + * unit: ELEMENTS/m^3 + */ + constexpr float_64 BASE_DENSITY_SI = 1.e25; + } // namespace SI + + namespace densityProfiles + { + /* definition of homogenous profile */ + using Homogenous = HomogenousImpl; + } // namespace densityProfiles +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/dimension.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/dimension.param new file mode 100644 index 0000000000..9cda9d9a01 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/dimension.param @@ -0,0 +1,31 @@ +/* Copyright 2014-2021 Axel Huebl, Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#ifndef PARAM_DIMENSION +# define PARAM_DIMENSION DIM3 +#endif + +#define SIMDIM PARAM_DIMENSION + +namespace picongpu +{ + constexpr uint32_t simDim = SIMDIM; +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/fileOutput.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/fileOutput.param new file mode 100644 index 0000000000..18a50ebfa6 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/fileOutput.param @@ -0,0 +1,56 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Felix Schmitt, + * Benjamin Worpitz, Richard Pausch + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +#pragma once + +#include + +/* some forward declarations we need */ +#include "picongpu/fields/Fields.def" +#include "picongpu/particles/particleToGrid/ComputeGridValuePerFrame.def" + +#include + + +namespace picongpu +{ + /** FieldTmpSolvers groups all solvers that create data for FieldTmp ****** + * + * FieldTmpSolvers is used in @see FieldTmp to calculate the exchange size + */ + using FieldTmpSolvers = MakeSeq_t<>; + + /** FileOutputFields: Groups all Fields that shall be dumped *************/ + + /** Possible native fields: FieldE, FieldB, FieldJ + */ + using NativeFileOutputFields = MakeSeq_t<>; + + using FileOutputFields = MakeSeq_t<>; + + + /** FileOutputParticles: Groups all Species that shall be dumped ********** + * + * hint: to disable particle output set to + * using FileOutputParticles = MakeSeq_t< >; + */ + using FileOutputParticles = MakeSeq_t<>; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/isaac.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/isaac.param new file mode 100644 index 0000000000..af98f960f6 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/isaac.param @@ -0,0 +1,57 @@ +/* Copyright 2016-2021 Alexander Matthes + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . 
+ */ + +/** @file + * + * Definition which native fields and density fields of particles will be + * visualizable with ISAAC. ISAAC is an in-situ visualization library with which + * the PIC simulation can be observed while it is running avoiding the time + * consuming writing and reading of simulation data for the classical post + * processing of data. + * + * ISAAC can directly visualize natives fields like the E or B field, but + * density fields of particles need to be calculated from PIConGPU on the fly + * which slightly increases the runtime and the memory consumption. Every + * particle density field will reduce the amount of memory left for PIConGPUs + * particles and fields. + * + * To get best performance, ISAAC defines an exponential amount of different + * visualization kernels for every combination of (at runtime) activated + * fields. So furthermore a lot of fields will increase the compilation time. + * + */ + +#pragma once + +namespace picongpu +{ + namespace isaacP + { + /** Intermediate list of native particle species of PIConGPU which shall be + * visualized. */ + using Particle_Seq = MakeSeq_t<>; + + + /** Compile time sequence of all fields which shall be visualized. Basically + * the join of Native_Seq and Density_Seq. */ + using Fields_Seq = MakeSeq_t<>; + + + } // namespace isaacP +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/particle.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/particle.param new file mode 100644 index 0000000000..792e7acad0 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/particle.param @@ -0,0 +1,85 @@ +/* Copyright 2013-2021 Axel Huebl, Rene Widera, Benjamin Worpitz, + * Richard Pausch + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/particles/startPosition/functors.def" +#include "picongpu/particles/manipulators/manipulators.def" +#include "picongpu/particles/filter/filter.def" + +#include + +namespace picongpu +{ + namespace particles + { + namespace startPosition + { + struct QuietParam25ppc + { + /** Count of particles per cell per direction at initial state + * unit: none + */ + using numParticlesPerDimension = typename mCT::shrinkTo, simDim>::type; + }; + using Quiet25ppc = QuietImpl; + + } // namespace startPosition + + /** a particle with a weighting below MIN_WEIGHTING will not + * be created / will be deleted + * unit: none + */ + constexpr float_X MIN_WEIGHTING = 10.0; + + /** During unit normalization, we assume this is a typical + * number of particles per cell for normalization of weighted + * particle attributes. 
+ */ + constexpr uint32_t TYPICAL_PARTICLES_PER_CELL + = mCT::volume::type::value; + + namespace manipulators + { + CONST_VECTOR(float_X, 3, DriftParamPositive_direction, 1.0, 0.0, 0.0); + struct DriftParamPositive + { + /** Initial particle drift velocity for electrons and ions + * Examples: + * - No drift is equal to 1.0 + * unit: none + */ + static constexpr float_64 gamma = 1.021; + const DriftParamPositive_direction_t direction; + }; + using AssignXDriftPositive = unary::Drift; + + struct TemperatureParam + { + /* Initial temperature + * unit: keV + */ + static constexpr float_64 temperature = 0.0005; + }; + using AddTemperature = unary::Temperature; + + } // namespace manipulators + } // namespace particles +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/precision.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/precision.param new file mode 100644 index 0000000000..162c25da0d --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/precision.param @@ -0,0 +1,59 @@ +/* Copyright 2013-2021 Rene Widera + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Define the precision of typically used floating point types in the + * simulation. 
+ * + * PIConGPU normalizes input automatically, allowing to use single-precision by + * default for the core algorithms. Note that implementations of various + * algorithms (usually plugins or non-core components) might still decide to + * hard-code a different (mixed) precision for some critical operations. + */ + +#pragma once + + +namespace picongpu +{ +/*! Select a precision for the simulation data + * - precision32Bit : use 32Bit floating point numbers + * [significant digits 7 to 8] + * - precision64Bit : use 64Bit floating point numbers + * [significant digits 15 to 16] + */ +#ifndef PARAM_PRECISION +# define PARAM_PRECISION precision32Bit +#endif + namespace precisionPIConGPU = PARAM_PRECISION; + + /*! Select a precision special operations (can be different from simulation precision) + * - precisionPIConGPU : use precision which is selected on top (precisionPIConGPU) + * - precision32Bit : use 32Bit floating point numbers + * - precision64Bit : use 64Bit floating point numbers + */ + namespace precisionSqrt = precisionPIConGPU; + namespace precisionExp = precisionPIConGPU; + namespace precisionTrigonometric = precisionPIConGPU; + + +} // namespace picongpu + +#include "picongpu/unitless/precision.unitless" diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/species.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/species.param new file mode 100644 index 0000000000..3064c2d9e1 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/species.param @@ -0,0 +1,130 @@ +/* Copyright 2014-2021 Rene Widera, Richard Pausch, Annegret Roeszler, Klaus Steiniger, Sergei Bastrakov + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Particle shape, field to particle interpolation, current solver, and particle pusher + * can be declared here for usage in `speciesDefinition.param`. + * + * @see + * **MODELS / Hierarchy of Charge Assignment Schemes** + * in the online documentation for information on particle shapes. + * + * + * \attention + * The higher order shape names are redefined with release 0.6.0 in order to provide a consistent naming: + * * PQS is the name of the 3rd order assignment function (instead of PCS) + * * PCS is the name of the 4th order assignment function (instead of P4S) + * * P4S does not exist anymore + */ + +#pragma once + +#include "picongpu/particles/shapes.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolationNative.hpp" +#include "picongpu/algorithms/FieldToParticleInterpolation.hpp" +#include "picongpu/algorithms/AssignedTrilinearInterpolation.hpp" +#include "picongpu/particles/flylite/NonLTE.def" +#include "picongpu/fields/currentDeposition/Solver.def" + + +namespace picongpu +{ +/** select macroparticle shape + * + * **WARNING** the shape names are redefined and diverge from PIConGPU versions before 0.6.0. 
+ * + * - particles::shapes::CIC : Assignment function is a piecewise linear spline + * - particles::shapes::TSC : Assignment function is a piecewise quadratic spline + * - particles::shapes::PQS : Assignment function is a piecewise cubic spline + * - particles::shapes::PCS : Assignment function is a piecewise quartic spline + */ +#ifndef PARAM_PARTICLESHAPE +# define PARAM_PARTICLESHAPE TSC +#endif + using UsedParticleShape = particles::shapes::PARAM_PARTICLESHAPE; + + /** select interpolation method to be used for interpolation of grid-based field values to particle positions + */ + using UsedField2Particle = FieldToParticleInterpolation; + + /*! select current solver method + * - currentSolver::Esirkepov< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * - currentSolver::VillaBune< SHAPE, STRATEGY > : particle shapes - CIC (1st order) only + * - currentSolver::EmZ< SHAPE, STRATEGY > : particle shapes - CIC, TSC, PQS, PCS (1st to 4th order) + * + * For development purposes: + * - currentSolver::EsirkepovNative< SHAPE, STRATEGY > : generic version of currentSolverEsirkepov + * without optimization (~4x slower and needs more shared memory) + * + * STRATEGY (optional): + * - currentSolver::strategy::StridedCachedSupercells + * - currentSolver::strategy::StridedCachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::CachedSupercells + * - currentSolver::strategy::CachedSupercellsScaled with N >= 1 + * - currentSolver::strategy::NonCachedSupercells + * - currentSolver::strategy::NonCachedSupercellsScaled with N >= 1 + */ + using UsedParticleCurrentSolver = currentSolver::EmZ; + +/** particle pusher configuration + * + * Defining a pusher is optional for particles + * + * - particles::pusher::HigueraCary : Higuera & Cary's relativistic pusher preserving both volume and ExB velocity + * - particles::pusher::Vay : Vay's relativistic pusher preserving ExB velocity + * - particles::pusher::Boris : Boris' relativistic pusher 
preserving volume + * - particles::pusher::ReducedLandauLifshitz : 4th order RungeKutta pusher + * with classical radiation reaction + * - particles::pusher::Composite : composite of two given pushers, + * switches between using one (or none) of those + * + * For diagnostics & modeling: ------------------------------------------------ + * - particles::pusher::Acceleration : Accelerate particles by applying a constant electric field + * - particles::pusher::Free : free propagation, ignore fields + * (= free stream model) + * - particles::pusher::Photon : propagate with c in direction of normalized mom. + * - particles::pusher::Probe : Probe particles that interpolate E & B + * For development purposes: -------------------------------------------------- + * - particles::pusher::Axel : a pusher developed at HZDR during 2011 (testing) + */ +#ifndef PARAM_PARTICLEPUSHER +# define PARAM_PARTICLEPUSHER Boris +#endif + +/* To avoid issues with commas in macro definitions, + * pass composite pushers via a special flag + */ +#ifndef PARAM_COMPOSITEPUSHER +# define PARAM_COMPOSITEPUSHER 0 +#endif + +#if PARAM_COMPOSITEPUSHER +# define PUSHER \ + particles::pusher::Composite< \ + particles::pusher::Vay, \ + particles::pusher::PARAM_PARTICLEPUSHER, \ + particles::pusher::CompositeBinarySwitchActivationFunctor<10>> +#else +# define PUSHER particles::pusher::PARAM_PARTICLEPUSHER +#endif + + using UsedParticlePusher = PUSHER; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/speciesDefinition.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/speciesDefinition.param new file mode 100644 index 0000000000..97a58949a7 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/speciesDefinition.param @@ -0,0 +1,69 @@ +/* Copyright 2013-2021 Rene Widera, Benjamin Worpitz, Heiko Burau + * + * This file is part of PIConGPU. 
+ * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +#pragma once + +#include "picongpu/simulation_defines.hpp" +#include "picongpu/particles/Particles.hpp" + +#include +#include +#include +#include +#include + + +namespace picongpu +{ + /*########################### define particle attributes #####################*/ + + /** describe attributes of a particle*/ + using DefaultParticleAttributes = MakeSeq_t, momentum, weighting, probeE, probeB>; + + /*########################### end particle attributes ########################*/ + + /*########################### define species #################################*/ + + /*--------------------------- electrons --------------------------------------*/ + + /* ratio relative to BASE_CHARGE and BASE_MASS */ + value_identifier(float_X, MassRatioElectrons, 1.0); + value_identifier(float_X, ChargeRatioElectrons, 1.0); + + using ParticleFlagsElectrons = MakeSeq_t< + particlePusher, + shape, + interpolation, + current, + massRatio, + chargeRatio>; + + /* define species electrons */ + using PIC_Electrons = Particles; + + /*########################### end species ####################################*/ + + /** All known particle species of the simulation + * + * List all defined particle species from above in this list + * to make them available to the PIC algorithm. 
+ */ + using VectorAllSpecies = MakeSeq_t; + +} // namespace picongpu diff --git a/share/picongpu/tests/compileParticlePusher/include/picongpu/param/speciesInitialization.param b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/speciesInitialization.param new file mode 100644 index 0000000000..4d7745ef40 --- /dev/null +++ b/share/picongpu/tests/compileParticlePusher/include/picongpu/param/speciesInitialization.param @@ -0,0 +1,47 @@ +/* Copyright 2015-2021 Rene Widera, Axel Huebl + * + * This file is part of PIConGPU. + * + * PIConGPU is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * PIConGPU is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with PIConGPU. + * If not, see . + */ + +/** @file + * + * Initialize particles inside particle species. This is the final step in + * setting up particles (defined in `speciesDefinition.param`) via density + * profiles (defined in `density.param`). One can then further derive particles + * from one species to another and manipulate attributes with "manipulators" + * and "filters" (defined in `particle.param` and `particleFilters.param`). 
+ */ + +#pragma once + +#include "picongpu/particles/InitFunctors.hpp" + + +namespace picongpu +{ + namespace particles + { + /** InitPipeline define in which order species are initialized + * + * the functors are called in order (from first to last functor) + */ + using InitPipeline = bmpl::vector< + CreateDensity, + Manipulate>; + + } // namespace particles +} // namespace picongpu diff --git a/share/pmacc/examples/gameOfLife2D/CMakeLists.txt b/share/pmacc/examples/gameOfLife2D/CMakeLists.txt index b2baeefd9a..798d2646ab 100644 --- a/share/pmacc/examples/gameOfLife2D/CMakeLists.txt +++ b/share/pmacc/examples/gameOfLife2D/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Rene Widera, Axel Huebl +# Copyright 2013-2021 Rene Widera, Axel Huebl # # This file is part of PMacc. # @@ -23,7 +23,7 @@ # Required cmake version ################################################################################ -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) ################################################################################ @@ -60,10 +60,10 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) ################################################################################ diff --git a/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp b/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp index ef6178feec..96cf25dceb 100644 --- a/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp +++ b/share/pmacc/examples/gameOfLife2D/include/Evolution.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Marco Garten +/* Copyright 2013-2021 Rene Widera, Marco Garten * * This file is part of PMacc. 
* @@ -26,11 +26,10 @@ #include #include #include -#include #include -#include -#include -#include +#include +#include +#include #include #include #include @@ -39,269 +38,188 @@ namespace gol { -namespace kernel -{ - using namespace pmacc; - - /** run game of life stencil - * - * evaluate each cell in the supercell - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct Evolution + namespace kernel { - /** run stencil for a supercell + using namespace pmacc; + + /** run game of life stencil * - * @tparam T_BoxReadOnly PMacc::DataBox, box type of the old grid data - * @tparam T_BoxWriteOnly PMacc::DataBox, box type of the new grid data - * @tparam T_Mapping mapping functor type + * evaluate each cell in the supercell * - * @param buffRead buffer with cell data of the current step - * @param buffWrite buffer for the updated cell data - * @param rule description of the rule as bitmap mask - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_BoxReadOnly, - typename T_BoxWriteOnly, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_BoxReadOnly const & buffRead, - T_BoxWriteOnly & buffWrite, - uint32_t const rule, - T_Mapping const & mapper - ) const + template + struct Evolution { - using namespace mappings::threads; - - using Type = typename T_BoxReadOnly::ValueType; - using SuperCellSize = typename T_Mapping::SuperCellSize; - using BlockArea = SuperCellDescription< - SuperCellSize, - math::CT::Int< 1, 1 >, - math::CT::Int< 1, 1 > - >; - auto cache = CachedBox::create< - 0, - Type - >( acc, BlockArea( ) ); - - Space const block( mapper.getSuperCellIndex( Space( blockIdx ) ) ); - Space const blockCell = block * T_Mapping::SuperCellSize::toRT( ); - - constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx 
= threadIdx.x; - - auto buffRead_shifted = buffRead.shift( blockCell ); - - ThreadCollective< - BlockArea, - numWorkers - > collective( workerIdx ); - - nvidia::functors::Assign assign; - collective( - acc, - assign, - cache, - buffRead_shifted - ); - - __syncthreads(); - - ForEachIdx< - IdxConfig< - cellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { + /** run stencil for a supercell + * + * @tparam T_BoxReadOnly PMacc::DataBox, box type of the old grid data + * @tparam T_BoxWriteOnly PMacc::DataBox, box type of the new grid data + * @tparam T_Mapping mapping functor type + * + * @param buffRead buffer with cell data of the current step + * @param buffWrite buffer for the updated cell data + * @param rule description of the rule as bitmap mask + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_BoxReadOnly const& buffRead, + T_BoxWriteOnly& buffWrite, + uint32_t const rule, + T_Mapping const& mapper) const + { + using namespace mappings::threads; + + using Type = typename T_BoxReadOnly::ValueType; + using SuperCellSize = typename T_Mapping::SuperCellSize; + using BlockArea = SuperCellDescription, math::CT::Int<1, 1>>; + auto cache = CachedBox::create<0, Type>(acc, BlockArea()); + + Space const block(mapper.getSuperCellIndex(Space(cupla::blockIdx(acc)))); + Space const blockCell = block * T_Mapping::SuperCellSize::toRT(); + + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t const workerIdx = cupla::threadIdx(acc).x; + + auto buffRead_shifted = buffRead.shift(blockCell); + + ThreadCollective collective(workerIdx); + + nvidia::functors::Assign assign; + collective(acc, assign, cache, buffRead_shifted); + + cupla::__syncthreads(acc); + + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { // cell index within the superCell - 
DataSpace< DIM2 > const cellIdx = DataSpaceOperations< DIM2 >::template map< SuperCellSize >( linearIdx ); + DataSpace const cellIdx = DataSpaceOperations::template map(linearIdx); Type neighbors = 0; - for (uint32_t i = 1; i < 9; ++i) + for(uint32_t i = 1; i < 9; ++i) { - Space const offset( Mask::getRelativeDirections< DIM2 > ( i ) ); - neighbors += cache( cellIdx + offset ); + Space const offset(Mask::getRelativeDirections(i)); + neighbors += cache(cellIdx + offset); } - Type isLife = cache( cellIdx ); - isLife = static_cast< bool >( ( (!isLife)*( 1 << (neighbors + 9) ) ) & rule ) + - static_cast< bool >( ( isLife*( 1 << ( neighbors ) ) ) & rule ); + Type isLife = cache(cellIdx); + isLife = static_cast(((!isLife) * (1 << (neighbors + 9))) & rule) + + static_cast((isLife * (1 << (neighbors))) & rule); - buffWrite( blockCell + cellIdx ) = isLife; - } - ); - } - }; + buffWrite(blockCell + cellIdx) = isLife; + }); + } + }; - /** initialize each cell - * - * randomly activate each cell within a supercell - * - * @tparam T_numWorkers number of workers - */ - template< uint32_t T_numWorkers > - struct RandomInit - { /** initialize each cell * - * @tparam T_BoxWriteOnly PMacc::DataBox, box type of the new grid data - * @tparam T_Mapping mapping functor type + * randomly activate each cell within a supercell * - * @param buffRead buffer with cell data of the current step - * @param seed random number generator seed - * @param threshold threshold to activate a cell, range [0.0;1.0] - * if random number is <= threshold than the cell will - * be activated - * @param mapper functor to map a block to a supercell + * @tparam T_numWorkers number of workers */ - template< - typename T_BoxWriteOnly, - typename T_Mapping, - typename T_Acc - > - DINLINE void operator()( - T_Acc const & acc, - T_BoxWriteOnly & buffWrite, - uint32_t const seed, - float const threshold, - T_Mapping const & mapper - ) const + template + struct RandomInit { - using namespace mappings::threads; - - using 
SuperCellSize = typename T_Mapping::SuperCellSize; - constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume< SuperCellSize >::type::value; - constexpr uint32_t numWorkers = T_numWorkers; - uint32_t const workerIdx = threadIdx.x; - - // get position in grid in units of SuperCells from blockID - Space const block( mapper.getSuperCellIndex( Space( blockIdx ) ) ); - // convert position in unit of cells - Space const blockCell = block * T_Mapping::SuperCellSize::toRT( ); - // convert CUDA dim3 to DataSpace - Space const threadIndex(threadIdx); - - uint32_t const globalUniqueId = DataSpaceOperations< DIM2 >::map( - mapper.getGridSuperCells() * T_Mapping::SuperCellSize::toRT(), - blockCell + DataSpaceOperations< DIM2 >::template map< SuperCellSize >( workerIdx ) - ); - - // get uniform random number from seed - auto rng = nvidia::rng::create( - nvidia::rng::methods::Xor< T_Acc >( acc, seed, globalUniqueId ), - nvidia::rng::distributions::Uniform_float::get( acc ) - ); - - ForEachIdx< - IdxConfig< - cellsPerSuperCell, - numWorkers - > - >{ workerIdx }( - [&]( - uint32_t const linearIdx, - uint32_t const - ) - { + /** initialize each cell + * + * @tparam T_BoxWriteOnly PMacc::DataBox, box type of the new grid data + * @tparam T_Mapping mapping functor type + * + * @param buffRead buffer with cell data of the current step + * @param seed random number generator seed + * @param threshold threshold to activate a cell, range [0.0;1.0] + * if random number is <= threshold than the cell will + * be activated + * @param mapper functor to map a block to a supercell + */ + template + DINLINE void operator()( + T_Acc const& acc, + T_BoxWriteOnly& buffWrite, + uint32_t const seed, + float const threshold, + T_Mapping const& mapper) const + { + using namespace mappings::threads; + + using SuperCellSize = typename T_Mapping::SuperCellSize; + constexpr uint32_t cellsPerSuperCell = pmacc::math::CT::volume::type::value; + constexpr uint32_t numWorkers = T_numWorkers; + uint32_t 
const workerIdx = cupla::threadIdx(acc).x; + + // get position in grid in units of SuperCells from blockID + Space const block(mapper.getSuperCellIndex(Space(cupla::blockIdx(acc)))); + // convert position in unit of cells + Space const blockCell = block * T_Mapping::SuperCellSize::toRT(); + // convert CUDA dim3 to DataSpace + Space const threadIndex(cupla::threadIdx(acc)); + + uint32_t const globalUniqueId = DataSpaceOperations::map( + mapper.getGridSuperCells() * T_Mapping::SuperCellSize::toRT(), + blockCell + DataSpaceOperations::template map(workerIdx)); + + // create a random number state and generator + using RngMethod = random::methods::XorMin; + using State = typename RngMethod::StateType; + State state; + RngMethod method; + method.init(acc, state, seed, globalUniqueId); + using Distribution = random::distributions::Uniform; + using Random = random::Random; + Random rng(&state); + + ForEachIdx>{ + workerIdx}([&](uint32_t const linearIdx, uint32_t const) { // cell index within the superCell - DataSpace< DIM2 > const cellIdx = DataSpaceOperations< DIM2 >::template map< SuperCellSize >( linearIdx ); + DataSpace const cellIdx = DataSpaceOperations::template map(linearIdx); // write 1(white) if uniform random number 0( rng() <= threshold ); - } - ); - } - }; -} // namespace kernel + buffWrite(blockCell + cellIdx) = static_cast(rng(acc) <= threshold); + }); + } + }; + } // namespace kernel - template< typename T_MappingDesc > + template struct Evolution { - std::unique_ptr< T_MappingDesc > mapping; + std::unique_ptr mapping; uint32_t rule; - Evolution( uint32_t rule ) : rule( rule ) + Evolution(uint32_t rule) : rule(rule) { - } - void init( - Space const & layout, - Space const & guardSize - ) + void init(Space const& layout, Space const& guardSize) { - mapping = memory::makeUnique< T_MappingDesc >( - layout, - guardSize - ); + mapping = std::make_unique(layout, guardSize); } - template< typename DBox > - void initEvolution( - DBox const & writeBox, - float const 
fraction - ) + template + void initEvolution(DBox const& writeBox, float const fraction) { - AreaMapping < - CORE + BORDER, - T_MappingDesc - > mapper( *mapping ); - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume< typename T_MappingDesc::SuperCellSize >::type::value - >::value; + AreaMapping mapper(*mapping); + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; - GridController< DIM2 >& gc = Environment< DIM2 >::get( ).GridController( ); - uint32_t seed = gc.getGlobalSize( ) + gc.getGlobalRank( ); + GridController& gc = Environment::get().GridController(); + uint32_t seed = gc.getGlobalSize() + gc.getGlobalRank(); - PMACC_KERNEL( kernel::RandomInit< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - writeBox, - seed, - fraction, - mapper - ); + PMACC_KERNEL(kernel::RandomInit{}) + (mapper.getGridDim(), numWorkers)(writeBox, seed, fraction, mapper); } - template< - uint32_t Area, - typename DBox - > - void run( - DBox const & readBox, - DBox const & writeBox - ) + template + void run(DBox const& readBox, DBox const& writeBox) { - AreaMapping < - Area, - T_MappingDesc - > mapper( *mapping ); - constexpr uint32_t numWorkers = traits::GetNumWorkers< - math::CT::volume< typename T_MappingDesc::SuperCellSize >::type::value - >::value; + AreaMapping mapper(*mapping); + constexpr uint32_t numWorkers + = traits::GetNumWorkers::type::value>::value; - PMACC_KERNEL( kernel::Evolution< numWorkers >{ } )( - mapper.getGridDim( ), - numWorkers - )( - readBox, - writeBox, - rule, - mapper - ); + PMACC_KERNEL(kernel::Evolution{}) + (mapper.getGridDim(), numWorkers)(readBox, writeBox, rule, mapper); } }; diff --git a/share/pmacc/examples/gameOfLife2D/include/GatherSlice.hpp b/share/pmacc/examples/gameOfLife2D/include/GatherSlice.hpp index d33ed5c18d..6ee1644067 100644 --- a/share/pmacc/examples/gameOfLife2D/include/GatherSlice.hpp +++ b/share/pmacc/examples/gameOfLife2D/include/GatherSlice.hpp @@ -1,4 +1,4 @@ -/* 
Copyright 2013-2020 Axel Huebl, Heiko Burau, Rene Widera, +/* Copyright 2013-2021 Axel Huebl, Heiko Burau, Rene Widera, * Maximilian Knespel, Benjamin Worpitz * * This file is part of PMacc. @@ -24,210 +24,210 @@ #include #include #include -#include // DIM* +#include // DIM* #include namespace gol { -using namespace pmacc; + using namespace pmacc; -struct MessageHeader -{ - - MessageHeader() - { - } - - MessageHeader(Space simSize, GridLayout layout, Space nodeOffset) : - simSize(simSize), - nodeOffset(nodeOffset) + struct MessageHeader { - nodeSize = layout.getDataSpace(); - nodePictureSize = layout.getDataSpaceWithoutGuarding(); - nodeGuardCells = layout.getGuard(); - } - - Space simSize; - Space nodeSize; - Space nodePictureSize; - Space nodeGuardCells; - Space nodeOffset; - -}; - -struct GatherSlice -{ - - GatherSlice() : mpiRank(-1), numRanks(0), filteredData(nullptr), fullData(nullptr), isMPICommInitialized(false) - { - } + MessageHeader() + { + } - ~GatherSlice() - { + MessageHeader(Space simSize, GridLayout layout, Space nodeOffset) + : simSize(simSize) + , nodeOffset(nodeOffset) + { + nodeSize = layout.getDataSpace(); + nodePictureSize = layout.getDataSpaceWithoutGuarding(); + nodeGuardCells = layout.getGuard(); + } - } + Space simSize; + Space nodeSize; + Space nodePictureSize; + Space nodeGuardCells; + Space nodeOffset; + }; - void finalize() + struct GatherSlice { - if (filteredData != nullptr) - { - delete[] filteredData; - filteredData=nullptr; - } - if (fullData != nullptr) + GatherSlice() : mpiRank(-1), numRanks(0), filteredData(nullptr), fullData(nullptr), isMPICommInitialized(false) { - delete[] fullData; - fullData=nullptr; } - if (isMPICommInitialized) + + ~GatherSlice() { - MPI_Comm_free(&comm); - isMPICommInitialized=false; } - mpiRank=-1; - } - - /* - * Saves the message header and creates a new MPI group with all ranks - * that called this with isActive = true - * @return true if the current rank is the master of the new MPI group - */ - 
bool init(const MessageHeader mHeader, bool isActive) - { - header = mHeader; - int countRanks = Environment::get().GridController().getGpuNodes().productOfComponents(); - std::vector gatherRanks(countRanks); - std::vector groupRanks(countRanks); - mpiRank = Environment::get().GridController().getGlobalRank(); - if (!isActive) - mpiRank = -1; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Allgather(&mpiRank, 1, MPI_INT, &gatherRanks[0], 1, MPI_INT, MPI_COMM_WORLD)); - - for (int i = 0; i < countRanks; ++i) + void finalize() { - if (gatherRanks[i] != -1) + if(filteredData != nullptr) + { + delete[] filteredData; + filteredData = nullptr; + } + if(fullData != nullptr) + { + delete[] fullData; + fullData = nullptr; + } + if(isMPICommInitialized) { - groupRanks[numRanks] = gatherRanks[i]; - numRanks++; + MPI_Comm_free(&comm); + isMPICommInitialized = false; } + mpiRank = -1; } - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_Group group; - MPI_Group newgroup; - MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &group)); - MPI_CHECK(MPI_Group_incl(group, numRanks, &groupRanks[0], &newgroup)); - - MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, newgroup, &comm)); - - if (mpiRank != -1) + /* + * Saves the message header and creates a new MPI group with all ranks + * that called this with isActive = true + * @return true if the current rank is the master of the new MPI group + */ + bool init(const MessageHeader mHeader, bool isActive) { - MPI_Comm_rank(comm, &mpiRank); - isMPICommInitialized = true; - } - - return mpiRank == 0; - } + header = mHeader; - template - Box operator()(Box data) - { - typedef typename Box::ValueType ValueType; + int countRanks = Environment::get().GridController().getGpuNodes().productOfComponents(); + std::vector gatherRanks(countRanks); + std::vector groupRanks(countRanks); + 
mpiRank = Environment::get().GridController().getGlobalRank(); + if(!isActive) + mpiRank = -1; - Box dstBox = Box(PitchedBox ( - (ValueType*) filteredData, - Space(), - header.simSize, - header.simSize.x() * sizeof (ValueType) - )); - MessageHeader mHeader; - MessageHeader* fakeHeader = &mHeader; - memcpy(fakeHeader, &header, sizeof(MessageHeader)); + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Allgather(&mpiRank, 1, MPI_INT, &gatherRanks[0], 1, MPI_INT, MPI_COMM_WORLD)); - char* recvHeader = new char[ sizeof(MessageHeader)* numRanks]; - - if (fullData == nullptr && mpiRank == 0) - fullData = (char*) new ValueType[header.nodeSize.productOfComponents() * numRanks]; - - // avoid deadlock between not finished pmacc tasks and mpi blocking collectives - __getTransactionEvent().waitForFinished(); - MPI_CHECK(MPI_Gather(fakeHeader, sizeof(MessageHeader), MPI_CHAR, recvHeader, sizeof(MessageHeader), - MPI_CHAR, 0, comm)); + for(int i = 0; i < countRanks; ++i) + { + if(gatherRanks[i] != -1) + { + groupRanks[numRanks] = gatherRanks[i]; + numRanks++; + } + } - const size_t elementsCount = header.nodeSize.productOfComponents() * sizeof (ValueType); + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_Group group; + MPI_Group newgroup; + MPI_CHECK(MPI_Comm_group(MPI_COMM_WORLD, &group)); + MPI_CHECK(MPI_Group_incl(group, numRanks, &groupRanks[0], &newgroup)); - MPI_CHECK(MPI_Gather( - (char*) (data.getPointer()), elementsCount, MPI_CHAR, - fullData, elementsCount, MPI_CHAR, - 0, comm)); + MPI_CHECK(MPI_Comm_create(MPI_COMM_WORLD, newgroup, &comm)); + if(mpiRank != -1) + { + MPI_Comm_rank(comm, &mpiRank); + isMPICommInitialized = true; + } + return mpiRank == 0; + } - if (mpiRank == 0) + template + Box operator()(Box data) { - if (filteredData == nullptr) - filteredData = (char*) new 
ValueType[header.simSize.productOfComponents()]; - - /*create box with valid memory*/ - dstBox = Box(PitchedBox ( - (ValueType*) filteredData, - Space(), - header.simSize, - header.simSize.x() * sizeof (ValueType) - )); - - - for (int i = 0; i < numRanks; ++i) + typedef typename Box::ValueType ValueType; + + Box dstBox = Box(PitchedBox( + (ValueType*) filteredData, + Space(), + header.simSize, + header.simSize.x() * sizeof(ValueType))); + MessageHeader mHeader; + MessageHeader* fakeHeader = &mHeader; + memcpy(fakeHeader, &header, sizeof(MessageHeader)); + + char* recvHeader = new char[sizeof(MessageHeader) * numRanks]; + + if(fullData == nullptr && mpiRank == 0) + fullData = (char*) new ValueType[header.nodeSize.productOfComponents() * numRanks]; + + // avoid deadlock between not finished pmacc tasks and mpi blocking collectives + __getTransactionEvent().waitForFinished(); + MPI_CHECK(MPI_Gather( + fakeHeader, + sizeof(MessageHeader), + MPI_CHAR, + recvHeader, + sizeof(MessageHeader), + MPI_CHAR, + 0, + comm)); + + const size_t elementsCount = header.nodeSize.productOfComponents() * sizeof(ValueType); + + MPI_CHECK(MPI_Gather( + (char*) (data.getPointer()), + elementsCount, + MPI_CHAR, + fullData, + elementsCount, + MPI_CHAR, + 0, + comm)); + + + if(mpiRank == 0) { - MessageHeader* head = (MessageHeader*) (recvHeader + sizeof(MessageHeader)* i); - size_t offset = header.nodeSize.productOfComponents() * static_cast(i); - Box srcBox = Box(PitchedBox ( - reinterpret_cast(fullData) + offset, - Space(), - head->nodeSize, - head->nodeSize.x() * sizeof (ValueType) - )); - - insertData(dstBox, srcBox, head->nodeOffset, head->nodePictureSize, head->nodeGuardCells); + if(filteredData == nullptr) + filteredData = (char*) new ValueType[header.simSize.productOfComponents()]; + + /*create box with valid memory*/ + dstBox = Box(PitchedBox( + (ValueType*) filteredData, + Space(), + header.simSize, + header.simSize.x() * sizeof(ValueType))); + + + for(int i = 0; i < numRanks; ++i) 
+ { + MessageHeader* head = (MessageHeader*) (recvHeader + sizeof(MessageHeader) * i); + size_t offset = header.nodeSize.productOfComponents() * static_cast(i); + Box srcBox = Box(PitchedBox( + reinterpret_cast(fullData) + offset, + Space(), + head->nodeSize, + head->nodeSize.x() * sizeof(ValueType))); + + insertData(dstBox, srcBox, head->nodeOffset, head->nodePictureSize, head->nodeGuardCells); + } } - } - - delete[] recvHeader; + delete[] recvHeader; - return dstBox; - } + return dstBox; + } - template - void insertData(DstBox& dst, const SrcBox& src, Space offsetToSimNull, Space srcSize, Space nodeGuardCells) - { - for (int y = 0; y < srcSize.y(); ++y) + template + void insertData(DstBox& dst, const SrcBox& src, Space offsetToSimNull, Space srcSize, Space nodeGuardCells) { - for (int x = 0; x < srcSize.x(); ++x) + for(int y = 0; y < srcSize.y(); ++y) { - dst[y + offsetToSimNull.y()][x + offsetToSimNull.x()] = - src[nodeGuardCells.y() + y][nodeGuardCells.x() + x]; + for(int x = 0; x < srcSize.x(); ++x) + { + dst[y + offsetToSimNull.y()][x + offsetToSimNull.x()] + = src[nodeGuardCells.y() + y][nodeGuardCells.x() + x]; + } } } - } - -private: - - char* filteredData; - char* fullData; - MPI_Comm comm; - int mpiRank; - int numRanks; - bool isMPICommInitialized; - MessageHeader header; -}; - -}//namespace - + private: + char* filteredData; + char* fullData; + MPI_Comm comm; + int mpiRank; + int numRanks; + bool isMPICommInitialized; + MessageHeader header; + }; + +} // namespace gol diff --git a/share/pmacc/examples/gameOfLife2D/include/PngCreator.hpp b/share/pmacc/examples/gameOfLife2D/include/PngCreator.hpp index e16207faf5..061add1bf0 100644 --- a/share/pmacc/examples/gameOfLife2D/include/PngCreator.hpp +++ b/share/pmacc/examples/gameOfLife2D/include/PngCreator.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Heiko Burau, Rene Widera +/* Copyright 2013-2021 Heiko Burau, Rene Widera * * This file is part of PMacc. 
* @@ -25,12 +25,10 @@ namespace gol { - struct PngCreator { - template - void operator() (uint32_t currentStep, DBox data, Space dataSize) + void operator()(uint32_t currentStep, DBox data, Space dataSize) { std::stringstream step; step << std::setw(6) << std::setfill('0') << currentStep; @@ -38,11 +36,11 @@ namespace gol pngwriter png(dataSize.x(), dataSize.y(), 0, filename.c_str()); png.setcompressionlevel(9); - for (int y = 0; y < dataSize.y(); ++y) + for(int y = 0; y < dataSize.y(); ++y) { - for (int x = 0; x < dataSize.x(); ++x) + for(int x = 0; x < dataSize.x(); ++x) { - float p = data[y ][x ]; + float p = data[y][x]; png.plot(x + 1, dataSize.y() - y, p, p, p); } } @@ -50,5 +48,4 @@ namespace gol } }; -} - +} // namespace gol diff --git a/share/pmacc/examples/gameOfLife2D/include/Simulation.hpp b/share/pmacc/examples/gameOfLife2D/include/Simulation.hpp index 7ce13e9557..66f777a92c 100644 --- a/share/pmacc/examples/gameOfLife2D/include/Simulation.hpp +++ b/share/pmacc/examples/gameOfLife2D/include/Simulation.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera, Maximilian Knespel, Alexander Grund +/* Copyright 2013-2021 Rene Widera, Maximilian Knespel, Alexander Grund * * This file is part of PMacc. * @@ -41,198 +41,195 @@ namespace gol { - -class Simulation -{ -private: - /* math::CT::Int<16,16> is arbitrarily chosen SuperCellSize! 
*/ - typedef MappingDescription > MappingDesc; - typedef Evolution Evolutiontype; - - Space gridSize; - /* holds rule mask derived from 23/3 input, \see Evolution.hpp */ - Evolutiontype evo; - GatherSlice gather; - - /* for storing black (dead) and white (alive) data for gol */ - Buffer* buff1; /* Buffer(\see types.h) for swapping between old and new world */ - Buffer* buff2; /* like evolve(buff2 &, const buff1) would work internally */ - uint32_t steps; - - bool isMaster; - -public: - - Simulation(uint32_t rule, int32_t steps, Space gridSize, Space devices, Space periodic) : - evo(rule), steps(steps), gridSize(gridSize), isMaster(false), buff1(nullptr), buff2(nullptr) - { - /* -First this initializes the GridController with number of 'devices'* - * and 'periodic'ity. The init-routine will then create and manage * - * the MPI processes and communication group and topology. * - * -Second the cudaDevices will be allocated to the corresponding * - * Host MPI processes where hostRank == deviceNumber, if the device * - * is not marked to be used exclusively by another process. This * - * affects: cudaMalloc,cudaKernelLaunch, * - * -Then the CUDA Stream Controller is activated and one stream is * - * added. It's basically a List of cudaStreams. Used to parallelize * - * Memory transfers and calculations. * - * -Initialize TransactionManager */ - Environment::get().initDevices(devices, periodic); - - /* Now we have allocated every node to a grid position in the GC. We * - * use that grid position to allocate every node to a position in the * - * physic grid. 
Using the localGridSize = the number of cells per * - * node = number of cells / nodes, we can get the position of the * - * current node as an offset in numbers of cells */ - GridController & gc = Environment::get().GridController(); - Space localGridSize(gridSize / devices); - - /* - This forwards arguments to SubGrid.init() * - * - Create Singletons: EnvironmentController, DataConnector, * - * PluginConnector, nvidia::memory::MemoryInfo */ - Environment::get().initGrids( gridSize, localGridSize, - gc.getPosition() * localGridSize); - } - - virtual ~Simulation() - { - } - - void finalize() - { - gather.finalize(); - __delete(buff1); - __delete(buff2); - } - - void init() + class Simulation { - /* subGrid holds global and - * local SimulationSize and where the local SimArea is in the greater - * scheme using Offsets from global LEFT, TOP, FRONT - */ - const SubGrid& subGrid = Environment::get().SubGrid(); - - /* The following sets up the local layout which consists of the actual - * grid cells and some surrounding cells, called guards. - * - * ASCII Visualization: example taken for 1D, - * distributed over 2 GPUs, only 1 border shown between those two GPUs - * assuming non-periodic boundary conditions. - * In a N-GPU or periodic example, border cells guard cells exist in each direction. - * _______GPU 0________ _______GPU 1________ - * | 0 | 1 | 2 | 3 | 4 | | 3 | 4 | 5 | 6 | 7 | <-- Global (super)cell idx - * |___|___|___|___|___| |___|___|___|___|___| - * |___Core____|Bor|Gua| |Gua|Bor|___Core____| - * |___________|der|rd_| |rd_|der|___________| - * |__"real" cells_|***| |***|__"real" cells_| - * - * |***| Clones cells which correspond to the border cells of the neighbor GPU - * (sometimes also called "ghost" or "halo" cells/region) - * - * Recall that the following is defined: - * typedef MappingDescription > MappingDesc; - * where math::CT::Int<16,16> is arbitrarily(!) chosen SuperCellSize - * and DIM2 is the dimension of the grid. 
- * Expression of 2nd argument translates to DataSpace(16,16,0). - * This is the guard size (here set to be one Supercell wide in all - * directions). Meaning we have 16*16*(2*grid.x+2*grid.y+4) more - * cells in GridLayout than in the SubGrid. - * The formula above is SuperCellSize * TotalNumGuardCells with (in this case) - * SuperCellSize = 16*16 (16 cells in 2 dimensions) - * TotalNumGuardCells = 2 * grid.x (top and bottom) - * + 2 * grid.y (left and right) - * + 4 (the corners) - */ - GridLayout layout( subGrid.getLocalDomain().size, - MappingDesc::SuperCellSize::toRT()); - - /* getDataSpace will return DataSpace( grid.x +16+16, grid.y +16+16) * - * MappingDesc stores the layout regarding Core, Border and Guard * - * in units of SuperCells. * - * This is saved by init to be used by the kernel to identify itself. */ - evo.init(layout.getDataSpace(), Space::create(1)); - - buff1 = new Buffer(layout, false); - buff2 = new Buffer(layout, false); - - /* Set up the future data exchange. In this case we need to copy the - * border cells of our neighbors to our guard cells, since we only read - * from the guard cells but never write to it. - * guardingCells holds the number of guard(super)cells in each dimension - */ - Space guardingCells(1, 1); - for (uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) + private: + /* math::CT::Int<16,16> is arbitrarily chosen SuperCellSize! 
*/ + typedef MappingDescription> MappingDesc; + typedef Evolution Evolutiontype; + + Space gridSize; + /* holds rule mask derived from 23/3 input, \see Evolution.hpp */ + Evolutiontype evo; + GatherSlice gather; + + /* for storing black (dead) and white (alive) data for gol */ + Buffer* buff1; /* Buffer(\see types.h) for swapping between old and new world */ + Buffer* buff2; /* like evolve(buff2 &, const buff1) would work internally */ + uint32_t steps; + + bool isMaster; + + public: + Simulation(uint32_t rule, int32_t steps, Space gridSize, Space devices, Space periodic) + : evo(rule) + , steps(steps) + , gridSize(gridSize) + , isMaster(false) + , buff1(nullptr) + , buff2(nullptr) { - /* to check which number corresponds to which direction, you can * - * use the following member of class Mask like done in the two * - * lines below: * - * DataSpacerelVec = Mask::getRelativeDirections(i); * - * std::cout << "Direction:" << i << " => Vec: (" << relVec[0] * - * << "," << relVec[1] << ")\n"; * - * The result is: 1:right(1,0), 2:left(-1,0), 3:up(0,1), * - * 4:up right(1,1), 5:(-1,1), 6:(0,-1), 7:(1,-1), 8:(-1,-1) */ - - /* types.hpp: enum CommunicationTags{ BUFF1 = 0u, BUFF2 = 1u }; */ - buff1->addExchange(GUARD, Mask(i), guardingCells, BUFF1); - buff2->addExchange(GUARD, Mask(i), guardingCells, BUFF2); + /* -First this initializes the GridController with number of 'devices'* + * and 'periodic'ity. The init-routine will then create and manage * + * the MPI processes and communication group and topology. * + * -Second the cudaDevices will be allocated to the corresponding * + * Host MPI processes where hostRank == deviceNumber, if the device * + * is not marked to be used exclusively by another process. This * + * affects: cudaMalloc,cudaKernelLaunch, * + * -Then the CUDA Stream Controller is activated and one stream is * + * added. It's basically a List of cudaStreams. Used to parallelize * + * Memory transfers and calculations. 
* + * -Initialize TransactionManager */ + Environment::get().initDevices(devices, periodic); + + /* Now we have allocated every node to a grid position in the GC. We * + * use that grid position to allocate every node to a position in the * + * physic grid. Using the localGridSize = the number of cells per * + * node = number of cells / nodes, we can get the position of the * + * current node as an offset in numbers of cells */ + GridController& gc = Environment::get().GridController(); + Space localGridSize(gridSize / devices); + + /* - This forwards arguments to SubGrid.init() * + * - Create Singletons: EnvironmentController, DataConnector, * + * PluginConnector, nvidia::memory::MemoryInfo */ + Environment::get().initGrids(gridSize, localGridSize, gc.getPosition() * localGridSize); } - /* Both next lines are defined in GatherSlice.hpp: * - * -gather saves the MessageHeader object * - * -Then do an Allgather for the gloabalRanks from GC, sort out * - * -inactive processes (second/boolean ,argument in gather.init) and* - * save new MPI_COMMUNICATOR created from these into private var. * - * -return if rank == 0 */ - MessageHeader header(gridSize, layout, subGrid.getLocalDomain().offset); - isMaster = gather.init(header, true); + virtual ~Simulation() + { + } - /* Calls kernel to initialize random generator. Game of Life is then * - * initialized using uniform random numbers. With 10% (second arg) * - * white points. 
World will be written to buffer in first argument */ - evo.initEvolution(buff1->getDeviceBuffer().getDataBox(), 0.1); + void finalize() + { + gather.finalize(); + __delete(buff1); + __delete(buff2); + } - } + void init() + { + /* subGrid holds global and + * local SimulationSize and where the local SimArea is in the greater + * scheme using Offsets from global LEFT, TOP, FRONT + */ + const SubGrid& subGrid = Environment::get().SubGrid(); + + /* The following sets up the local layout which consists of the actual + * grid cells and some surrounding cells, called guards. + * + * ASCII Visualization: example taken for 1D, + * distributed over 2 GPUs, only 1 border shown between those two GPUs + * assuming non-periodic boundary conditions. + * In a N-GPU or periodic example, border cells guard cells exist in each direction. + * _______GPU 0________ _______GPU 1________ + * | 0 | 1 | 2 | 3 | 4 | | 3 | 4 | 5 | 6 | 7 | <-- Global (super)cell idx + * |___|___|___|___|___| |___|___|___|___|___| + * |___Core____|Bor|Gua| |Gua|Bor|___Core____| + * |___________|der|rd_| |rd_|der|___________| + * |__"real" cells_|***| |***|__"real" cells_| + * + * |***| Clones cells which correspond to the border cells of the neighbor GPU + * (sometimes also called "ghost" or "halo" cells/region) + * + * Recall that the following is defined: + * typedef MappingDescription > MappingDesc; + * where math::CT::Int<16,16> is arbitrarily(!) chosen SuperCellSize + * and DIM2 is the dimension of the grid. + * Expression of 2nd argument translates to DataSpace(16,16,0). + * This is the guard size (here set to be one Supercell wide in all + * directions). Meaning we have 16*16*(2*grid.x+2*grid.y+4) more + * cells in GridLayout than in the SubGrid. 
+ * The formula above is SuperCellSize * TotalNumGuardCells with (in this case) + * SuperCellSize = 16*16 (16 cells in 2 dimensions) + * TotalNumGuardCells = 2 * grid.x (top and bottom) + * + 2 * grid.y (left and right) + * + 4 (the corners) + */ + GridLayout layout(subGrid.getLocalDomain().size, MappingDesc::SuperCellSize::toRT()); + + /* getDataSpace will return DataSpace( grid.x +16+16, grid.y +16+16) * + * MappingDesc stores the layout regarding Core, Border and Guard * + * in units of SuperCells. * + * This is saved by init to be used by the kernel to identify itself. */ + evo.init(layout.getDataSpace(), Space::create(1)); + + buff1 = new Buffer(layout, false); + buff2 = new Buffer(layout, false); + + /* Set up the future data exchange. In this case we need to copy the + * border cells of our neighbors to our guard cells, since we only read + * from the guard cells but never write to it. + * guardingCells holds the number of guard(super)cells in each dimension + */ + Space guardingCells(1, 1); + for(uint32_t i = 1; i < traits::NumberOfExchanges::value; ++i) + { + /* to check which number corresponds to which direction, you can * + * use the following member of class Mask like done in the two * + * lines below: * + * DataSpacerelVec = Mask::getRelativeDirections(i); * + * std::cout << "Direction:" << i << " => Vec: (" << relVec[0] * + * << "," << relVec[1] << ")\n"; * + * The result is: 1:right(1,0), 2:left(-1,0), 3:up(0,1), * + * 4:up right(1,1), 5:(-1,1), 6:(0,-1), 7:(1,-1), 8:(-1,-1) */ + + /* types.hpp: enum CommunicationTags{ BUFF1 = 0u, BUFF2 = 1u }; */ + buff1->addExchange(GUARD, Mask(i), guardingCells, BUFF1); + buff2->addExchange(GUARD, Mask(i), guardingCells, BUFF2); + } + + /* Both next lines are defined in GatherSlice.hpp: * + * -gather saves the MessageHeader object * + * -Then do an Allgather for the gloabalRanks from GC, sort out * + * -inactive processes (second/boolean ,argument in gather.init) and* + * save new MPI_COMMUNICATOR created from 
these into private var. * + * -return if rank == 0 */ + MessageHeader header(gridSize, layout, subGrid.getLocalDomain().offset); + isMaster = gather.init(header, true); + + /* Calls kernel to initialize random generator. Game of Life is then * + * initialized using uniform random numbers. With 10% (second arg) * + * white points. World will be written to buffer in first argument */ + evo.initEvolution(buff1->getDeviceBuffer().getDataBox(), 0.1); + } - void start() - { - Buffer* read = buff1; - Buffer* write = buff2; - for (uint32_t i = 0; i < steps; ++i) + void start() { - oneStep(i, read, write); - std::swap(read, write); + Buffer* read = buff1; + Buffer* write = buff2; + for(uint32_t i = 0; i < steps; ++i) + { + oneStep(i, read, write); + std::swap(read, write); + } } - } -private: - void oneStep(uint32_t currentStep, Buffer* read, Buffer* write) - { - auto splitEvent = __getTransactionEvent(); - /* GridBuffer 'read' will use 'splitEvent' to schedule transaction * - * tasks from the Borders of the neighboring areas to the Guards of * - * this local Area added by 'addExchange'. All transactions in * - * Transaction Manager will then be done in parallel to the * - * calculations in the core. In order to synchronize the data * - * transfer for the case the core calculation is finished earlier, * - * GridBuffer.asyncComm returns a transaction handle we can check */ - auto send = read->asyncCommunication(splitEvent); - evo.run( read->getDeviceBuffer().getDataBox(), - write->getDeviceBuffer().getDataBox() ); - /* Join communication with worker tasks, Now all next tasks run sequential */ - __setTransactionEvent(send); - /* Calculate Borders */ - evo.run( read->getDeviceBuffer().getDataBox(), - write->getDeviceBuffer().getDataBox() ); - write->deviceToHost(); - - /* gather::operator() gathers all the buffers and assembles those to * - * a complete picture discarding the guards. 
*/ - auto picture = gather(write->getHostBuffer().getDataBox()); - PngCreator png; - if (isMaster) png(currentStep, picture, gridSize); - - } - -}; -} + private: + void oneStep(uint32_t currentStep, Buffer* read, Buffer* write) + { + auto splitEvent = __getTransactionEvent(); + /* GridBuffer 'read' will use 'splitEvent' to schedule transaction * + * tasks from the Borders of the neighboring areas to the Guards of * + * this local Area added by 'addExchange'. All transactions in * + * Transaction Manager will then be done in parallel to the * + * calculations in the core. In order to synchronize the data * + * transfer for the case the core calculation is finished earlier, * + * GridBuffer.asyncComm returns a transaction handle we can check */ + auto send = read->asyncCommunication(splitEvent); + evo.run(read->getDeviceBuffer().getDataBox(), write->getDeviceBuffer().getDataBox()); + /* Join communication with worker tasks, Now all next tasks run sequential */ + __setTransactionEvent(send); + /* Calculate Borders */ + evo.run(read->getDeviceBuffer().getDataBox(), write->getDeviceBuffer().getDataBox()); + write->deviceToHost(); + + /* gather::operator() gathers all the buffers and assembles those to * + * a complete picture discarding the guards. */ + auto picture = gather(write->getHostBuffer().getDataBox()); + PngCreator png; + if(isMaster) + png(currentStep, picture, gridSize); + } + }; +} // namespace gol diff --git a/share/pmacc/examples/gameOfLife2D/include/types.hpp b/share/pmacc/examples/gameOfLife2D/include/types.hpp index 7da90c2324..865d4d186e 100644 --- a/share/pmacc/examples/gameOfLife2D/include/types.hpp +++ b/share/pmacc/examples/gameOfLife2D/include/types.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. 
* @@ -30,12 +30,11 @@ namespace gol typedef DataSpace Space; typedef GridController GC; - typedef GridBuffer Buffer; + typedef GridBuffer Buffer; enum CommunicationTags { - BUFF1 = 0u, BUFF2 = 1u + BUFF1 = 0u, + BUFF2 = 1u }; -} - - +} // namespace gol diff --git a/share/pmacc/examples/gameOfLife2D/main.cpp b/share/pmacc/examples/gameOfLife2D/main.cpp index d33dc5abaa..4702f06636 100644 --- a/share/pmacc/examples/gameOfLife2D/main.cpp +++ b/share/pmacc/examples/gameOfLife2D/main.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Rene Widera +/* Copyright 2013-2021 Rene Widera * * This file is part of PMacc. * @@ -37,38 +37,43 @@ namespace po = boost::program_options; * @param argc count of arguments in argv * @param argv arguments of program start */ -int main( int argc, char **argv ) +int main(int argc, char** argv) { typedef ::gol::Space Space; - std::vector devices; /* will be set by boost program argument option "-d 3 3 3" */ + std::vector devices; /* will be set by boost program argument option "-d 3 3" */ std::vector gridSize; /* same but with -g */ std::vector periodic; uint32_t steps; std::string rule; /* Game of Life Simulation Rules like 23/3 */ - po::options_description desc( "Allowed options" ); - desc.add_options( ) - ( "help,h", "produce help message" ) - ( "steps,s", po::value ( &steps ), "simulation steps" ) - ( "rule,r", po::value ( &rule ), "simulation rule etc. 23/3" ) - ( "devices,d", po::value > ( &devices )->multitoken( ), - "number of devices in each dimension (only 1D or 2D). If you use more than " - "one device in total, you will need to run mpirun with \"mpirun -n " - " ./gameOfLife\"" ) - ( "grid,g", po::value > ( &gridSize )->multitoken( ), - "size of the simulation grid (must be 2D, e.g.: -g 128 128). 
Because of the border, which is one supercell = 16 cells wide, " - "the size in each direction should be greater or equal than 3*16=48 per device, so that the core will be non-empty" ) - ( "periodic,p", po::value > ( &periodic )->multitoken( ), - "specifying whether the grid is periodic (1) or not (0) in each dimension, default: no periodic dimensions" ); + po::options_description desc("Allowed options"); + desc.add_options()("help,h", "produce help message")( + "steps,s", + po::value(&steps)->default_value(100), + "simulation steps")("rule,r", po::value(&rule)->default_value("23/3"), "simulation rule")( + "devices,d", + po::value>(&devices)->multitoken(), + "number of devices in each dimension (only 1D or 2D). If you use more than " + "one device in total, you will need to run mpirun with \"mpirun -n " + " ./gameOfLife\"")( + "grid,g", + po::value>(&gridSize)->multitoken(), + "size of the simulation grid (must be 2D, e.g.: -g 128 128). Because of the border, which is one supercell = " + "16 cells wide, " + "the size in each direction should be greater or equal than 3*16=48 per device, so that the core will be " + "non-empty")( + "periodic,p", + po::value>(&periodic)->multitoken(), + "specifying whether the grid is periodic (1) or not (0) in each dimension, default: no periodic dimensions"); /* parse command line options and config file and store values in vm */ po::variables_map vm; - po::store( boost::program_options::parse_command_line( argc, argv, desc ), vm ); - po::notify( vm ); + po::store(boost::program_options::parse_command_line(argc, argv, desc), vm); + po::notify(vm); /* print help message and quit simulation */ - if ( vm.count( "help" ) ) + if(vm.count("help")) { std::cerr << desc << "\n"; return false; @@ -76,62 +81,62 @@ int main( int argc, char **argv ) /* fill periodic with 0 */ - while ( periodic.size( ) < DIM2 ) - periodic.push_back( 0 ); + while(periodic.size() < DIM2) + periodic.push_back(0); /* check on correct number of devices. 
fill with default value 1 for missing dimensions */ - if ( devices.size( ) > DIM2 ) + if(devices.size() > DIM2) { std::cerr << "Invalid number of devices.\nuse [-d dx=1 dy=1 dz=1]" << std::endl; } else - while ( devices.size( ) < DIM2 ) - devices.push_back( 1 ); + while(devices.size() < DIM2) + devices.push_back(1); /* check on correct grid size. fill with default grid size value 1 for missing 3. dimension */ - if ( gridSize.size( ) != DIM2 ) + if(gridSize.size() != DIM2) { std::cerr << "Invalid or missing grid size.\nuse -g width height [depth=1]" << std::endl; - MPI_CHECK( MPI_Finalize( ) ); + MPI_CHECK(MPI_Finalize()); return 0; } /* after checking all input values, copy into DataSpace Datatype */ - Space gpus( devices[0], devices[1] ); - Space grid( gridSize[0], gridSize[1] ); - Space endless( periodic[0], periodic[1] ); + Space gpus(devices[0], devices[1]); + Space grid(gridSize[0], gridSize[1]); + Space endless(periodic[0], periodic[1]); uint32_t ruleMask = 0; - size_t strLen = rule.length( ); - size_t gPoint = rule.find( '/' ); - std::string stayAliveIf = rule.substr( 0, gPoint ); - std::string newBornIf = rule.substr( gPoint + 1, strLen - gPoint - 1 ); + size_t strLen = rule.length(); + size_t gPoint = rule.find('/'); + std::string stayAliveIf = rule.substr(0, gPoint); + std::string newBornIf = rule.substr(gPoint + 1, strLen - gPoint - 1); - for ( unsigned int i = 0; i < newBornIf.length( ); ++i ) + for(unsigned int i = 0; i < newBornIf.length(); ++i) { - std::stringstream ss; /* used for converting const char* "123" to int 123 */ + std::stringstream ss; /* used for converting const char* "123" to int 123 */ ss << newBornIf[i]; int shift; ss >> shift; - ruleMask = ruleMask | 1 << ( shift + 9 ); + ruleMask = ruleMask | 1 << (shift + 9); } - for ( unsigned int i = 0; i < stayAliveIf.length( ); ++i ) + for(unsigned int i = 0; i < stayAliveIf.length(); ++i) { std::stringstream ss; ss << stayAliveIf[i]; int shift; ss >> shift; - ruleMask = ruleMask | 1 << ( 
shift ); + ruleMask = ruleMask | 1 << (shift); } std::cout << "newborn if=" << newBornIf << " stay alive if=" << stayAliveIf << " mask=" << ruleMask << std::endl; /* start game of life simulation */ - gol::Simulation sim( ruleMask, steps, grid, gpus, endless ); - sim.init( ); - sim.start( ); - sim.finalize( ); + gol::Simulation sim(ruleMask, steps, grid, gpus, endless); + sim.init(); + sim.start(); + sim.finalize(); /* finalize the pmacc context */ pmacc::Environment<>::get().finalize(); diff --git a/share/pmacc/examples/gameOfLife2D/submit/1.cfg b/share/pmacc/examples/gameOfLife2D/submit/1.cfg index 6a545b0238..6500c6065c 100644 --- a/share/pmacc/examples/gameOfLife2D/submit/1.cfg +++ b/share/pmacc/examples/gameOfLife2D/submit/1.cfg @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PMacc. # diff --git a/share/pmacc/examples/gameOfLife2D/submit/2.cfg b/share/pmacc/examples/gameOfLife2D/submit/2.cfg index 0fc18f13af..f957c75d46 100644 --- a/share/pmacc/examples/gameOfLife2D/submit/2.cfg +++ b/share/pmacc/examples/gameOfLife2D/submit/2.cfg @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PMacc. # diff --git a/share/pmacc/examples/gameOfLife2D/submit/4.cfg b/share/pmacc/examples/gameOfLife2D/submit/4.cfg index 0cdca74ee2..d4832ad50d 100644 --- a/share/pmacc/examples/gameOfLife2D/submit/4.cfg +++ b/share/pmacc/examples/gameOfLife2D/submit/4.cfg @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PMacc. 
# diff --git a/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpiexec.tpl b/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpiexec.tpl index 7611ba9f2e..c3f3011a3d 100644 --- a/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpiexec.tpl +++ b/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpiexec.tpl @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Rene Widera, Axel Huebl +# Copyright 2013-2021 Rene Widera, Axel Huebl # # This file is part of PMacc. # diff --git a/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpirun.tpl b/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpirun.tpl index 224cf6ad93..6e62bf7c08 100644 --- a/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpirun.tpl +++ b/share/pmacc/examples/gameOfLife2D/submit/bash/bash_mpirun.tpl @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Rene Widera, Axel Huebl +# Copyright 2013-2021 Rene Widera, Axel Huebl # # This file is part of PMacc. # diff --git a/src/tools/bin/BinEnergyPlot.sh b/src/tools/bin/BinEnergyPlot.sh index 3c9333ddba..ed355740d6 100755 --- a/src/tools/bin/BinEnergyPlot.sh +++ b/src/tools/bin/BinEnergyPlot.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Rene Widera, Richard Pausch # # This file is part of PIConGPU. # diff --git a/src/tools/bin/addLicense b/src/tools/bin/addLicense index 7b7e1faec2..48cc9ce611 100755 --- a/src/tools/bin/addLicense +++ b/src/tools/bin/addLicense @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # diff --git a/src/tools/bin/create.sh b/src/tools/bin/create.sh index 0902891dc7..c0fadea8cb 100755 --- a/src/tools/bin/create.sh +++ b/src/tools/bin/create.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. 
# diff --git a/src/tools/bin/findAndDo b/src/tools/bin/findAndDo index dfb71e54a4..3e9c26ffbc 100755 --- a/src/tools/bin/findAndDo +++ b/src/tools/bin/findAndDo @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -23,7 +23,7 @@ # $2 = filename pattern # $3 = programm to call: programmName filename -#example call: for i in `echo "*.def *.h *.cpp *.cu *.hpp *.tpp *.kernel *.loader *.param *.unitless"` ; do findAndDo include/pmacc/ "$i" deleteHeadComment ; done +#example call: for i in `echo "-iname *.def -iname *.h -iname *.cpp -iname *.cu -iname *.hpp -iname *.tpp -iname *.kernel -iname *.loader -iname *.param -iname *.unitless"` ; do findAndDo include/pmacc/ "$i" deleteHeadComment ; done find $1 -name "$2" -type f | grep -v "\.svn" | grep -v "\.git" | \ xargs -n1 -P8 -I{} $3 {} diff --git a/src/tools/bin/newVersion.sh b/src/tools/bin/newVersion.sh index b09481494d..5f543c62c0 100755 --- a/src/tools/bin/newVersion.sh +++ b/src/tools/bin/newVersion.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # This file is part of PIConGPU. 
# @@ -131,16 +131,16 @@ sed -i 's/'\ sed -i 's/'\ 'picongpu@[0-9]\+\.[0-9]\+\.[0-9]\+\(-.\+\)*/'\ 'picongpu@'$VERSION_STR'/g' \ - $REPO_DIR/share/picongpu/dockerfiles/ubuntu-1604/Dockerfile + $REPO_DIR/share/picongpu/dockerfiles/ubuntu-2004/Dockerfile sed -i 's/'\ '\/picongpu:[0-9]\+\.[0-9]\+\.[0-9]\+\(-.\+\)*/'\ '\/picongpu:'$VERSION_STR'/g' \ - $REPO_DIR/share/picongpu/dockerfiles/ubuntu-1604/Singularity + $REPO_DIR/share/picongpu/dockerfiles/ubuntu-2004/Singularity sed -i 's/'\ 'Version [0-9]\+\.[0-9]\+\.[0-9]\+\(-.\+\)*/'\ 'Version '$VERSION_STR'/g' \ - $REPO_DIR/share/picongpu/dockerfiles/ubuntu-1604/Singularity + $REPO_DIR/share/picongpu/dockerfiles/ubuntu-2004/Singularity # @todo `project(...)` version in CMakeLists.txt (future) diff --git a/src/tools/bin/nextstep_from_period.sh b/src/tools/bin/nextstep_from_period.sh index 6da5cd0a74..52c4bffc76 100755 --- a/src/tools/bin/nextstep_from_period.sh +++ b/src/tools/bin/nextstep_from_period.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2017-2020 Axel Huebl, Ilja Goethel +# Copyright 2017-2021 Axel Huebl, Ilja Goethel # # This file is part of PIConGPU. # diff --git a/src/tools/bin/pic2xdmf.py b/src/tools/bin/pic2xdmf.py index 24eca75f55..398e9a9fb2 100755 --- a/src/tools/bin/pic2xdmf.py +++ b/src/tools/bin/pic2xdmf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Copyright 2014-2020 Felix Schmitt, Conrad Schumann +# Copyright 2014-2021 Felix Schmitt, Conrad Schumann # # This file is part of PIConGPU. # diff --git a/src/tools/bin/plotIntensity b/src/tools/bin/plotIntensity index c8b5c53edf..b9231409a0 100755 --- a/src/tools/bin/plotIntensity +++ b/src/tools/bin/plotIntensity @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. 
# diff --git a/src/tools/bin/plotNumericalHeating b/src/tools/bin/plotNumericalHeating index 5064aef21c..eef43b3208 100755 --- a/src/tools/bin/plotNumericalHeating +++ b/src/tools/bin/plotNumericalHeating @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2015-2020 Richard Pausch +# Copyright 2015-2021 Richard Pausch # # This file is part of PIConGPU. # @@ -119,7 +119,7 @@ for sim in directories: mydir = sim+simDir # get relevant files with energy files = [f for f in os.listdir(mydir) - if os.path.isfile(os.path.join(mydir, f)) and (re.search('^.*_energy_all.dat', f) or re.search('^.fields_energy.dat', f))] + if os.path.isfile(os.path.join(mydir, f)) and (re.search('^.*_energy_all.dat', f) or re.search('^fields_energy.dat', f))] # check if file list is empty if len(files) == 0: sys.exit("There were no energy files in \"{}\".".format(mydir)) diff --git a/src/tools/bin/plotRadiation b/src/tools/bin/plotRadiation index 2a45df727e..f940a6ee01 100755 --- a/src/tools/bin/plotRadiation +++ b/src/tools/bin/plotRadiation @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2013-2020 Richard Pausch +# Copyright 2013-2021 Richard Pausch # # This file is part of PIConGPU. # diff --git a/src/tools/bin/plotSumEnergyRange b/src/tools/bin/plotSumEnergyRange index baea76f133..89c5c46cf1 100755 --- a/src/tools/bin/plotSumEnergyRange +++ b/src/tools/bin/plotSumEnergyRange @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl, Rene Widera +# Copyright 2013-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # diff --git a/src/tools/bin/plot_chargeConservation.py b/src/tools/bin/plot_chargeConservation.py index 1ceed9965a..db21828e84 100755 --- a/src/tools/bin/plot_chargeConservation.py +++ b/src/tools/bin/plot_chargeConservation.py @@ -1,6 +1,6 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -# Copyright 2015-2020 Richard Pausch +# Copyright 2015-2021 Richard Pausch # # This file is part of PIConGPU. 
# @@ -20,14 +20,13 @@ # import argparse -import os import numpy as np -import h5py import matplotlib.pyplot as plt +import openpmd_api as io __doc__ = ''' This program reads electric field and charge density data -from hdf5 files created by PIConGPU and checks charge conservation +from openPMD files created by PIConGPU and checks charge conservation for the Yee scheme. Three slice plots show the error in $div(E) - rho/epsilon_0$ @@ -52,55 +51,64 @@ def set_colorbar(cb): t.set_fontsize(16) -def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): +def plotError(file_pattern, slice_pos=[0.5, 0.5, 0.5], timestep=-1): """ - read field data from hdf5 files + read field data from an openPMD file compute div(E) - rho/epsilon_0 plot slices through simulation volume Parameters: - h5file: file name - file name to hdf5 data set from PIConGPU + file_pattern: file name + openPMD file series pattern e.g. simData_%%T.bp slice_pos: list of floats list of 3 floats to define slice position [0, 1] Default=[0.5, 0.5, 0.5] + + timestep: selected timestep + simulation step used if file is an + openPMD file series pattern e.g. 
simData_%%T.bp """ - # load hdf5 file - f = h5py.File(h5file, "r") + # load file + series = io.Series(file_pattern, io.Access.read_only) + + # read time step + if timestep == -1: + *_, timestep = series.iterations - # read time step (python 2 and 3 save) - timestep = -1 - for i in f['/data'].keys(): - timestep = i + f = series.iterations[timestep] # load physics constants and simulation parameters - EPS0 = f["/data/{}".format(timestep)].attrs["eps0"] - CELL_WIDTH = f["/data/{}".format(timestep)].attrs["cell_width"] - CELL_HEIGHT = f["/data/{}".format(timestep)].attrs["cell_height"] - CELL_DEPTH = f["/data/{}".format(timestep)].attrs["cell_depth"] + EPS0 = f.get_attribute("eps0") + CELL_WIDTH = f.get_attribute("cell_width") + CELL_HEIGHT = f.get_attribute("cell_height") + CELL_DEPTH = f.get_attribute("cell_depth") # load electric field - Ex = np.array(f["/data/{}/fields/E/x".format(timestep)]) - Ey = np.array(f["/data/{}/fields/E/y".format(timestep)]) - Ez = np.array(f["/data/{}/fields/E/z".format(timestep)]) + Ex = f.meshes["E"]["x"][:] + Ey = f.meshes["E"]["y"][:] + Ez = f.meshes["E"]["z"][:] + + series.flush() # load and add charge density charge = np.zeros_like(Ex) norm = 0.0 - for field_name in f["/data/{}/fields/".format(timestep)].keys(): - if field_name[-14:] == "_chargeDensity": + + for fieldName in f.meshes: + search_pattern = "_chargeDensity" + if fieldName[-len(search_pattern):] == search_pattern: # load species density - species_Density = np.array( - f["/data/{}/fields/".format(timestep) + field_name] - ) + species_Density = \ + f.meshes[fieldName][io.Mesh_Record_Component.SCALAR][:] + series.flush() # choose norm to be the maximal charge density of all species norm = np.max([norm, np.amax(np.abs(species_Density))]) # add charge density to total charge density charge += species_Density - # close hdf5 file - f.close() + # close file + del series # compute divergence of electric field according to Yee scheme div = ((Ex[1:, 1:, 1:] - Ex[1:, 1:, :-1]) / 
CELL_WIDTH + @@ -116,7 +124,7 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): plt.figure(figsize=(14, 5)) plt.subplot(131) - slice_cell_z = np.int(np.floor((diff.shape[0]-1) * slice_pos[0])) + slice_cell_z = np.int(np.floor((diff.shape[0] - 1) * slice_pos[0])) plt.title("slice in z at {}".format(slice_cell_z), fontsize=20) plt.imshow(diff[slice_cell_z, :, :], vmin=-limit, vmax=+limit, @@ -132,7 +140,7 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): ) plt.subplot(132) - slice_cell_y = np.int(np.floor((diff.shape[1]-1) * slice_pos[1])) + slice_cell_y = np.int(np.floor((diff.shape[1] - 1) * slice_pos[1])) plt.title("slice in y at {}".format(slice_cell_y), fontsize=20) plt.imshow(diff[:, slice_cell_y, :], vmin=-limit, vmax=+limit, @@ -148,7 +156,7 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): ) plt.subplot(133) - slice_cell_x = np.int(np.floor((diff.shape[2]-1) * slice_pos[2])) + slice_cell_x = np.int(np.floor((diff.shape[2] - 1) * slice_pos[2])) plt.title("slice in x at {}".format(slice_cell_x), fontsize=20) plt.imshow(diff[:, :, slice_cell_x], vmin=-limit, vmax=+limit, @@ -176,15 +184,24 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): parser = argparse.ArgumentParser( description=__doc__, epilog='For further questions please contact Richard Pausch.' - ) + ) - parser.add_argument(metavar="hdf5 file", - dest="h5file_name", - help='hdf5 file with PIConGPU data', + parser.add_argument(metavar="openPMD file name", + dest="filename", + help='openPMD file or series pattern ' + 'with PIConGPU data', action='store', type=str) - parser.add_argument("--x", + parser.add_argument("-t", + dest="selected_timestep", + help='simulation step used if file is an ' + 'openPMD file series pattern e.g. 
simData_%%T.bp', + action='store', + default=-1, + type=int) + + parser.add_argument("-x", dest="x_split", action='store', default=0.5, @@ -192,7 +209,7 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): help='float value between [0,1] to set slice ' + 'position in x (default = 0.5)') - parser.add_argument("--y", + parser.add_argument("-y", dest="y_split", action='store', default=0.5, @@ -200,7 +217,7 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): help='float value between [0,1] to set slice ' + 'position in y (default = 0.5)') - parser.add_argument("--z", + parser.add_argument("-z", dest="z_split", action='store', default=0.5, @@ -222,7 +239,5 @@ def plotError(h5file, slice_pos=[0.5, 0.5, 0.5]): args.x_split], 0, 1) - if os.path.isfile(args.h5file_name): - plotError(args.h5file_name, slice_pos=slice_pos) - else: - print("ERROR: {} is not a file".format(args.h5file_name)) + plotError(args.filename, slice_pos=slice_pos, + timestep=args.selected_timestep) diff --git a/src/tools/bin/plot_chargeConservation_overTime.py b/src/tools/bin/plot_chargeConservation_overTime.py index 03471e9d8c..6f9225809a 100755 --- a/src/tools/bin/plot_chargeConservation_overTime.py +++ b/src/tools/bin/plot_chargeConservation_overTime.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2015-2020 Richard Pausch, Axel Huebl +# Copyright 2015-2021 Richard Pausch, Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -20,20 +20,17 @@ # # import system interface modules -import os -import re -import sys import argparse # import data analysis and plotting modules import numpy as np -import h5py import matplotlib.pyplot as plt from matplotlib.ticker import LinearLocator, FormatStrFormatter +import openpmd_api as io __doc__ = """ This program reads electric field and charge density data -from all hdf5 files created by a PIConGPU simulation and +from all openPMD files created by a PIConGPU simulation and plots a variety of values to check charge conservation over time. 
@@ -41,74 +38,47 @@ normalized to the maximum [per-species] charge in the first simulation time step. -Developer: Richard Pausch +Developer: Richard Pausch, Rene Widera """ -def get_list_of_hdf5_files(base_directory): +def deviation_charge_conservation(series, iteration): """ - Returns a list of hdf5 files (`*_.h5`) - listed in sub-directory `simOutput/h5/` - - Parameters: - base_directory: string - directory path where to find simOutput/h5/ - - Return: - list of strings with hdf5 file names found - """ - h5_list = [] # empty list for hdf5 files - h5_dir = base_directory + "/simOutput/h5/" - if not os.path.isdir(h5_dir): - raise Exception(("Error: {} does not contain" + - " a simOutput/h5/ directory").format(directory)) - - for filename in os.listdir(h5_dir): - if os.path.isfile(h5_dir+filename): - if re.search(r".+_[0-9]+\.h5", filename): - h5_list.append(h5_dir + filename) - return h5_list - - -def deviation_charge_conservation(h5file): - """ - read field data from hdf5 files + read field data from openPMD file compute d = div(E)*epsilon_0 - rho Parameters: - h5file: file name - file name and path to hdf5 data from PIConGPU + series: file name + openPMD file series pattern e.g. 
simData_%%T.bp + + iteration: + openPMD iteration object Return: - list of floats: [timestep, max(abs(d)), - mean(abs(d)), std(d), norm] + list of floats: [max(abs(d)), mean(abs(d)), std(d), norm] """ - # load hdf5 file - f = h5py.File(h5file, "r") - - # read time step (python 2 and 3 save) - timestep = -1 - for i in f["/data"].keys(): - timestep = i # load physics constants and simulation parameters - EPS0 = f["/data/{}".format(timestep)].attrs["eps0"] + EPS0 = iteration.get_attribute("eps0") is2D = False # load electric field - Ex = np.array(f["/data/{}/fields/E/x".format(timestep)]) - Ey = np.array(f["/data/{}/fields/E/y".format(timestep)]) - Ez = np.array(f["/data/{}/fields/E/z".format(timestep)]) + Ex = iteration.meshes["E"]["x"][:] + Ey = iteration.meshes["E"]["y"][:] + Ez = iteration.meshes["E"]["z"][:] + + series.flush() # load and add charge density charge = np.zeros_like(Ex) norm = 0.0 - for field_name in f["/data/{}/fields/".format(timestep)].keys(): - if field_name[-14:] == "_chargeDensity": + for fieldName in iteration.meshes: + if fieldName[-14:] == "_chargeDensity": + # load species density # load species density - species_Density_pointer = f["/data/{}/fields/".format(timestep) + - field_name] - species_Density = np.array(species_Density_pointer) + species_Density = \ + iteration.meshes[fieldName][io.Mesh_Record_Component.SCALAR][:] + series.flush() # choose norm to be the maximal charge density of all species norm = np.max([norm, np.amax(np.abs(species_Density))]) # add charge density to total charge density @@ -119,16 +89,16 @@ def deviation_charge_conservation(h5file): # a 2D simulation, the size of the z or [2]-component is 1, which # is <2. The code changes the 2D3D flag if one Density data set is # 2D. 
- if species_Density_pointer.attrs['_size'][2] < 2: + if species_Density.ndim == 2: is2D = True # load cell size and compute cell volume - CELL_WIDTH = f["/data/{}".format(timestep)].attrs["cell_width"] - CELL_HEIGHT = f["/data/{}".format(timestep)].attrs["cell_height"] - CELL_DEPTH = f["/data/{}".format(timestep)].attrs["cell_depth"] + CELL_WIDTH = iteration.get_attribute("cell_width") + CELL_HEIGHT = iteration.get_attribute("cell_height") + CELL_DEPTH = iteration.get_attribute("cell_depth") - # close hdf5 file - f.close() + # close iteration + iteration.close() if is2D: # compute divergence of electric field according to Yee scheme @@ -149,7 +119,7 @@ def deviation_charge_conservation(h5file): # density diff = (div * EPS0 - charge[1:, 1:, 1:]) - return float(timestep), np.amax(np.abs(diff)), np.mean(np.abs(diff)), \ + return np.amax(np.abs(diff)), np.mean(np.abs(diff)), \ np.std(diff), norm @@ -160,13 +130,29 @@ def deviation_charge_conservation(h5file): parser = argparse.ArgumentParser( description=__doc__, epilog="For further questions please contact Richard Pausch." - ) + ) + + parser.add_argument("--start", + dest="start_timestep", + help='first timstep', + action='store', + default=0, + type=int) + + parser.add_argument("--last", + dest="last_timestep", + help='last timstep', + action='store', + default=-1, + type=int) parser.add_argument(metavar="simulation directories", - dest="directories", - help="simulation base directories", + dest="file_pattern", + help="openPMD series pattern with PIConGPU " + "data e.g. 
simData_%%T.bp", action="store", - nargs="+") + nargs="+" + ) parser.add_argument("--export", metavar="file name", @@ -176,7 +162,7 @@ def deviation_charge_conservation(h5file): "(disable interactive window)") args = parser.parse_args() - directories = args.directories + file_patterns = args.file_pattern # prepare plot of data plt.figure(figsize=(10, 5)) @@ -214,35 +200,26 @@ def deviation_charge_conservation(h5file): # underscore labels) sim_dir_counter = 1 - for directory in directories: - # do the data reading and catch errors - try: - # test if directory is a directory - if not os.path.isdir(directory): - raise Exception("Error: {} is not a directory".format( - directory)) - - # check if any hdf5 files were found - h5_file_list = get_list_of_hdf5_files(directory) - if len(h5_file_list) == 0: - raise Exception("No hdf5 files found in {}".format( - directory + "simOutput/h5/")) - - except Exception as error_msg: - print("{}".format(error_msg)) - sys.exit(1) - - # collect data from all found hdf5 files + for pattern in file_patterns: + series = io.Series(pattern, io.Access.read_only) + + first_step = args.start_timestep + last_step = args.last_timestep + collect_results = None - print("Read files:") - for f in h5_file_list: - print(f) - t, cc_max, mean_abs, std, norm = deviation_charge_conservation(f) - data_tmp = np.array([[t, cc_max, mean_abs, std, norm]]) - if collect_results is None: - collect_results = data_tmp - else: - collect_results = np.append(collect_results, data_tmp, axis=0) + + for iteration in series.iterations: + if (iteration >= first_step and + (iteration <= last_step or last_step == -1)): + print("load iteration {:d}".format(iteration)) + cc_max, mean_abs, std, norm = deviation_charge_conservation( + series, series.iterations[iteration]) + data_tmp = np.array([[iteration, cc_max, mean_abs, std, norm]]) + if collect_results is None: + collect_results = data_tmp + else: + collect_results = np.append( + collect_results, data_tmp, axis=0) # sort 
data temporally collect_results = np.sort(collect_results, axis=0) @@ -255,19 +232,20 @@ def deviation_charge_conservation(h5file): norm = collect_results[0, 4] # first (t=0) norm # generate plot label based on directory and avoid underscore bug - plot_label = ("{:d}. ".format(sim_dir_counter) + - os.path.normpath(directory).split("/")[-1]) + plot_label = ("{:s}".format(pattern)) sim_dir_counter += 1 # add plot for maximum difference - ax1.plot(t, max_diff/norm, + ax1.plot(t, max_diff / norm, linestyle="-", lw=3, marker="+", ms=15, markeredgewidth=3, label=plot_label) # add plot for mean difference and std - ax2.errorbar(t, mean_abs/norm, yerr=std/norm, lw=3, markeredgewidth=3, - label=plot_label) + ax2.errorbar(t, mean_abs / norm, yerr=std / norm, lw=3, + markeredgewidth=3, label=plot_label) + + del series # finish plots ax1.legend(loc=0) diff --git a/src/tools/bin/png2video.sh b/src/tools/bin/png2video.sh index 18be27f3c2..f970fd8427 100755 --- a/src/tools/bin/png2video.sh +++ b/src/tools/bin/png2video.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PIConGPU. # diff --git a/src/tools/bin/position2Trace.sh b/src/tools/bin/position2Trace.sh index e50e2957f9..63d7398f79 100755 --- a/src/tools/bin/position2Trace.sh +++ b/src/tools/bin/position2Trace.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Rene Widera, Richard Pausch +# Copyright 2013-2021 Rene Widera, Richard Pausch # # This file is part of PIConGPU. # diff --git a/src/tools/bin/printField.py b/src/tools/bin/printField.py index bcab59a55e..75ee79f5b2 100755 --- a/src/tools/bin/printField.py +++ b/src/tools/bin/printField.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2013-2020 Richard Pausch +# Copyright 2013-2021 Richard Pausch # # This file is part of PIConGPU. 
# diff --git a/src/tools/bin/radiationSyntheticDetector b/src/tools/bin/radiationSyntheticDetector index cdf60d8d29..8a2221e520 100755 --- a/src/tools/bin/radiationSyntheticDetector +++ b/src/tools/bin/radiationSyntheticDetector @@ -1,6 +1,6 @@ #!/usr/bin/env python # -# Copyright 2013-2020 Richard Pausch +# Copyright 2013-2021 Richard Pausch # # This file is part of PIConGPU. # diff --git a/src/tools/bin/smooth.py b/src/tools/bin/smooth.py index 84737b1bc3..d579417286 100644 --- a/src/tools/bin/smooth.py +++ b/src/tools/bin/smooth.py @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Richard Pausch +# Copyright 2013-2021 Richard Pausch # # This file is part of PIConGPU. # diff --git a/src/tools/bin/splash2vtk.sh b/src/tools/bin/splash2vtk.sh index 41c68b612d..499c31e2be 100755 --- a/src/tools/bin/splash2vtk.sh +++ b/src/tools/bin/splash2vtk.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Axel Huebl +# Copyright 2013-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/src/tools/bin/transpose b/src/tools/bin/transpose index 48e11696bf..aba8741c61 100755 --- a/src/tools/bin/transpose +++ b/src/tools/bin/transpose @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PIConGPU. # diff --git a/src/tools/bin/uncrustifyMyCode b/src/tools/bin/uncrustifyMyCode index 957045f920..5dc1b664dc 100755 --- a/src/tools/bin/uncrustifyMyCode +++ b/src/tools/bin/uncrustifyMyCode @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Copyright 2015-2020 Rene Widera +# Copyright 2015-2021 Rene Widera # # This file is part of PIConGPU. 
# diff --git a/src/tools/png2gas/CMakeLists.txt b/src/tools/png2gas/CMakeLists.txt index 180255cf49..40a4dbe129 100644 --- a/src/tools/png2gas/CMakeLists.txt +++ b/src/tools/png2gas/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright 2014-2020 Axel Huebl, Benjamin Schneider, Felix Schmitt, Heiko Burau, Rene Widera +# Copyright 2014-2021 Axel Huebl, Benjamin Schneider, Felix Schmitt, Heiko Burau, Rene Widera # # This file is part of PIConGPU. # @@ -22,7 +22,7 @@ # Required cmake version ################################################################################ -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) ################################################################################ @@ -68,10 +68,10 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) ################################################################################ diff --git a/src/tools/png2gas/png2gas.cpp b/src/tools/png2gas/png2gas.cpp index 7faaedb792..003264b80d 100644 --- a/src/tools/png2gas/png2gas.cpp +++ b/src/tools/png2gas/png2gas.cpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt, Axel Huebl, Richard Pausch +/* Copyright 2014-2021 Felix Schmitt, Axel Huebl, Richard Pausch * * This file is part of PIConGPU. * diff --git a/src/tools/share/awk/BinEnergyPlot.awk b/src/tools/share/awk/BinEnergyPlot.awk index 3b7c9f6b4c..85af25d60f 100644 --- a/src/tools/share/awk/BinEnergyPlot.awk +++ b/src/tools/share/awk/BinEnergyPlot.awk @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PIConGPU. 
# diff --git a/src/tools/share/awk/SumEnergyRange.awk b/src/tools/share/awk/SumEnergyRange.awk index f80fe8a473..3ae077ff7d 100644 --- a/src/tools/share/awk/SumEnergyRange.awk +++ b/src/tools/share/awk/SumEnergyRange.awk @@ -1,5 +1,5 @@ # -# Copyright 2013-2020 Rene Widera +# Copyright 2013-2021 Rene Widera # # This file is part of PIConGPU. # diff --git a/src/tools/share/gnuplot/BinEnergyPlot.gnuplot b/src/tools/share/gnuplot/BinEnergyPlot.gnuplot index 0a3718c78b..38861cf073 100644 --- a/src/tools/share/gnuplot/BinEnergyPlot.gnuplot +++ b/src/tools/share/gnuplot/BinEnergyPlot.gnuplot @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Axel Huebl, Richard Pausch +# Copyright 2013-2021 Axel Huebl, Richard Pausch # # This file is part of PIConGPU. # diff --git a/src/tools/splash2txt/CMakeLists.txt b/src/tools/splash2txt/CMakeLists.txt index 64fdffd4f8..1a2d4ad715 100644 --- a/src/tools/splash2txt/CMakeLists.txt +++ b/src/tools/splash2txt/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2013-2020 Felix Schmitt, Axel Huebl, Rene Widera +# Copyright 2013-2021 Felix Schmitt, Axel Huebl, Rene Widera # # This file is part of splash2txt. 
# @@ -22,7 +22,7 @@ # Required cmake version ################################################################################ -cmake_minimum_required(VERSION 3.11.4) +cmake_minimum_required(VERSION 3.15.0) ################################################################################ @@ -62,10 +62,10 @@ endif() # Language Flags ############################################################################### -# enforce C++11 +# enforce C++14 set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD 14) ################################################################################ diff --git a/src/tools/splash2txt/include/ITools.hpp b/src/tools/splash2txt/include/ITools.hpp index e469919c37..1f9a671575 100644 --- a/src/tools/splash2txt/include/ITools.hpp +++ b/src/tools/splash2txt/include/ITools.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt +/* Copyright 2013-2021 Felix Schmitt * * This file is part of splash2txt. * diff --git a/src/tools/splash2txt/include/splash2txt.hpp b/src/tools/splash2txt/include/splash2txt.hpp index 1f184af073..8c91084377 100644 --- a/src/tools/splash2txt/include/splash2txt.hpp +++ b/src/tools/splash2txt/include/splash2txt.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Axel Huebl, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Axel Huebl, Rene Widera * * This file is part of splash2txt. * diff --git a/src/tools/splash2txt/include/tools_adios_parallel.hpp b/src/tools/splash2txt/include/tools_adios_parallel.hpp index b7d3b96365..f55e9b67af 100644 --- a/src/tools/splash2txt/include/tools_adios_parallel.hpp +++ b/src/tools/splash2txt/include/tools_adios_parallel.hpp @@ -1,5 +1,5 @@ /* - *Copyright 2014-2020 Felix Schmitt, Conrad Schumann + *Copyright 2014-2021 Felix Schmitt, Conrad Schumann * * This file is part of splash2txt. 
* diff --git a/src/tools/splash2txt/include/tools_splash_parallel.hpp b/src/tools/splash2txt/include/tools_splash_parallel.hpp index 5bbeab3139..b665b039da 100644 --- a/src/tools/splash2txt/include/tools_splash_parallel.hpp +++ b/src/tools/splash2txt/include/tools_splash_parallel.hpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt +/* Copyright 2013-2021 Felix Schmitt * * This file is part of splash2txt. * diff --git a/src/tools/splash2txt/splash2txt.cpp b/src/tools/splash2txt/splash2txt.cpp index 2c5cbbd7ec..f5a883077f 100644 --- a/src/tools/splash2txt/splash2txt.cpp +++ b/src/tools/splash2txt/splash2txt.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Axel Huebl, Rene Widera, +/* Copyright 2013-2021 Felix Schmitt, Axel Huebl, Rene Widera, * Alexander Grund * * This file is part of splash2txt. diff --git a/src/tools/splash2txt/tools_adios_parallel.cpp b/src/tools/splash2txt/tools_adios_parallel.cpp index c18e2da1d5..1fb08fecf1 100644 --- a/src/tools/splash2txt/tools_adios_parallel.cpp +++ b/src/tools/splash2txt/tools_adios_parallel.cpp @@ -1,4 +1,4 @@ -/* Copyright 2014-2020 Felix Schmitt, Conrad Schumann, Axel Huebl +/* Copyright 2014-2021 Felix Schmitt, Conrad Schumann, Axel Huebl * * This file is part of splash2txt. * diff --git a/src/tools/splash2txt/tools_splash_parallel.cpp b/src/tools/splash2txt/tools_splash_parallel.cpp index d3e7fe02fc..21a3bef50e 100644 --- a/src/tools/splash2txt/tools_splash_parallel.cpp +++ b/src/tools/splash2txt/tools_splash_parallel.cpp @@ -1,4 +1,4 @@ -/* Copyright 2013-2020 Felix Schmitt, Axel Huebl, Rene Widera +/* Copyright 2013-2021 Felix Schmitt, Axel Huebl, Rene Widera * * This file is part of splash2txt. * diff --git a/test/correctBranchPR b/test/correctBranchPR index 29345834b9..38ead49cfc 100755 --- a/test/correctBranchPR +++ b/test/correctBranchPR @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2017-2020 Axel Huebl +# Copyright 2017-2021 Axel Huebl # # This file is part of PIConGPU. 
# @@ -22,55 +22,71 @@ # Disallow PRs to `ComputationalRadiationPhysics/picongpu` branch `master` # if not an other mainline branch such as `dev` or `release-...` # -# See: https://docs.travis-ci.com/user/environment-variables/ -# https://developer.github.com/v3/pulls/#get-a-single-pull-request -# # -> only enforced for `master` branch # -> only enforced for mainline repo (not for forks) # -# This file needs to be sourced in .travis.yml to work. +# dependencies: curl, python3 # -# @result 0 if correct target (or not in travis CI for mainline), else 1 +# @result 0 if correct target, else 1 # -# Are we even in travis? Otherwise pass this test. -if [ "$TRAVIS" != "true" ] +#!/bin/bash + +set -e +set -o pipefail + +cd $CI_PROJECT_DIR + +is_pr=$(echo "$CI_BUILD_REF_NAME" | grep -q "^pr-" && echo 0 || echo 1) +# merge only pull requests + +mainline_slug="ComputationalRadiationPhysics/picongpu" + +# only enforced for PRs +if [ $is_pr -eq 0 ] then - echo "Not in travis, so I have nothing to do :)" -else + github_group_repo="ComputationalRadiationPhysics/picongpu" - mainline_slug="ComputationalRadiationPhysics/picongpu" + pr_id=$(echo "$CI_BUILD_REF_NAME" | cut -d"/" -f1 | cut -d"-" -f2) + # used a token without any rights from psychocoderHPC to avoid API query limitations + curl_data=$(curl -u psychocoderHPC:$GITHUB_TOKEN -X GET https://api.github.com/repos/${github_group_repo}/pulls/${pr_id} 2>/dev/null) + echo "--- curl data ---" + echo "$curl_data" + echo "-----------------" + # get the destination branch + destination_branch=$(echo "$curl_data" | python3 -c 'import json,sys;obj=json.loads(sys.stdin.read());print(obj["base"]["ref"])') + echo "destination_branch=${destination_branch}" - # only enforced for PRs - if [ "$TRAVIS_EVENT_TYPE" == "pull_request" ] + # only enforced for `master` branch + if [ "$destination_branch" == "master" ] then - # only enforced for `master` branch - if [ "$TRAVIS_BRANCH" == "master" ] + repo_slug=$(echo "$curl_data" | python3 -c 'import 
json,sys;obj=json.loads(sys.stdin.read());print(obj["base"]["repo"]["full_name"])') + echo "repo_slug=${repo_slug}" + # only enforced for mainline repo (not for forks) + if [ "$repo_slug" == "$mainline_slug" ] then - # only enforced for mainline repo (not for forks) - if [ "$TRAVIS_REPO_SLUG" == "$mainline_slug" ] + pull_request_slug=$(echo "$curl_data" | python3 -c 'import json,sys;obj=json.loads(sys.stdin.read());print(obj["head"]["repo"]["full_name"])') + echo "pull_request_slug=${pull_request_slug}" + # origin repo is not our mainline? so it's a PR from a fork! + if [ "$pull_request_slug" != "$mainline_slug" ] then - # origin repo is not our mainline? so it's a PR from a fork! - if [ "$TRAVIS_PULL_REQUEST_SLUG" != "$mainline_slug" ] - then - # the PR came from a fork owned by the first part of the slug - pr_author=$(echo "$TRAVIS_PULL_REQUEST_SLUG" | awk -F "/" '{print $1}') - pr_branch=$TRAVIS_PULL_REQUEST_BRANCH - echo "" - echo "Pull request opened to wrong branch!" - echo "" - echo "New features need to go to our 'dev' branch but your" - echo "pull-request from '"$TRAVIS_PULL_REQUEST_SLUG"' was" - echo "sent to 'master' which is only updated by our" - echo "maintainers for new stable releases." - echo "" - echo "Please re-open your pull-request against our 'dev' branch:" - echo " https://github.com/ComputationalRadiationPhysics/picongpu/compare/dev...$pr_author:$pr_branch?expand=1" - echo "" - echo "For further information, please see:" - echo " https://github.com/ComputationalRadiationPhysics/picongpu/blob/dev/CONTRIBUTING.md" - exit 1 - fi + # the PR came from a fork + pr_label=$(echo "$curl_data" | python3 -c 'import json,sys;obj=json.loads(sys.stdin.read());print(obj["head"]["label"])') + echo "pr_label=${pr_label}" + echo "" + echo "Pull request opened to wrong branch!" 
+ echo "" + echo "New features need to go to our 'dev' branch but your" + echo "pull-request from '"$pull_request_slug"' was" + echo "sent to 'master' which is only updated by our" + echo "maintainers for new stable releases." + echo "" + echo "Please re-open your pull-request against our 'dev' branch:" + echo " https://github.com/ComputationalRadiationPhysics/picongpu/compare/dev...$pr_label?expand=1" + echo "" + echo "For further information, please see:" + echo " https://github.com/ComputationalRadiationPhysics/picongpu/blob/dev/CONTRIBUTING.md" + exit 1 fi fi fi diff --git a/test/hasCudaGlobalKeyword b/test/hasCudaGlobalKeyword index cd1fe46a7c..3aa8e7a7ee 100755 --- a/test/hasCudaGlobalKeyword +++ b/test/hasCudaGlobalKeyword @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2016-2020 Rene Widera +# Copyright 2016-2021 Rene Widera # # This file is part of PIConGPU. # diff --git a/test/hasEOLwhiteSpace b/test/hasEOLwhiteSpace index e6483e1861..8c70c790ab 100755 --- a/test/hasEOLwhiteSpace +++ b/test/hasEOLwhiteSpace @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2016-2020 Axel Huebl, Rene Widera +# Copyright 2016-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # @@ -33,7 +33,7 @@ files=() pattern="\.def$|\.h$|\.cpp$|\.cu$|\.hpp$|\.tpp$|\.kernel$|\.loader$|"\ "\.param$|\.unitless$|\.sh$|\.bash$|\.cfg$|\.tpl$|\.conf$|"\ "\.awk$|\.gnuplot$|\.cmake$|\.profile$|\.example$|\.py$|"\ -"cmakeFlags|CMakeLists\.txt|src/tools/bin" +"cmakeFlags$|CMakeLists\.txt|src/tools/bin" for i in $(find . \ -not -path "./.git/*" \ diff --git a/test/hasExtLibIncludeBrackets b/test/hasExtLibIncludeBrackets index a7df3bd55b..a9a566af95 100755 --- a/test/hasExtLibIncludeBrackets +++ b/test/hasExtLibIncludeBrackets @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2016-2020 Axel Huebl, Rene Widera +# Copyright 2016-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. 
# diff --git a/test/hasNonASCII b/test/hasNonASCII index 6d1fea619d..0117ed3220 100755 --- a/test/hasNonASCII +++ b/test/hasNonASCII @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2016-2020 Axel Huebl, Rene Widera +# Copyright 2016-2021 Axel Huebl, Rene Widera # # This file is part of PIConGPU. # diff --git a/test/hasSpaceBeforePrecompiler b/test/hasSpaceBeforePrecompiler index 1a86c66d66..080f6c5cb8 100755 --- a/test/hasSpaceBeforePrecompiler +++ b/test/hasSpaceBeforePrecompiler @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2016-2020 Axel Huebl +# Copyright 2016-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/test/hasTabs b/test/hasTabs index 9f3a4b5185..9f11234f2e 100755 --- a/test/hasTabs +++ b/test/hasTabs @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright 2016-2020 Axel Huebl +# Copyright 2016-2021 Axel Huebl # # This file is part of PIConGPU. # diff --git a/thirdParty/alpaka/.dockerignore b/thirdParty/alpaka/.dockerignore deleted file mode 100644 index 6b8710a711..0000000000 --- a/thirdParty/alpaka/.dockerignore +++ /dev/null @@ -1 +0,0 @@ -.git diff --git a/thirdParty/alpaka/.gitignore b/thirdParty/alpaka/.gitignore deleted file mode 100644 index 7e8b50b81b..0000000000 --- a/thirdParty/alpaka/.gitignore +++ /dev/null @@ -1,21 +0,0 @@ -/doc/doxygen/* -!/doc/doxygen/Doxyfile -!/doc/doxygen/alpaka_doxygen.png -/doc/latex/* -**/build - -# tmp files -*~ - -# netbeans project files -/nbproject/ - -# Code::Blocks project files -/*.cbp -/*.layout - -# original backup files -*.orig - -# VIM project files -.vimrc diff --git a/thirdParty/alpaka/.travis.yml b/thirdParty/alpaka/.travis.yml deleted file mode 100644 index a11d5a1ac3..0000000000 --- a/thirdParty/alpaka/.travis.yml +++ /dev/null @@ -1,382 +0,0 @@ -# -# Copyright 2015-2019 Benjamin Worpitz, Erik Zenker -# -# This file is part of Alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. 
If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# - -os: linux -dist: xenial -language: generic -services: - - docker - -################################################################################ -# NOTE: Testing the full matrix is not practical. -# Therefore we aim to have each value been set in at lest one job. -# CXX : {g++, clang++, cl.exe} -# [g++] ALPAKA_CI_GCC_VER : {4.9, 5, 6, 7, 8, 9} -# [clang++] ALPAKA_CI_CLANG_VER : {4.0.0, 5.0.2, 6.0.1, 7.0.1, 8.0.0, 9.0.0} -# ALPAKA_CI_STDLIB : {libstdc++, [CXX==clang++]:libc++} -# [clang++] ALPAKA_CI_CLANG_LIBSTDCPP_VERSION : {5, 7} -# CMAKE_BUILD_TYPE : {Debug, Release} -# ALPAKA_CI : {TRAVIS} -# ALPAKA_CI_DOCKER_BASE_IMAGE_NAME : {ubuntu:14.04, ubuntu:16.04, ubuntu:18.04} -# ALPAKA_CI_BOOST_BRANCH : {[CXX!=cl.exe&&OS!=osx]:boost-1.62.0, [CXX!=cl.exe&&OS!=osx]:boost-1.63.0, [OS!=osx]boost-1.64.0, boost-1.65.1, boost-1.66.0, boost-1.67.0, boost-1.68.0, boost-1.69.0, boost-1.70.0, boost-1.71.0} -# ALPAKA_CI_CMAKE_VER : {3.11.4, 3.12.4, 3.13.5, 3.14.7, 3.15.5, 3.16.0} -# ALPAKA_CI_SANITIZERS : {ASan, UBsan, TSan} -# TSan is not currently used because it produces many unexpected errors -# ALPAKA_CI_ANALYSIS : {ON, OFF} -# ALPAKA_DEBUG : {0, 1, 2} -# ALPAKA_ACC_GPU_CUDA_ONLY_MODE : {ON, OFF} -# ALPAKA_ACC_GPU_HIP_ONLY_MODE : {ON, OFF} -# ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE : {ON, OFF} -# ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE : {ON, OFF} -# ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE : {ON, OFF} -# ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE : {ON, OFF} -# [ON] OMP_NUM_THREADS : {1, 2, 3, 4} -# ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE : {ON, OFF} -# [ON] OMP_NUM_THREADS : {1, 2, 3, 4} -# ALPAKA_ACC_CPU_BT_OMP4_ENABLE : {ON, OFF} -# [ON] OMP_NUM_THREADS : {1, 2, 3, 4} -# ALPAKA_ACC_GPU_CUDA_ENABLE : {ON, OFF} -# [ON] ALPAKA_CUDA_VERSION : {8.0, 9.0, 9.1, 9.2, 10.0, 10.1, 10.2} -# [ON] ALPAKA_CUDA_COMPILER : {nvcc, [CXX==clang++]:clang} -# ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE : 
{ON, OFF} -# ALPAKA_ACC_GPU_HIP_ENABLE : {ON, OFF} -# [ON] ALPAKA_CI_HIP_BRANCH : {master} -# [ON] ALPAKA_HIP_PLATFORM : {nvcc} -env: - global: - - ALPAKA_CI=TRAVIS - - ALPAKA_CI_DOCKER_IMAGE_NAME=alpaka_ubuntu - - ALPAKA_CI_DOCKER_CACHE_DIR=${HOME}/cache/docker - - ALPAKA_CI_DOCKER_CACHE_IMAGE_FILE_PATH=${ALPAKA_CI_DOCKER_CACHE_DIR}/${ALPAKA_CI_DOCKER_IMAGE_NAME}.tar.gz - - BOOST_ROOT=${HOME}/boost - - ALPAKA_CI_BOOST_LIB_DIR=${HOME}/boost_libs/ - - ALPAKA_CI_CLANG_DIR=${HOME}/llvm - - ALPAKA_CI_CMAKE_DIR=${HOME}/CMake - - ALPAKA_CI_CUDA_DIR=${HOME}/CUDA - - ALPAKA_CI_HIP_ROOT_DIR=${HOME}/hip - - TBB_ROOT_DIR=${HOME}/tbb - - ALPAKA_CI_SANITIZERS= - - ALPAKA_CI_ANALYSIS=OFF - - ALPAKA_CI_CLANG_LIBSTDCPP_VERSION=5 - - ALPAKA_ACC_GPU_CUDA_ENABLE=OFF - - ALPAKA_ACC_GPU_HIP_ENABLE=OFF - -matrix: - include: - ### Analysis builds - - name: nvcc-9.1 + gcc-4.9 Debug Analysis - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.1 ALPAKA_CUDA_COMPILER=nvcc - - name: gcc-8 Debug Analysis - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=8 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.66.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 - - name: clang-4 + CUDA-8.0 Debug Analysis - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=1 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=clang - - name: clang-6 Debug Analysis - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Debug 
ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 - - name: macOS 10.14 Xcode 11.2 Debug Analysis - os: osx - osx_image: xcode11.2 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF - - name: MSVC-2017 Debug Analysis - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_CI_ANALYSIS=ON ALPAKA_DEBUG=2 - - ### macOS - - name: macOS 10.14 Xcode 10.2.1 Debug - os: osx - osx_image: xcode10.2 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_CXX_STANDARD=14 - - name: macOS 10.14 Xcode 10.2.1 Release - os: osx - osx_image: xcode10.2 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF - - - name: macOS 10.14.4 Xcode 10.3 Debug - os: osx - osx_image: xcode10.3 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_CXX_STANDARD=14 - - name: macOS 10.14.4 Xcode 10.3 Release - os: osx - osx_image: xcode10.3 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF - - - name: macOS 10.14 Xcode 11.0 Debug - os: osx - osx_image: xcode11 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_CXX_STANDARD=14 - - name: macOS 10.14 Xcode 11.0 Release - os: osx - osx_image: xcode11 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF - - - name: macOS 10.14 Xcode 11.1 Debug - os: osx - osx_image: xcode11.1 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Debug 
ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_CXX_STANDARD=14 - - name: macOS 10.14 Xcode 11.1 Release - os: osx - osx_image: xcode11.1 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF - - - name: macOS 10.14 Xcode 11.2 Debug - os: osx - osx_image: xcode11.2 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_CXX_STANDARD=14 - - name: macOS 10.14 Xcode 11.2 Release - os: osx - osx_image: xcode11.2 - env: CXX=g++ CC=gcc CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF - - ### Windows - - name: MSVC-2017 Release - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.14.7 OMP_NUM_THREADS=4 ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE=OFF - - name: MSVC-2017 Debug - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.11.4 OMP_NUM_THREADS=4 ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE=OFF ALPAKA_CXX_STANDARD=14 - - ### Ubuntu - ## native - # g++ - # We can not enable UBSan when using gcc because it does not have a -fsanitize-blacklist option to suppress errors in boost etc. 
- # gcc 6 ASan is triggered within libtbb.so - # gcc 7 ASan introduced 'stack-use-after-scope' which is triggered by GOMP_parallel - - name: gcc-4.9 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.11.4 OMP_NUM_THREADS=4 ALPAKA_CXX_STANDARD=11 - - name: gcc-5 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.66.0 ALPAKA_CI_CMAKE_VER=3.16.0 OMP_NUM_THREADS=3 - - name: gcc-6 Debug c++14 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=6 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.14.7 OMP_NUM_THREADS=2 ALPAKA_CXX_STANDARD=14 - - name: gcc-7 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=7 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.63.0 ALPAKA_CI_CMAKE_VER=3.13.5 OMP_NUM_THREADS=3 - - name: gcc-8 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=8 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.12.4 OMP_NUM_THREADS=4 - - name: gcc-9 Debug c++17 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.15.5 OMP_NUM_THREADS=3 ALPAKA_CXX_STANDARD=17 ALPAKA_ACC_CPU_BT_OMP4_ENABLE=OFF - - # clang++ - - name: clang-4 Debug UBSan - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.11.4 OMP_NUM_THREADS=4 
ALPAKA_CI_SANITIZERS=UBSan - - name: clang-5 Debug c++14 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=5.0.2 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.63.0 ALPAKA_CI_CMAKE_VER=3.14.7 OMP_NUM_THREADS=3 ALPAKA_CXX_STANDARD=14 - - name: clang-6 Release ASan C++17 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.15.5 OMP_NUM_THREADS=2 ALPAKA_CI_SANITIZERS=ASan ALPAKA_CXX_STANDARD=17 - - name: clang-7 Release c++17 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.13.5 OMP_NUM_THREADS=2 ALPAKA_CXX_STANDARD=17 ALPAKA_CI_CLANG_LIBSTDCPP_VERSION=7 - - name: clang-8 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.12.4 OMP_NUM_THREADS=4 - - name: clang-9 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=9.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.16.0 OMP_NUM_THREADS=3 - - ## CUDA 8.0 - # nvcc + g++ - - name: nvcc-8.0 + gcc-4.9 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.62.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="20;60" ALPAKA_CXX_STANDARD=11 - # clang++ - - name: clang-4 + CUDA-8.0 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang 
ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=clang - - name: clang-5 + CUDA-8.0 Release ALPAKA_ACC_GPU_CUDA_ONLY_MODE - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=5.0.2 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=clang ALPAKA_CUDA_ARCH="20;35" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON - - name: clang-6 + CUDA-8.0 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.64.0 ALPAKA_CI_CMAKE_VER=3.16.0 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=clang - - name: clang-7 + CUDA-8.0 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=clang - - name: clang-8 + CUDA-8.0 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=8.0 ALPAKA_CUDA_COMPILER=clang - - ## CUDA 9.0 - # nvcc + g++ - - name: nvcc-9.0 + gcc-4.9 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.0 ALPAKA_CUDA_COMPILER=nvcc - - name: 
nvcc-9.0 + gcc-5 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="70" - # clang++ - - name: clang-6 + CUDA-9.0 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.0 ALPAKA_CUDA_COMPILER=clang ALPAKA_CUDA_ARCH="35" - - name: clang-7 + CUDA-9.0 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.0 ALPAKA_CUDA_COMPILER=clang ALPAKA_CUDA_ARCH="35;70" - - name: clang-8 + CUDA-9.0 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.0 ALPAKA_CUDA_COMPILER=clang - - ## CUDA 9.1 - # nvcc + g++ - - name: nvcc-9.1 + gcc-4.9 Debug ALPAKA_ACC_GPU_CUDA_ONLY_MODE - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;72" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON - - name: nvcc-9.1 + gcc-5 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ 
ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.1 ALPAKA_CUDA_COMPILER=nvcc - # nvcc + clang++ - - name: nvcc-9.1 + clang-4 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;70" - # clang++ - - name: clang-7 + CUDA-9.1 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.1 ALPAKA_CUDA_COMPILER=clang ALPAKA_CUDA_ARCH="35;72" - - name: clang-8 + CUDA-9.1 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.1 ALPAKA_CUDA_COMPILER=clang - - ## CUDA 9.2 - # nvcc + g++ - - name: nvcc-9.2 + gcc-4.9 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;72" - - name: nvcc-9.2 + gcc-5 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc - - name: nvcc-9.2 + gcc-6 Debug separable compilation - env: 
ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=6 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_NVCC_SEPARABLE_COMPILATION=ON - - name: nvcc-9.2 + gcc-7 Release + relaxed constexpr off + extended lambda off - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=7 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;35" ALPAKA_CUDA_NVCC_EXPT_RELAXED_CONSTEXPR=OFF ALPAKA_CUDA_NVCC_EXPT_EXTENDED_LAMBDA=OFF - # nvcc + clang++ - - name: nvcc-9.2 + clang-4 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;70" - # clang++ - - name: clang-7 + CUDA-9.2 Release c++17 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=clang ALPAKA_CUDA_ARCH="35;72" ALPAKA_CXX_STANDARD=17 - - name: clang-8 + CUDA-9.2 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=clang - - name: clang-9 + CUDA-9.2 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang 
ALPAKA_CI_CLANG_VER=9.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.16.0 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=clang - - ## CUDA 10.0 - # nvcc + g++ - - name: nvcc-10.0 + gcc-4.9 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;75" - - name: nvcc-10.0 + gcc-5 Release c++14 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CXX_STANDARD=14 - - name: nvcc-10.0 + gcc-6 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=6 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc - - name: nvcc-10.0 + gcc-7 Release c++14 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=7 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;35" ALPAKA_CXX_STANDARD=14 - # nvcc + clang++ - - name: nvcc-10.0 + clang-4 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.16.0 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc 
ALPAKA_CUDA_ARCH="30;60" - - name: nvcc-10.0 + clang-5 Debug separable compilation - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=5.0.2 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="75" ALPAKA_CUDA_NVCC_SEPARABLE_COMPILATION=ON - - name: nvcc-10.0 + clang-6 Debug c++14 - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="70" ALPAKA_CXX_STANDARD=14 - # nvcc + MSVC - - name: nvcc-10.0 + MSVC-2017 Release ALPAKA_ACC_GPU_CUDA_ONLY_MODE separable compilation - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_ARCH="30;75" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON ALPAKA_CUDA_NVCC_SEPARABLE_COMPILATION=ON - - name: nvcc-10.0 + MSVC-2017 Debug (Only one CPU backend enabled due to compile time) - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.66.0 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE=OFF ALPAKA_ACC_CPU_BT_OMP4_ENABLE=OFF ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=OFF - # clang++ - - name: clang-8 + CUDA-10.0 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ 
ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=clang - - name: clang-9 + CUDA-10.0 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=9.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.0 ALPAKA_CUDA_COMPILER=clang - - ## CUDA 10.1 - # nvcc + g++ - - name: nvcc-10.1 + gcc-4.9 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;75" - - name: nvcc-10.1 + gcc-5 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc - - name: nvcc-10.1 + gcc-6 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=6 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc - - name: nvcc-10.1 + gcc-7 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=7 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;35" - - name: nvcc-10.1 + gcc-8 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=8 CMAKE_BUILD_TYPE=Debug 
ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;35" - # nvcc + clang++ - - name: nvcc-10.1 + clang-4 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;60" - - name: nvcc-10.1 + clang-5 Release ALPAKA_ACC_GPU_CUDA_ONLY_MODE - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=5.0.2 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="75" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON - - name: nvcc-10.1 + clang-6 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="70" - - name: nvcc-10.1 + clang-7 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="70" - - name: nvcc-10.1 + clang-8 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 
ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="75" - # nvcc + MSVC - - name: nvcc-10.1 + MSVC-2017 Debug ALPAKA_ACC_GPU_CUDA_ONLY_MODE - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_ARCH="30;75" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON - - name: nvcc-10.1 + MSVC-2017 Release (Only one CPU backend enabled due to compile time) - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE=OFF ALPAKA_ACC_CPU_BT_OMP4_ENABLE=OFF ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=OFF - # clang++ - - name: clang-9 + CUDA-10.1 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=9.0.0 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.1 ALPAKA_CUDA_COMPILER=clang - - ## CUDA 10.2 - # nvcc + g++ - - name: nvcc-10.2 + gcc-4.9 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=4.9 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;75" - - name: nvcc-10.2 + gcc-5 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.68.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 
ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;35" - - name: nvcc-10.2 + gcc-6 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:14.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=6 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.16.0 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc - - name: nvcc-10.2 + gcc-7 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=7 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc - - name: nvcc-10.2 + gcc-8 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:18.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=8 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;35" - # nvcc + clang++ - - name: nvcc-10.2 + clang-4 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=4.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="30;60" - - name: nvcc-10.2 + clang-5 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=5.0.2 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.13.5 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="75" - - name: nvcc-10.2 + clang-6 Release ALPAKA_ACC_GPU_CUDA_ONLY_MODE - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=6.0.1 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ 
ALPAKA_CI_BOOST_BRANCH=boost-1.69.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="70" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON - - name: nvcc-10.2 + clang-7 Debug - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=7.0.1 CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="75" - - name: nvcc-10.2 + clang-8 Release - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=clang++ CC=clang ALPAKA_CI_CLANG_VER=8.0.0 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.70.0 ALPAKA_CI_CMAKE_VER=3.12.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_ARCH="75" - # nvcc + MSVC - - name: nvcc-10.2 + MSVC-2017 Debug ALPAKA_ACC_GPU_CUDA_ONLY_MODE - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Debug ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_CUDA_ARCH="30;75" ALPAKA_ACC_GPU_CUDA_ONLY_MODE=ON - - name: nvcc-10.2 + MSVC-2017 Release (Only one CPU backend enabled due to compile time) - os: windows - dist: 1803-containers - language: cpp - env: CXX=cl.exe CC=cl.exe CMAKE_BUILD_TYPE=Release ALPAKA_CI_BOOST_BRANCH=boost-1.65.1 ALPAKA_CI_CMAKE_VER=3.14.7 ALPAKA_ACC_GPU_CUDA_ENABLE=ON ALPAKA_CUDA_VERSION=10.2 ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE=OFF ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE=OFF ALPAKA_ACC_CPU_BT_OMP4_ENABLE=OFF ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE=OFF - - ## HIP - - name: HIP(nvcc9.2) + gcc-5 Debug ALPAKA_ACC_GPU_HIP_ONLY_MODE - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 
CMAKE_BUILD_TYPE=Debug ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.67.0 ALPAKA_CI_CMAKE_VER=3.11.4 ALPAKA_ACC_GPU_HIP_ENABLE=ON ALPAKA_ACC_GPU_HIP_ONLY_MODE=ON ALPAKA_CI_HIP_BRANCH="roc-2.8.0" ALPAKA_HIP_PLATFORM=nvcc ALPAKA_CUDA_ARCH="30;35" ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc ALPAKA_CUDA_NVCC_EXPT_RELAXED_CONSTEXPR=OFF ALPAKA_CUDA_NVCC_EXPT_EXTENDED_LAMBDA=OFF - - name: HIP(nvcc9.2) + gcc-5 Release ALPAKA_ACC_GPU_HIP_ONLY_MODE - env: ALPAKA_CI_DOCKER_BASE_IMAGE_NAME=ubuntu:16.04 CXX=g++ CC=gcc ALPAKA_CI_GCC_VER=5 CMAKE_BUILD_TYPE=Release ALPAKA_CI_STDLIB=libstdc++ ALPAKA_CI_BOOST_BRANCH=boost-1.71.0 ALPAKA_CI_CMAKE_VER=3.15.5 ALPAKA_ACC_GPU_HIP_ENABLE=ON ALPAKA_ACC_GPU_HIP_ONLY_MODE=ON ALPAKA_CI_HIP_BRANCH="roc-2.8.0" ALPAKA_HIP_PLATFORM=nvcc ALPAKA_CUDA_ARCH="30;35" ALPAKA_CUDA_VERSION=9.2 ALPAKA_CUDA_COMPILER=nvcc - -branches: - except: - - gh-pages - -cache: - directories: - - $ALPAKA_CI_DOCKER_CACHE_DIR - -script: - - set -eovx pipefail - - if [ "$TRAVIS_OS_NAME" = "linux" ] ;then sudo apt-get -y --quiet --allow-unauthenticated --no-install-recommends install smem ;fi - - if [ "$TRAVIS_OS_NAME" = "linux" ] ;then sudo apt-get -y --quiet --allow-unauthenticated --no-install-recommends install moreutils ;fi - - if [ "$TRAVIS_OS_NAME" = "osx" ] ;then brew install moreutils ;fi - - if [ "$TRAVIS_OS_NAME" = "linux" ] || [ "$TRAVIS_OS_NAME" = "osx" ] ;then ./script/travis/script.sh | ts ;fi - - if [ "$TRAVIS_OS_NAME" = "windows" ] ;then ./script/travis/script.sh ;fi - -after_failure: - - ./script/travis/after_failure.sh - -notifications: - email: false diff --git a/thirdParty/alpaka/.zenodo.json b/thirdParty/alpaka/.zenodo.json deleted file mode 100644 index 80d29a6ce4..0000000000 --- a/thirdParty/alpaka/.zenodo.json +++ /dev/null @@ -1,66 +0,0 @@ -{ - "title": "Alpaka: Abstraction Library for Parallel Kernel Acceleration", - "description": "The alpaka library is a header-only C++11 abstraction library for accelerator development. 
Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism.", - "creators": [ - { - "affiliation": "LogMeIn, Inc.", - "name": "Worpitz, Benjamin" - }, - { - "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf, TU Dresden", - "name": "Matthes, Alexander", - "orcid": "0000-0002-6702-2015" - }, - { - "affiliation": "LogMeIn, Inc.", - "name": "Zenker, Erik", - "orcid": "0000-0001-9417-8712" - }, - { - "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf, TU Dresden", - "name": "Huebl, Axel", - "orcid": "0000-0003-1943-7141" - }, - { - "affiliation": "Helmholtz-Zentrum Dresden-Rossendorf", - "name": "Widera, René", - "orcid": "0000-0003-1642-0459" - } - ], - "access_right": "open", - "keywords": [ - "HPC", - "CUDA", - "OpenMP", - "C++", - "GPU", - "HIP", - "heterogeneous computing", - "performance portability" - ], - "license": "MPL-2.0", - "upload_type": "software", - "grants": [ - { - "id": "654220" - } - ], - "related_identifiers": [ - { - "identifier": "DOI:10.5281/zenodo.49768", - "relation": "isCitedBy" - }, - { - "identifier": "DOI:10.1007/978-3-319-46079-6_21", - "relation": "cites" - }, - { - "identifier": "DOI:10.1109/IPDPSW.2016.50", - "relation": "isCitedBy" - }, - { - "identifier": "DOI:10.1007/978-3-319-67630-2_36", - "relation": "isCitedBy" - } - ] -} diff --git a/thirdParty/alpaka/CMakeLists.txt b/thirdParty/alpaka/CMakeLists.txt deleted file mode 100644 index 0f7a6c59b5..0000000000 --- a/thirdParty/alpaka/CMakeLists.txt +++ /dev/null @@ -1,46 +0,0 @@ -# -# Copyright 2015-2019 Benjamin Worpitz -# -# This file is part of alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-# - -################################################################################ -# Required CMake version - -cmake_minimum_required(VERSION 3.11.4) - -project("alpakaAll") - -SET_PROPERTY(GLOBAL PROPERTY USE_FOLDERS ON) - -################################################################################ -# CMake policies -# -# Search in _ROOT: -# https://cmake.org/cmake/help/v3.12/policy/CMP0074.html - -if(POLICY CMP0074) - cmake_policy(SET CMP0074 NEW) -endif() - -################################################################################ -# Options and Variants - -option(alpaka_BUILD_EXAMPLES "Build the examples" ON) - -include(CTest) -# automatically defines: BUILD_TESTING, default is ON - -################################################################################ -# Add subdirectories - -if(alpaka_BUILD_EXAMPLES) - add_subdirectory("example/") -endif() -if(BUILD_TESTING) - add_subdirectory("test/") -endif() diff --git a/thirdParty/alpaka/Findalpaka.cmake b/thirdParty/alpaka/Findalpaka.cmake deleted file mode 100644 index 94d0187356..0000000000 --- a/thirdParty/alpaka/Findalpaka.cmake +++ /dev/null @@ -1,109 +0,0 @@ -#.rst: -# Findalpaka -# ---------- -# -# Abstraction library for parallel kernel acceleration -# https://github.com/ComputationalRadiationPhysics/alpaka -# -# Finding and Using alpaka -# ^^^^^^^^^^^^^^^^^^^^^ -# -# .. code-block:: cmake -# -# FIND_PACKAGE(alpaka -# [version] [EXACT] # Minimum or EXACT version, e.g. 1.0.0 -# [REQUIRED] # Fail with an error if alpaka or a required -# # component is not found -# [QUIET] # Do not warn if this module was not found -# [COMPONENTS <...>] # Compiled in components: ignored -# ) -# TARGET_LINK_LIBRARIES( PUBLIC alpaka) -# -# To provide a hint to this module where to find the alpaka installation, -# set the ALPAKA_ROOT variable. -# -# This module requires Boost. Make sure to provide a valid install of it -# under the environment variable BOOST_ROOT. 
-# -# ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE will require Boost.Fiber to be built. -# ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE and ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE will require a OpenMP 2.0+ capable compiler. -# ALPAKA_ACC_CPU_BT_OMP4_ENABLE will require a OpenMP 4.0+ capable compiler. -# ALPAKA_ACC_GPU_CUDA_ENABLE will require CUDA 8.0+ to be installed. -# ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE will require TBB 2.2+ to be installed -# -# Set the following CMake variables BEFORE calling find_packages to -# change the behaviour of this module: -# - ``ALPAKA_ACC_GPU_CUDA_ONLY_MODE`` {ON, OFF} -# - ``ALPAKA_ACC_GPU_HIP_ONLY_MODE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_CPU_BT_OMP4_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_GPU_CUDA_ENABLE`` {ON, OFF} -# - ``ALPAKA_ACC_GPU_HIP_ENABLE`` {ON, OFF} -# - ``ALPAKA_CUDA_VERSION`` {8.0, ...} -# - ``ALPAKA_CUDA_ARCH`` {sm_20, sm...} -# - ``ALPAKA_CUDA_FAST_MATH`` {ON, OFF} -# - ``ALPAKA_CUDA_FTZ`` {ON, OFF} -# - ``ALPAKA_CUDA_SHOW_REGISTER`` {ON, OFF} -# - ``ALPAKA_CUDA_KEEP_FILES`` {ON, OFF} -# - ``ALPAKA_CUDA_SHOW_CODELINES`` {ON, OFF} -# - ``ALPAKA_DEBUG`` {0, 1, 2} -# - ``ALPAKA_CXX_STANDARD`` {11, 14, 17} -# -# Result Variables -# ^^^^^^^^^^^^^^^^ -# -# - ``alpaka_FOUND`` -# TRUE if alpaka found a working install. -# - ``alpaka_VERSION`` -# Version in format Major.Minor.Patch -# - ``alpaka_COMPILE_OPTIONS`` -# Compiler options. -# - ``alpaka_COMPILE_DEFINITIONS`` -# Compiler definitions (without "-D" prefix!). -# - ``alpaka_DEFINITIONS`` -# Deprecated old compiler definitions. Combination of alpaka_COMPILE_OPTIONS and alpaka_COMPILE_DEFINITIONS prefixed with "-D". 
-# - ``alpaka_INCLUDE_DIRS`` -# Include directories required by the alpaka headers. -# - ``alpaka_LIBRARIES`` -# Libraries required to link against to use alpaka. -# -# -# IMPORTED Targets -# ^^^^^^^^^^^^^^^^ -# -# This module defines the :prop_tgt:`IMPORTED` target ``alpaka``, if alpaka has -# been found. -# - - -################################################################################ -# Copyright 2015-2019 Benjamin Worpitz -# -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER -# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, -# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE -# USE OR PERFORMANCE OF THIS SOFTWARE. - -FIND_PATH( - _ALPAKA_ROOT_DIR - NAMES "include/alpaka/alpaka.hpp" - HINTS "${ALPAKA_ROOT}" ENV ALPAKA_ROOT - DOC "alpaka ROOT location") - -IF(_ALPAKA_ROOT_DIR) - INCLUDE("${_ALPAKA_ROOT_DIR}/alpakaConfig.cmake") -ELSE() - MESSAGE(FATAL_ERROR "alpaka could not be found!") -ENDIF() diff --git a/thirdParty/alpaka/LICENSE b/thirdParty/alpaka/LICENSE deleted file mode 100644 index a612ad9813..0000000000 --- a/thirdParty/alpaka/LICENSE +++ /dev/null @@ -1,373 +0,0 @@ -Mozilla Public License Version 2.0 -================================== - -1. Definitions --------------- - -1.1. "Contributor" - means each individual or legal entity that creates, contributes to - the creation of, or owns Covered Software. - -1.2. 
"Contributor Version" - means the combination of the Contributions of others (if any) used - by a Contributor and that particular Contributor's Contribution. - -1.3. "Contribution" - means Covered Software of a particular Contributor. - -1.4. "Covered Software" - means Source Code Form to which the initial Contributor has attached - the notice in Exhibit A, the Executable Form of such Source Code - Form, and Modifications of such Source Code Form, in each case - including portions thereof. - -1.5. "Incompatible With Secondary Licenses" - means - - (a) that the initial Contributor has attached the notice described - in Exhibit B to the Covered Software; or - - (b) that the Covered Software was made available under the terms of - version 1.1 or earlier of the License, but not also under the - terms of a Secondary License. - -1.6. "Executable Form" - means any form of the work other than Source Code Form. - -1.7. "Larger Work" - means a work that combines Covered Software with other material, in - a separate file or files, that is not Covered Software. - -1.8. "License" - means this document. - -1.9. "Licensable" - means having the right to grant, to the maximum extent possible, - whether at the time of the initial grant or subsequently, any and - all of the rights conveyed by this License. - -1.10. "Modifications" - means any of the following: - - (a) any file in Source Code Form that results from an addition to, - deletion from, or modification of the contents of Covered - Software; or - - (b) any new file in Source Code Form that contains any Covered - Software. - -1.11. "Patent Claims" of a Contributor - means any patent claim(s), including without limitation, method, - process, and apparatus claims, in any patent Licensable by such - Contributor that would be infringed, but for the grant of the - License, by the making, using, selling, offering for sale, having - made, import, or transfer of either its Contributions or its - Contributor Version. - -1.12. 
"Secondary License" - means either the GNU General Public License, Version 2.0, the GNU - Lesser General Public License, Version 2.1, the GNU Affero General - Public License, Version 3.0, or any later versions of those - licenses. - -1.13. "Source Code Form" - means the form of the work preferred for making modifications. - -1.14. "You" (or "Your") - means an individual or a legal entity exercising rights under this - License. For legal entities, "You" includes any entity that - controls, is controlled by, or is under common control with You. For - purposes of this definition, "control" means (a) the power, direct - or indirect, to cause the direction or management of such entity, - whether by contract or otherwise, or (b) ownership of more than - fifty percent (50%) of the outstanding shares or beneficial - ownership of such entity. - -2. License Grants and Conditions --------------------------------- - -2.1. Grants - -Each Contributor hereby grants You a world-wide, royalty-free, -non-exclusive license: - -(a) under intellectual property rights (other than patent or trademark) - Licensable by such Contributor to use, reproduce, make available, - modify, display, perform, distribute, and otherwise exploit its - Contributions, either on an unmodified basis, with Modifications, or - as part of a Larger Work; and - -(b) under Patent Claims of such Contributor to make, use, sell, offer - for sale, have made, import, and otherwise transfer either its - Contributions or its Contributor Version. - -2.2. Effective Date - -The licenses granted in Section 2.1 with respect to any Contribution -become effective for each Contribution on the date the Contributor first -distributes such Contribution. - -2.3. Limitations on Grant Scope - -The licenses granted in this Section 2 are the only rights granted under -this License. No additional rights or licenses will be implied from the -distribution or licensing of Covered Software under this License. 
-Notwithstanding Section 2.1(b) above, no patent license is granted by a -Contributor: - -(a) for any code that a Contributor has removed from Covered Software; - or - -(b) for infringements caused by: (i) Your and any other third party's - modifications of Covered Software, or (ii) the combination of its - Contributions with other software (except as part of its Contributor - Version); or - -(c) under Patent Claims infringed by Covered Software in the absence of - its Contributions. - -This License does not grant any rights in the trademarks, service marks, -or logos of any Contributor (except as may be necessary to comply with -the notice requirements in Section 3.4). - -2.4. Subsequent Licenses - -No Contributor makes additional grants as a result of Your choice to -distribute the Covered Software under a subsequent version of this -License (see Section 10.2) or under the terms of a Secondary License (if -permitted under the terms of Section 3.3). - -2.5. Representation - -Each Contributor represents that the Contributor believes its -Contributions are its original creation(s) or it has sufficient rights -to grant the rights to its Contributions conveyed by this License. - -2.6. Fair Use - -This License is not intended to limit any rights You have under -applicable copyright doctrines of fair use, fair dealing, or other -equivalents. - -2.7. Conditions - -Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted -in Section 2.1. - -3. Responsibilities -------------------- - -3.1. Distribution of Source Form - -All distribution of Covered Software in Source Code Form, including any -Modifications that You create or to which You contribute, must be under -the terms of this License. You must inform recipients that the Source -Code Form of the Covered Software is governed by the terms of this -License, and how they can obtain a copy of this License. You may not -attempt to alter or restrict the recipients' rights in the Source Code -Form. - -3.2. 
Distribution of Executable Form - -If You distribute Covered Software in Executable Form then: - -(a) such Covered Software must also be made available in Source Code - Form, as described in Section 3.1, and You must inform recipients of - the Executable Form how they can obtain a copy of such Source Code - Form by reasonable means in a timely manner, at a charge no more - than the cost of distribution to the recipient; and - -(b) You may distribute such Executable Form under the terms of this - License, or sublicense it under different terms, provided that the - license for the Executable Form does not attempt to limit or alter - the recipients' rights in the Source Code Form under this License. - -3.3. Distribution of a Larger Work - -You may create and distribute a Larger Work under terms of Your choice, -provided that You also comply with the requirements of this License for -the Covered Software. If the Larger Work is a combination of Covered -Software with a work governed by one or more Secondary Licenses, and the -Covered Software is not Incompatible With Secondary Licenses, this -License permits You to additionally distribute such Covered Software -under the terms of such Secondary License(s), so that the recipient of -the Larger Work may, at their option, further distribute the Covered -Software under the terms of either this License or such Secondary -License(s). - -3.4. Notices - -You may not remove or alter the substance of any license notices -(including copyright notices, patent notices, disclaimers of warranty, -or limitations of liability) contained within the Source Code Form of -the Covered Software, except that You may alter any license notices to -the extent required to remedy known factual inaccuracies. - -3.5. Application of Additional Terms - -You may choose to offer, and to charge a fee for, warranty, support, -indemnity or liability obligations to one or more recipients of Covered -Software. 
However, You may do so only on Your own behalf, and not on -behalf of any Contributor. You must make it absolutely clear that any -such warranty, support, indemnity, or liability obligation is offered by -You alone, and You hereby agree to indemnify every Contributor for any -liability incurred by such Contributor as a result of warranty, support, -indemnity or liability terms You offer. You may include additional -disclaimers of warranty and limitations of liability specific to any -jurisdiction. - -4. Inability to Comply Due to Statute or Regulation ---------------------------------------------------- - -If it is impossible for You to comply with any of the terms of this -License with respect to some or all of the Covered Software due to -statute, judicial order, or regulation then You must: (a) comply with -the terms of this License to the maximum extent possible; and (b) -describe the limitations and the code they affect. Such description must -be placed in a text file included with all distributions of the Covered -Software under this License. Except to the extent prohibited by statute -or regulation, such description must be sufficiently detailed for a -recipient of ordinary skill to be able to understand it. - -5. Termination --------------- - -5.1. The rights granted under this License will terminate automatically -if You fail to comply with any of its terms. However, if You become -compliant, then the rights granted under this License from a particular -Contributor are reinstated (a) provisionally, unless and until such -Contributor explicitly and finally terminates Your grants, and (b) on an -ongoing basis, if such Contributor fails to notify You of the -non-compliance by some reasonable means prior to 60 days after You have -come back into compliance. 
Moreover, Your grants from a particular -Contributor are reinstated on an ongoing basis if such Contributor -notifies You of the non-compliance by some reasonable means, this is the -first time You have received notice of non-compliance with this License -from such Contributor, and You become compliant prior to 30 days after -Your receipt of the notice. - -5.2. If You initiate litigation against any entity by asserting a patent -infringement claim (excluding declaratory judgment actions, -counter-claims, and cross-claims) alleging that a Contributor Version -directly or indirectly infringes any patent, then the rights granted to -You by any and all Contributors for the Covered Software under Section -2.1 of this License shall terminate. - -5.3. In the event of termination under Sections 5.1 or 5.2 above, all -end user license agreements (excluding distributors and resellers) which -have been validly granted by You or Your distributors under this License -prior to termination shall survive termination. - -************************************************************************ -* * -* 6. Disclaimer of Warranty * -* ------------------------- * -* * -* Covered Software is provided under this License on an "as is" * -* basis, without warranty of any kind, either expressed, implied, or * -* statutory, including, without limitation, warranties that the * -* Covered Software is free of defects, merchantable, fit for a * -* particular purpose or non-infringing. The entire risk as to the * -* quality and performance of the Covered Software is with You. * -* Should any Covered Software prove defective in any respect, You * -* (not any Contributor) assume the cost of any necessary servicing, * -* repair, or correction. This disclaimer of warranty constitutes an * -* essential part of this License. No use of any Covered Software is * -* authorized under this License except under this disclaimer. 
* -* * -************************************************************************ - -************************************************************************ -* * -* 7. Limitation of Liability * -* -------------------------- * -* * -* Under no circumstances and under no legal theory, whether tort * -* (including negligence), contract, or otherwise, shall any * -* Contributor, or anyone who distributes Covered Software as * -* permitted above, be liable to You for any direct, indirect, * -* special, incidental, or consequential damages of any character * -* including, without limitation, damages for lost profits, loss of * -* goodwill, work stoppage, computer failure or malfunction, or any * -* and all other commercial damages or losses, even if such party * -* shall have been informed of the possibility of such damages. This * -* limitation of liability shall not apply to liability for death or * -* personal injury resulting from such party's negligence to the * -* extent applicable law prohibits such limitation. Some * -* jurisdictions do not allow the exclusion or limitation of * -* incidental or consequential damages, so this exclusion and * -* limitation may not apply to You. * -* * -************************************************************************ - -8. Litigation -------------- - -Any litigation relating to this License may be brought only in the -courts of a jurisdiction where the defendant maintains its principal -place of business and such litigation shall be governed by laws of that -jurisdiction, without reference to its conflict-of-law provisions. -Nothing in this Section shall prevent a party's ability to bring -cross-claims or counter-claims. - -9. Miscellaneous ----------------- - -This License represents the complete agreement concerning the subject -matter hereof. If any provision of this License is held to be -unenforceable, such provision shall be reformed only to the extent -necessary to make it enforceable. 
Any law or regulation which provides -that the language of a contract shall be construed against the drafter -shall not be used to construe this License against a Contributor. - -10. Versions of the License ---------------------------- - -10.1. New Versions - -Mozilla Foundation is the license steward. Except as provided in Section -10.3, no one other than the license steward has the right to modify or -publish new versions of this License. Each version will be given a -distinguishing version number. - -10.2. Effect of New Versions - -You may distribute the Covered Software under the terms of the version -of the License under which You originally received the Covered Software, -or under the terms of any subsequent version published by the license -steward. - -10.3. Modified Versions - -If you create software not governed by this License, and you want to -create a new license for such software, you may create and use a -modified version of this License if you rename the license and remove -any references to the name of the license steward (except to note that -such modified license differs from this License). - -10.4. Distributing Source Code Form that is Incompatible With Secondary -Licenses - -If You choose to distribute Source Code Form that is Incompatible With -Secondary Licenses under the terms of this version of the License, the -notice described in Exhibit B of this License must be attached. - -Exhibit A - Source Code Form License Notice -------------------------------------------- - - This Source Code Form is subject to the terms of the Mozilla Public - License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. - -If it is not possible or desirable to put the notice in a particular -file, then You may include the notice in a location (such as a LICENSE -file in a relevant directory) where a recipient would be likely to look -for such a notice. 
- -You may add additional accurate notices of copyright ownership. - -Exhibit B - "Incompatible With Secondary Licenses" Notice ---------------------------------------------------------- - - This Source Code Form is "Incompatible With Secondary Licenses", as - defined by the Mozilla Public License, v. 2.0. diff --git a/thirdParty/alpaka/README.md b/thirdParty/alpaka/README.md deleted file mode 100644 index 8370dac9c4..0000000000 --- a/thirdParty/alpaka/README.md +++ /dev/null @@ -1,210 +0,0 @@ -**alpaka** - Abstraction Library for Parallel Kernel Acceleration -================================================================= - -[![Travis CI Build Status](https://travis-ci.org/ComputationalRadiationPhysics/alpaka.svg?branch=develop)](https://travis-ci.org/ComputationalRadiationPhysics/alpaka) -[![Language](https://img.shields.io/badge/language-C%2B%2B11-orange.svg)](https://isocpp.org/) -[![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20windows%20%7C%20mac-lightgrey.svg)](https://github.com/ComputationalRadiationPhysics/alpaka) -[![License](https://img.shields.io/badge/license-MPL--2.0-blue.svg)](https://www.mozilla.org/en-US/MPL/2.0/) - -![Alpaka](doc/images/alpaka_401x135.png) - -The **alpaka** library is a header-only C++11 abstraction library for accelerator development. - -Its aim is to provide performance portability across accelerators through the abstraction (not hiding!) of the underlying levels of parallelism. - -It is platform independent and supports the concurrent and cooperative use of multiple devices such as the hosts CPU as well as attached accelerators as for instance CUDA GPUs and Xeon Phis (currently native execution only). -A multitude of accelerator back-end variants using CUDA, OpenMP (2.0/4.0), Boost.Fiber, std::thread and also serial execution is provided and can be selected depending on the device. -Only one implementation of the user kernel is required by representing them as function objects with a special interface. 
-There is no need to write special CUDA, OpenMP or custom threading code. -Accelerator back-ends can be mixed within a device queue. -The decision which accelerator back-end executes which kernel can be made at runtime. - -The abstraction used is very similar to the CUDA grid-blocks-threads division strategy. -Algorithms that should be parallelized have to be divided into a multi-dimensional grid consisting of small uniform work items. -These functions are called kernels and are executed in parallel threads. -The threads in the grid are organized in blocks. -All threads in a block are executed in parallel and can interact via fast shared memory. -Blocks are executed independently and can not interact in any way. -The block execution order is unspecified and depends on the accelerator in use. -By using this abstraction the execution can be optimally adapted to the available hardware. - - -Software License ----------------- - -**alpaka** is licensed under **MPL-2.0**. - - -Documentation -------------- - -The [general documentation](doc/markdown/Index.md) is located within the `doc/markdown` subfolder of the repository. -The [source code documentation](http://computationalradiationphysics.github.io/alpaka/) is generated with [doxygen](http://www.doxygen.org). 
- - -Accelerator Back-ends ---------------------- - -|Accelerator Back-end|Lib/API|Devices|Execution strategy grid-blocks|Execution strategy block-threads| -|---|---|---|---|---| -|Serial|n/a|Host CPU (single core)|sequential|sequential (only 1 thread per block)| -|OpenMP 2.0+ blocks|OpenMP 2.0+|Host CPU (multi core)|parallel (preemptive multitasking)|sequential (only 1 thread per block)| -|OpenMP 2.0+ threads|OpenMP 2.0+|Host CPU (multi core)|sequential|parallel (preemptive multitasking)| -|OpenMP 4.0+ (CPU)|OpenMP 4.0+|Host CPU (multi core)|parallel (undefined)|parallel (preemptive multitasking)| -| std::thread | std::thread |Host CPU (multi core)|sequential|parallel (preemptive multitasking)| -| Boost.Fiber | boost::fibers::fiber |Host CPU (single core)|sequential|parallel (cooperative multitasking)| -|TBB|TBB 2.2+|Host CPU (multi core)|parallel (preemptive multitasking)|sequential (only 1 thread per block)| -|CUDA|CUDA 8.0-10.2|NVIDIA GPUs|parallel (undefined)|parallel (lock-step within warps)| -|HIP(nvcc)|[HIP 1.5+](https://github.com/ROCm-Developer-Tools/HIP)|NVIDIA GPUs SM 2.0+|parallel (undefined)|parallel (lock-step within warps)| - - -Supported Compilers -------------------- - -This library uses C++11 (or newer when available). - -|Accelerator Back-end|gcc 4.9.4
(Linux)|gcc 5.5
(Linux)|gcc 6.4/7.3
(Linux)|gcc 8.1/9.1
(Linux)|clang 4
(Linux)|clang 5
(Linux)|clang 6
(Linux)|clang 7
(Linux)|clang 8
(Linux)|clang 9
(Linux)|Apple LLVM 10.2-11.2
(macOS)|MSVC 2017.9
(Windows)| -|---|---|---|---|---|---|---|---|---|---|---|---|---| -|Serial|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:| -|OpenMP 2.0+ blocks|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:white_check_mark:| -|OpenMP 2.0+ threads|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:white_check_mark:| -|OpenMP 4.0+ (CPU)|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:x:| -| std::thread |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:| -| Boost.Fiber |:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:x:|:white_check_mark:| -|TBB|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:|:white_check_mark:| -|CUDA (nvcc)|:white_check_mark:
(CUDA 8.0-10.2)|:white_check_mark:
(CUDA 9.0-10.2)|:white_check_mark:
(CUDA 9.2-10.2) |:x:|:white_check_mark:
(CUDA 9.1-10.2)|:white_check_mark:
(CUDA 10.1-10.2)|:white_check_mark:
(CUDA 10.1-10.2)|:white_check_mark:
(CUDA 10.1-10.2)|:white_check_mark:
(CUDA 10.1-10.2)|:x:|:x:|:white_check_mark:
(CUDA 10.0-10.2)| -|CUDA (clang) | - | - | - | - | :white_check_mark:
(CUDA 8.0)| :white_check_mark:
(CUDA 8.0)| :white_check_mark:
(CUDA 8.0-9.0) | :white_check_mark:
(CUDA 8.0-9.2) | :white_check_mark:
(CUDA 8.0-10.0) | :white_check_mark:
(CUDA 9.2-10.1) | - | - | -|[HIP](doc/markdown/user/implementation/mapping/HIP.md) (nvcc)|:white_check_mark:
(nvcc 9.0+)|:x:|:x:|:x:|:x:|:x:|:x:|:x:|:x:|:x:|:x:|:x:| - - -Other compilers or combinations marked with :x: in the table above may work but are not tested in CI and are therefore not explicitly supported. - -Dependencies ------------- - -[Boost](https://boost.org/) 1.62+ is the only mandatory external dependency (for CUDA 9+ Boost >=1.65.1 is required). -The **alpaka** library itself just requires header-only libraries. -However some of the accelerator back-end implementations require different boost libraries to be built. - -When an accelerator back-end using *Boost.Fiber* is enabled, `boost-fiber` and all of its dependencies are required to be built in C++11 mode `./b2 cxxflags="-std=c++11"`. -When *Boost.Fiber* is enabled and alpaka is built in C++17 mode with clang and libstdc++, Boost >= 1.67.0 is required. - -When an accelerator back-end using *CUDA* is enabled, version *8.0* of the *CUDA SDK* is the minimum requirement. -*NOTE*: When using nvcc as *CUDA* compiler, the *CUDA accelerator back-end* can not be enabled together with the *Boost.Fiber accelerator back-end* due to bugs in the nvcc compiler. -*NOTE*: When using clang as a native *CUDA* compiler, the *CUDA accelerator back-end* can not be enabled together with any *OpenMP accelerator back-end* because this combination is currently unsupported. -*NOTE*: Separable compilation is only supported when using nvcc, not with clang as native *CUDA* compiler. It is disabled by default and can be enabled via the CMake flag `ALPAKA_CUDA_NVCC_SEPARABLE_COMPILATION`. - -When an accelerator back-end using *OpenMP* is enabled, the compiler and the platform have to support the corresponding minimum *OpenMP* version. - -When an accelerator back-end using *TBB* is enabled, the compiler and the platform have to support the corresponding minimum *TBB* version. - - -Usage ------ - -The library is header only so nothing has to be built. -CMake 3.11.4+ is required to provide the correct defines and include paths. 
-Just call `ALPAKA_ADD_EXECUTABLE` instead of `CUDA_ADD_EXECUTABLE` or `ADD_EXECUTABLE` and the difficulties of the CUDA nvcc compiler in handling `.cu` and `.cpp` files are automatically taken care of. -Source files do not need any special file ending. -Examples of how to utilize alpaka within CMake can be found in the `example` folder. - -The whole alpaka library can be included with: `#include <alpaka/alpaka.hpp>` -Code that is not intended to be utilized by the user is hidden in the `detail` namespace. - - -Introduction ------------- - -For a quick introduction, feel free to play back the recording of our presentation at -[GTC 2016](http://mygtc.gputechconf.com/quicklink/858sI36): - - - E. Zenker, R. Widera, G. Juckeland et al., - *Porting the Plasma Simulation PIConGPU to Heterogeneous Architectures with Alpaka*, - [video link (39 min)](http://on-demand.gputechconf.com/gtc/2016/video/S6298.html) - - -Citing alpaka -------------- - -Currently all authors of **alpaka** are scientists or connected with -research. For us to justify the importance and impact of our work, please -consider citing us accordingly in your derived work and publications: - -```latex -% Peer-Reviewed Publication %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% Peer reviewed and accepted publication in -% "2nd International Workshop on Performance Portable -% Programming Models for Accelerators (P^3MA)" -% colocated with the -% "2017 ISC High Performance Conference" -% in Frankfurt, Germany -@inproceedings{MathesP3MA2017, - author = {{Matthes}, A. and {Widera}, R. and {Zenker}, E. and {Worpitz}, B. and - {Huebl}, A. 
and {Bussmann}, M.}, - title = {Tuning and optimization for a variety of many-core architectures without changing a single line of implementation code - using the Alpaka library}, - archivePrefix = "arXiv", - eprint = {1706.10086}, - keywords = {Computer Science - Distributed, Parallel, and Cluster Computing}, - day = {30}, - month = {Jun}, - year = {2017}, - url = {https://arxiv.org/abs/1706.10086}, -} - -% Peer-Reviewed Publication %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% -% Peer reviewed and accepted publication in -% "The Sixth International Workshop on -% Accelerators and Hybrid Exascale Systems (AsHES)" -% at the -% "30th IEEE International Parallel and Distributed -% Processing Symposium" in Chicago, IL, USA -@inproceedings{ZenkerAsHES2016, - author = {Erik Zenker and Benjamin Worpitz and Ren{\'{e}} Widera - and Axel Huebl and Guido Juckeland and - Andreas Kn{\"{u}}pfer and Wolfgang E. Nagel and Michael Bussmann}, - title = {Alpaka - An Abstraction Library for Parallel Kernel Acceleration}, - archivePrefix = "arXiv", - eprint = {1602.08477}, - keywords = {Computer science;CUDA;Mathematical Software;nVidia;OpenMP;Package; - performance portability;Portability;Tesla K20;Tesla K80}, - day = {23}, - month = {May}, - year = {2016}, - publisher = {IEEE Computer Society}, - url = {http://arxiv.org/abs/1602.08477}, -} - - -% Original Work: Benjamin Worpitz' Master Thesis %%%%%%%%%% -% -@MasterThesis{Worpitz2015, - author = {Benjamin Worpitz}, - title = {Investigating performance portability of a highly scalable - particle-in-cell simulation code on various multi-core - architectures}, - school = {{Technische Universit{\"{a}}t Dresden}}, - month = {Sep}, - year = {2015}, - type = {Master Thesis}, - doi = {10.5281/zenodo.49768}, - url = {http://dx.doi.org/10.5281/zenodo.49768} -} -``` - - -Authors -------- - -### Maintainers and Core Developers - -- Benjamin Worpitz (original author) -- Rene Widera - -### Former Members, Contributions and Thanks - -- Dr. 
Michael Bussmann -- Axel Huebl -- Erik Zenker diff --git a/thirdParty/alpaka/alpakaConfig.cmake b/thirdParty/alpaka/alpakaConfig.cmake deleted file mode 100644 index ad66efd118..0000000000 --- a/thirdParty/alpaka/alpakaConfig.cmake +++ /dev/null @@ -1,1220 +0,0 @@ -# -# Copyright 2014-2019 Benjamin Worpitz, Erik Zenker, Axel Huebl -# -# This file is part of Alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# - -################################################################################ -# Required cmake version. - -CMAKE_MINIMUM_REQUIRED(VERSION 3.11.4) - -################################################################################ -# CMake policies -# -# Search in _ROOT: -# https://cmake.org/cmake/help/v3.12/policy/CMP0074.html - -if(POLICY CMP0074) - cmake_policy(SET CMP0074 NEW) -endif() - -################################################################################ -# alpaka. - -# Return values. -UNSET(alpaka_FOUND) -UNSET(alpaka_VERSION) -UNSET(alpaka_COMPILE_OPTIONS) -UNSET(alpaka_COMPILE_DEFINITIONS) -UNSET(alpaka_DEFINITIONS) -UNSET(alpaka_INCLUDE_DIR) -UNSET(alpaka_INCLUDE_DIRS) -UNSET(alpaka_LIBRARY) -UNSET(alpaka_LIBRARIES) - -# Internal usage. -UNSET(_ALPAKA_FOUND) -UNSET(_ALPAKA_COMPILE_OPTIONS_PUBLIC) -UNSET(_ALPAKA_COMPILE_DEFINITIONS_PUBLIC) -UNSET(_ALPAKA_INCLUDE_DIRECTORY) -UNSET(_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC) -UNSET(_ALPAKA_LINK_LIBRARIES_PUBLIC) -UNSET(_ALPAKA_LINK_FLAGS_PUBLIC) -UNSET(_ALPAKA_COMMON_FILE) -UNSET(_ALPAKA_ADD_EXECUTABLE_FILE) -UNSET(_ALPAKA_ADD_LIBRRAY_FILE) -UNSET(_ALPAKA_FILES_HEADER) -UNSET(_ALPAKA_FILES_OTHER) - -#------------------------------------------------------------------------------- -# Common. - -# Directory of this file. -SET(_ALPAKA_ROOT_DIR ${CMAKE_CURRENT_LIST_DIR}) -# Normalize the path (e.g. 
remove ../) -GET_FILENAME_COMPONENT(_ALPAKA_ROOT_DIR "${_ALPAKA_ROOT_DIR}" ABSOLUTE) - -# Add common functions. -SET(_ALPAKA_COMMON_FILE "${_ALPAKA_ROOT_DIR}/cmake/common.cmake") -INCLUDE("${_ALPAKA_COMMON_FILE}") - -# Add ALPAKA_ADD_EXECUTABLE function. -SET(_ALPAKA_ADD_EXECUTABLE_FILE "${_ALPAKA_ROOT_DIR}/cmake/addExecutable.cmake") -INCLUDE("${_ALPAKA_ADD_EXECUTABLE_FILE}") - -# Add ALPAKA_ADD_LIBRARY function. -SET(_ALPAKA_ADD_LIBRARY_FILE "${_ALPAKA_ROOT_DIR}/cmake/addLibrary.cmake") -INCLUDE("${_ALPAKA_ADD_LIBRARY_FILE}") - -# Set found to true initially and set it to false if a required dependency is missing. -SET(_ALPAKA_FOUND TRUE) - -# Add module search path -SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${_ALPAKA_ROOT_DIR}/cmake/modules/") - -#------------------------------------------------------------------------------- -# Options. -SET(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE_DEFAULT ON) -SET(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE_DEFAULT ON) -SET(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE_DEFAULT ON) -SET(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE_DEFAULT ON) -SET(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE_DEFAULT ON) -SET(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE_DEFAULT ON) -SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE_DEFAULT ON) - -# HIP and platform selection and warning about unsupported features -OPTION(ALPAKA_ACC_GPU_HIP_ENABLE "Enable the HIP back-end (all other back-ends must be disabled)" OFF) -OPTION(ALPAKA_ACC_GPU_HIP_ONLY_MODE "Only back-ends using HIP can be enabled in this mode." OFF) # HIP only runs without other back-ends - -# Drop-down combo box in cmake-gui for HIP platforms. 
-SET(ALPAKA_HIP_PLATFORM "nvcc" CACHE STRING "Specify HIP platform") -SET_PROPERTY(CACHE ALPAKA_HIP_PLATFORM PROPERTY STRINGS "nvcc;hcc;clang") - -IF(ALPAKA_ACC_GPU_HIP_ENABLE AND NOT ALPAKA_ACC_GPU_HIP_ONLY_MODE) - MESSAGE(WARNING "HIP back-end must be used together with ALPAKA_ACC_GPU_HIP_ONLY_MODE") - SET(ALPAKA_ACC_GPU_HIP_ENABLE OFF CACHE BOOL "" FORCE) -ENDIF() - -IF(ALPAKA_ACC_GPU_HIP_ENABLE AND (ALPAKA_HIP_PLATFORM MATCHES "hcc" OR ALPAKA_HIP_PLATFORM MATCHES "clang")) - MESSAGE(WARNING - "The HIP back-end is currently experimental, especially for HCC. " - "In alpaka HIP(HCC) has a few workarounds and does not support 3D memory and constant memory. " - ) -ENDIF() - -OPTION(ALPAKA_ACC_GPU_CUDA_ONLY_MODE "Only back-ends using CUDA can be enabled in this mode (This allows to mix alpaka code with native CUDA code)." OFF) -# If CUDA-only mode is enabled, we set the defaults for all CPU back-ends to OFF. -# If they are explicitly set via the command line, the user will get an error later on. -IF(ALPAKA_ACC_GPU_CUDA_ONLY_MODE OR ALPAKA_ACC_GPU_HIP_ONLY_MODE) # CUDA-only or HIP-only - SET(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE_DEFAULT OFF) -ENDIF() - -OPTION(ALPAKA_ACC_GPU_CUDA_ENABLE "Enable the CUDA GPU back-end" ON) - -# If CUDA is enabled, we set the defaults for some unsupported back-ends to OFF. -# If they are explicitly set via the command line, the user will get an error later on. 
-IF(ALPAKA_ACC_GPU_CUDA_ENABLE) - SET(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE_DEFAULT OFF) - IF(ALPAKA_CUDA_COMPILER MATCHES "clang") - SET(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE_DEFAULT OFF) - SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE_DEFAULT OFF) - ENDIF() -ENDIF() - -IF(ALPAKA_ACC_GPU_CUDA_ONLY_MODE AND NOT ALPAKA_ACC_GPU_CUDA_ENABLE) - MESSAGE(WARNING "If ALPAKA_ACC_GPU_CUDA_ONLY_MODE is enabled, ALPAKA_ACC_GPU_CUDA_ENABLE has to be enabled as well.") - SET(_ALPAKA_FOUND FALSE) -ENDIF() -IF(ALPAKA_ACC_GPU_HIP_ONLY_MODE AND NOT ALPAKA_ACC_GPU_HIP_ENABLE) - MESSAGE(WARNING "If ALPAKA_ACC_GPU_HIP_ONLY_MODE is enabled, ALPAKA_ACC_GPU_HIP_ENABLE has to be enabled as well.") - SET(_ALPAKA_FOUND FALSE) -ENDIF() - - -OPTION(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE "Enable the serial CPU back-end" ${ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE_DEFAULT}) -OPTION(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE "Enable the threads CPU block thread back-end" ${ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE_DEFAULT}) -OPTION(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE "Enable the fibers CPU block thread back-end" ${ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE_DEFAULT}) -OPTION(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE "Enable the TBB CPU grid block back-end" ${ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE_DEFAULT}) -OPTION(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE "Enable the OpenMP 2.0 CPU grid block back-end" ${ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE_DEFAULT}) -OPTION(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE "Enable the OpenMP 2.0 CPU block thread back-end" ${ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE_DEFAULT}) -OPTION(ALPAKA_ACC_CPU_BT_OMP4_ENABLE "Enable the OpenMP 4.0 CPU block and block thread back-end" ${ALPAKA_ACC_CPU_BT_OMP4_ENABLE_DEFAULT}) - -IF((ALPAKA_ACC_GPU_CUDA_ONLY_MODE OR ALPAKA_ACC_GPU_HIP_ONLY_MODE) - AND - (ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE OR - ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE OR - ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE OR - ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE OR - ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OR - 
ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE OR - ALPAKA_ACC_CPU_BT_OMP4_ENABLE)) - IF(ALPAKA_ACC_GPU_CUDA_ONLY_MODE) - MESSAGE(WARNING "If ALPAKA_ACC_GPU_CUDA_ONLY_MODE is enabled, only back-ends using CUDA can be enabled! This allows to mix alpaka code with native CUDA code. However, this prevents any non-CUDA back-ends from being enabled.") - ENDIF() - IF(ALPAKA_ACC_GPU_HIP_ONLY_MODE) - MESSAGE(WARNING "If ALPAKA_ACC_GPU_HIP_ONLY_MODE is enabled, only back-ends using HIP can be enabled!") - ENDIF() - SET(_ALPAKA_FOUND FALSE) -ENDIF() - -# avoids CUDA+HIP conflict -IF(ALPAKA_ACC_GPU_HIP_ENABLE AND ALPAKA_ACC_GPU_CUDA_ENABLE) - MESSAGE(FATAL_ERROR "CUDA and HIP can not be enabled both at the same time.") -ENDIF() - -# HIP is only supported on Linux -IF(ALPAKA_ACC_GPU_HIP_ENABLE AND (MSVC OR WIN32)) - MESSAGE(WARNING "Optional alpaka dependency HIP can not be built on Windows! HIP back-end disabled!") - SET(ALPAKA_ACC_GPU_HIP_ENABLE OFF CACHE BOOL "Enable the HIP GPU back-end" FORCE) -ENDIF() - -# Drop-down combo box in cmake-gui. -SET(ALPAKA_DEBUG "0" CACHE STRING "Debug level") -SET_PROPERTY(CACHE ALPAKA_DEBUG PROPERTY STRINGS "0;1;2") - -SET(ALPAKA_CXX_STANDARD "11" CACHE STRING "C++ standard version") -SET_PROPERTY(CACHE ALPAKA_CXX_STANDARD PROPERTY STRINGS "11;14;17") - -#------------------------------------------------------------------------------- -# Debug output of common variables. -IF(${ALPAKA_DEBUG} GREATER 1) - MESSAGE(STATUS "_ALPAKA_ROOT_DIR : ${_ALPAKA_ROOT_DIR}") - MESSAGE(STATUS "_ALPAKA_COMMON_FILE : ${_ALPAKA_COMMON_FILE}") - MESSAGE(STATUS "_ALPAKA_ADD_EXECUTABLE_FILE : ${_ALPAKA_ADD_EXECUTABLE_FILE}") - MESSAGE(STATUS "_ALPAKA_ADD_LIBRARY_FILE : ${_ALPAKA_ADD_LIBRARY_FILE}") - MESSAGE(STATUS "CMAKE_BUILD_TYPE : ${CMAKE_BUILD_TYPE}") -ENDIF() - -#------------------------------------------------------------------------------- -# Check supported compilers. 
-IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) - MESSAGE(FATAL_ERROR "Clang versions < 4.0 are not supported!") - SET(_ALPAKA_FOUND FALSE) -ENDIF() - -IF(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE AND (ALPAKA_ACC_GPU_CUDA_ENABLE OR ALPAKA_ACC_GPU_HIP_ENABLE)) - MESSAGE(FATAL_ERROR "Fibers and CUDA or HIP back-end can not be enabled both at the same time.") - SET(_ALPAKA_FOUND FALSE) -ENDIF() - -#------------------------------------------------------------------------------- -# Compiler settings. -IF(MSVC) - # Empty append to define it if it does not already exist. - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC) - - IF(ALPAKA_ACC_GPU_CUDA_ONLY_MODE) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "/wd4505") # CUDA\v9.2\include\crt/host_runtime.h(265): warning C4505: '__cudaUnregisterBinaryUtil': unreferenced local function has been removed - ENDIF() -ELSE() - # Add linker options. - # lipthread: - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "general;pthread") - IF(NOT APPLE) - # librt: undefined reference to `clock_gettime' - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "general;rt") - ENDIF() - - # Clang<4.0 or AppleClang<9.0 - # https://bugs.llvm.org/show_bug.cgi?id=18417 - # https://github.com/llvm/llvm-project/commit/e55b4737c026ea2e0b44829e4115d208577a67b2 - IF(("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang" AND - CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.1) OR - ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" AND - CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-ftemplate-depth=1024") - ENDIF() -ENDIF() - -#------------------------------------------------------------------------------- -# Find Boost. 
-SET(_ALPAKA_BOOST_MIN_VER "1.62.0") -IF(${ALPAKA_DEBUG} GREATER 1) - SET(Boost_DEBUG ON) - SET(Boost_DETAILED_FAILURE_MSG ON) -ENDIF() -IF(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE) - FIND_PACKAGE(Boost ${_ALPAKA_BOOST_MIN_VER} QUIET COMPONENTS fiber context system thread atomic chrono date_time) - IF(NOT Boost_FIBER_FOUND) - MESSAGE(STATUS "Optional alpaka dependency Boost fiber could not be found! Fibers back-end disabled!") - SET(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE OFF CACHE BOOL "Enable the Fibers CPU back-end" FORCE) - FIND_PACKAGE(Boost ${_ALPAKA_BOOST_MIN_VER} QUIET) - ELSE() - # On Win32 boost context triggers: - # libboost_context-vc141-mt-gd-1_64.lib(jump_i386_ms_pe_masm.obj) : error LNK2026: module unsafe for SAFESEH image. - IF(MSVC) - IF(CMAKE_SIZEOF_VOID_P EQUAL 4) - SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /SAFESEH:NO") - SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /SAFESEH:NO") - SET(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /SAFESEH:NO") - ENDIF() - ENDIF() - ENDIF() - -ELSE() - FIND_PACKAGE(Boost ${_ALPAKA_BOOST_MIN_VER} QUIET) -ENDIF() - -IF(${ALPAKA_DEBUG} GREATER 1) - MESSAGE(STATUS "Boost in:") - MESSAGE(STATUS "BOOST_ROOT : ${BOOST_ROOT}") - MESSAGE(STATUS "BOOSTROOT : ${BOOSTROOT}") - MESSAGE(STATUS "BOOST_INCLUDEDIR: ${BOOST_INCLUDEDIR}") - MESSAGE(STATUS "BOOST_LIBRARYDIR: ${BOOST_LIBRARYDIR}") - MESSAGE(STATUS "Boost_NO_SYSTEM_PATHS: ${Boost_NO_SYSTEM_PATHS}") - MESSAGE(STATUS "Boost_ADDITIONAL_VERSIONS: ${Boost_ADDITIONAL_VERSIONS}") - MESSAGE(STATUS "Boost_USE_MULTITHREADED: ${Boost_USE_MULTITHREADED}") - MESSAGE(STATUS "Boost_USE_STATIC_LIBS: ${Boost_USE_STATIC_LIBS}") - MESSAGE(STATUS "Boost_USE_STATIC_RUNTIME: ${Boost_USE_STATIC_RUNTIME}") - MESSAGE(STATUS "Boost_USE_DEBUG_RUNTIME: ${Boost_USE_DEBUG_RUNTIME}") - MESSAGE(STATUS "Boost_USE_DEBUG_PYTHON: ${Boost_USE_DEBUG_PYTHON}") - MESSAGE(STATUS "Boost_USE_STLPORT: ${Boost_USE_STLPORT}") - MESSAGE(STATUS 
"Boost_USE_STLPORT_DEPRECATED_NATIVE_IOSTREAMS: ${Boost_USE_STLPORT_DEPRECATED_NATIVE_IOSTREAMS}") - MESSAGE(STATUS "Boost_COMPILER: ${Boost_COMPILER}") - MESSAGE(STATUS "Boost_THREADAPI: ${Boost_THREADAPI}") - MESSAGE(STATUS "Boost_NAMESPACE: ${Boost_NAMESPACE}") - MESSAGE(STATUS "Boost_DEBUG: ${Boost_DEBUG}") - MESSAGE(STATUS "Boost_DETAILED_FAILURE_MSG: ${Boost_DETAILED_FAILURE_MSG}") - MESSAGE(STATUS "Boost_REALPATH: ${Boost_REALPATH}") - MESSAGE(STATUS "Boost_NO_BOOST_CMAKE: ${Boost_NO_BOOST_CMAKE}") - MESSAGE(STATUS "Boost out:") - MESSAGE(STATUS "Boost_FOUND: ${Boost_FOUND}") - MESSAGE(STATUS "Boost_INCLUDE_DIRS: ${Boost_INCLUDE_DIRS}") - MESSAGE(STATUS "Boost_LIBRARY_DIRS: ${Boost_LIBRARY_DIRS}") - MESSAGE(STATUS "Boost_LIBRARIES: ${Boost_LIBRARIES}") - MESSAGE(STATUS "Boost_FIBER_FOUND: ${Boost_FIBER_FOUND}") - MESSAGE(STATUS "Boost_FIBER_LIBRARY: ${Boost_FIBER_LIBRARY}") - MESSAGE(STATUS "Boost_CONTEXT_FOUND: ${Boost_CONTEXT_FOUND}") - MESSAGE(STATUS "Boost_CONTEXT_LIBRARY: ${Boost_CONTEXT_LIBRARY}") - MESSAGE(STATUS "Boost_SYSTEM_FOUND: ${Boost_SYSTEM_FOUND}") - MESSAGE(STATUS "Boost_SYSTEM_LIBRARY: ${Boost_SYSTEM_LIBRARY}") - MESSAGE(STATUS "Boost_THREAD_FOUND: ${Boost_THREAD_FOUND}") - MESSAGE(STATUS "Boost_THREAD_LIBRARY: ${Boost_THREAD_LIBRARY}") - MESSAGE(STATUS "Boost_ATOMIC_FOUND: ${Boost_ATOMIC_FOUND}") - MESSAGE(STATUS "Boost_ATOMIC_LIBRARY: ${Boost_ATOMIC_LIBRARY}") - MESSAGE(STATUS "Boost_CHRONO_FOUND: ${Boost_CHRONO_FOUND}") - MESSAGE(STATUS "Boost_CHRONO_LIBRARY: ${Boost_CHRONO_LIBRARY}") - MESSAGE(STATUS "Boost_DATE_TIME_FOUND: ${Boost_DATE_TIME_FOUND}") - MESSAGE(STATUS "Boost_DATE_TIME_LIBRARY: ${Boost_DATE_TIME_LIBRARY}") - MESSAGE(STATUS "Boost_VERSION: ${Boost_VERSION}") - MESSAGE(STATUS "Boost_LIB_VERSION: ${Boost_LIB_VERSION}") - MESSAGE(STATUS "Boost_MAJOR_VERSION: ${Boost_MAJOR_VERSION}") - MESSAGE(STATUS "Boost_MINOR_VERSION: ${Boost_MINOR_VERSION}") - MESSAGE(STATUS "Boost_SUBMINOR_VERSION: ${Boost_SUBMINOR_VERSION}") - 
MESSAGE(STATUS "Boost_LIB_DIAGNOSTIC_DEFINITIONS: ${Boost_LIB_DIAGNOSTIC_DEFINITIONS}") - MESSAGE(STATUS "Boost cached:") - MESSAGE(STATUS "Boost_INCLUDE_DIR: ${Boost_INCLUDE_DIR}") - MESSAGE(STATUS "Boost_LIBRARY_DIR: ${Boost_LIBRARY_DIR}") -ENDIF() - -IF(NOT Boost_FOUND) - MESSAGE(WARNING "Required alpaka dependency Boost (>=${_ALPAKA_BOOST_MIN_VER}) could not be found!") - SET(_ALPAKA_FOUND FALSE) - -ELSE() - IF(Boost_FIBER_FOUND) - # Boost fiber and default header-only libraries - IF(TARGET Boost::fiber) - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC - Boost::boost - Boost::fiber Boost::context Boost::system Boost::thread - Boost::chrono Boost::date_time Boost::atomic - ) - ELSE() - # fallback: Boost version is too new for CMake - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC ${Boost_INCLUDE_DIRS}) - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC ${Boost_LIBRARIES}) - ENDIF() - ELSE() - # header-only libraries - IF(TARGET Boost::boost) - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC Boost::boost) - ELSE() - # fallback: Boost version is too new for CMake - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC ${Boost_INCLUDE_DIRS}) - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC ${Boost_LIBRARIES}) - ENDIF() - ENDIF() -ENDIF() - -#------------------------------------------------------------------------------- -# Find TBB. -IF(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE) - FIND_PACKAGE(TBB 2.2) - IF(NOT TBB_FOUND) - MESSAGE(STATUS "Optional alpaka dependency TBB could not be found! TBB grid block back-end disabled!") - SET(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE OFF CACHE BOOL "Enable the TBB grid block back-end" FORCE) - ELSE() - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC ${TBB_LIBRARIES}) - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC ${TBB_INCLUDE_DIRS}) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC ${TBB_DEFINITIONS}) - ENDIF() -ENDIF() - -#------------------------------------------------------------------------------- -# Find OpenMP. 
-IF(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OR ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE OR ALPAKA_ACC_CPU_BT_OMP4_ENABLE) - FIND_PACKAGE(OpenMP) - - # Manually find OpenMP for the clang compiler if it was not already found. - # Even CMake 3.5 is unable to find libiomp and provide the correct OpenMP flags. - IF(NOT OPENMP_FOUND) - IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - FIND_PATH(_ALPAKA_LIBIOMP_INCLUDE_DIR NAMES "omp.h" PATH_SUFFIXES "include" "libiomp" "include/libiomp") - IF(_ALPAKA_LIBIOMP_INCLUDE_DIR) - SET(OPENMP_FOUND TRUE) - SET(OpenMP_CXX_FLAGS "-fopenmp=libiomp5") - SET(OpenMP_C_FLAGS "-fopenmp=libiomp5") - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC "${_ALPAKA_LIBIOMP_INCLUDE_DIR}") - ENDIF() - ENDIF() - ENDIF() - - IF(NOT OPENMP_FOUND) - MESSAGE(STATUS "Optional alpaka dependency OpenMP could not be found! OpenMP back-ends disabled!") - SET(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OFF CACHE BOOL "Enable the OpenMP 2.0 CPU grid block back-end" FORCE) - SET(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE OFF CACHE BOOL "Enable the OpenMP 2.0 CPU block thread back-end" FORCE) - SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE OFF CACHE BOOL "Enable the OpenMP 4.0 CPU block and thread back-end" FORCE) - - ELSE() - - # Check whether OpenMP 4 is supported - IF(OpenMP_CXX_VERSION VERSION_LESS 4.0) - SET(ALPAKA_ACC_CPU_BT_OMP4_ENABLE OFF CACHE BOOL "Enable the OpenMP 4.0 CPU block and thread back-end" FORCE) - ENDIF() - - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC ${OpenMP_CXX_FLAGS}) - IF(NOT MSVC) - LIST(APPEND _ALPAKA_LINK_FLAGS_PUBLIC ${OpenMP_CXX_FLAGS}) - ENDIF() - - # clang versions beginning with 3.9 support OpenMP 4.0 but only when given the corresponding flag - IF(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - IF(ALPAKA_ACC_CPU_BT_OMP4_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-fopenmp-version=40") - ENDIF() - ENDIF() - ENDIF() -ENDIF() - -#------------------------------------------------------------------------------- -# Find CUDA. 
-IF(ALPAKA_ACC_GPU_CUDA_ENABLE) - - IF(NOT DEFINED ALPAKA_CUDA_VERSION) - SET(ALPAKA_CUDA_VERSION 8.0) - ENDIF() - - IF(ALPAKA_CUDA_VERSION VERSION_LESS 8.0) - MESSAGE(WARNING "CUDA Toolkit < 8.0 is not supported!") - SET(_ALPAKA_FOUND FALSE) - - ELSE() - FIND_PACKAGE(CUDA "${ALPAKA_CUDA_VERSION}") - IF(NOT CUDA_FOUND) - MESSAGE(STATUS "Optional alpaka dependency CUDA could not be found! CUDA back-end disabled!") - SET(ALPAKA_ACC_GPU_CUDA_ENABLE OFF CACHE BOOL "Enable the CUDA GPU back-end" FORCE) - - ELSE() - SET(ALPAKA_CUDA_VERSION "${CUDA_VERSION}") - IF(CUDA_VERSION VERSION_LESS 9.0) - SET(ALPAKA_CUDA_ARCH "20" CACHE STRING "GPU architecture") - ELSEIF(CUDA_VERSION VERSION_LESS 10.3) - SET(ALPAKA_CUDA_ARCH "30" CACHE STRING "GPU architecture") - ELSE() - SET(ALPAKA_CUDA_ARCH "35" CACHE STRING "GPU architecture") - ENDIF() - SET(ALPAKA_CUDA_COMPILER "nvcc" CACHE STRING "CUDA compiler") - SET_PROPERTY(CACHE ALPAKA_CUDA_COMPILER PROPERTY STRINGS "nvcc;clang") - - OPTION(ALPAKA_CUDA_FAST_MATH "Enable fast-math" ON) - OPTION(ALPAKA_CUDA_FTZ "Set flush to zero for GPU" OFF) - OPTION(ALPAKA_CUDA_SHOW_REGISTER "Show kernel registers and create PTX" OFF) - OPTION(ALPAKA_CUDA_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps (folder: nvcc_tmp)" OFF) - OPTION(ALPAKA_CUDA_NVCC_EXPT_EXTENDED_LAMBDA "Enable experimental, extended host-device lambdas in NVCC" ON) - OPTION(ALPAKA_CUDA_NVCC_EXPT_RELAXED_CONSTEXPR "Enable experimental, relaxed constexpr in NVCC" ON) - OPTION(ALPAKA_CUDA_NVCC_SEPARABLE_COMPILATION "Enable separable compilation in NVCC" OFF) - - IF(ALPAKA_CUDA_COMPILER MATCHES "clang") - IF(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - MESSAGE(FATAL_ERROR "Using clang as CUDA compiler is only possible if clang is the host compiler!") - ENDIF() - - IF(CMAKE_CXX_COMPILER_VERSION LESS 6.0) - IF(CUDA_VERSION GREATER_EQUAL 9.0) - MESSAGE(FATAL_ERROR "Clang versions lower than 6 do not support CUDA 9 or greater!") - 
ENDIF() - ELSEIF(CMAKE_CXX_COMPILER_VERSION LESS 7.0) - IF(CUDA_VERSION GREATER_EQUAL 9.1) - MESSAGE(FATAL_ERROR "Clang versions lower than 7 do not support CUDA 9.1 or greater!") - ENDIF() - ELSEIF(CMAKE_CXX_COMPILER_VERSION LESS 8.0) - IF(CUDA_VERSION GREATER_EQUAL 10.0) - MESSAGE(FATAL_ERROR "Clang versions lower than 8 do not support CUDA 10.0 or greater!") - ENDIF() - ELSEIF(CMAKE_CXX_COMPILER_VERSION LESS 9.0) - IF(CUDA_VERSION GREATER_EQUAL 10.1) - MESSAGE(FATAL_ERROR "Clang versions lower than 9 do not support CUDA 10.1 or greater!") - ENDIF() - ELSEIF(CMAKE_CXX_COMPILER_VERSION LESS 10.0) - IF(CUDA_VERSION GREATER_EQUAL 10.2) - MESSAGE(FATAL_ERROR "Clang versions lower than 10 do not support CUDA 10.2 or greater!") - ENDIF() - ENDIF() - - IF(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE) - MESSAGE(FATAL_ERROR "Clang as a CUDA compiler does not support boost.fiber!") - ENDIF() - IF(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE OR ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE) - MESSAGE(FATAL_ERROR "Clang as a CUDA compiler does not support OpenMP 2!") - ENDIF() - IF(ALPAKA_ACC_CPU_BT_OMP4_ENABLE) - MESSAGE(FATAL_ERROR "Clang as a CUDA compiler does not support OpenMP 4!") - ENDIF() - - FOREACH(_CUDA_ARCH_ELEM ${ALPAKA_CUDA_ARCH}) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "--cuda-gpu-arch=sm_${_CUDA_ARCH_ELEM}") - ENDFOREACH() - - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "--cuda-path=${CUDA_TOOLKIT_ROOT_DIR}") - - # This flag silences the warning produced by the Dummy.cpp files: - # clang: warning: argument unused during compilation: '--cuda-gpu-arch=sm_XX' - # This seems to be a false positive as all flags are 'unused' for an empty file. - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-Qunused-arguments") - - # Silences warnings that are produced by boost because clang is not correctly identified. 
- LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-Wno-unused-local-typedef") - - IF(ALPAKA_CUDA_FAST_MATH) - # -ffp-contract=fast enables the usage of FMA - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-ffast-math" "-ffp-contract=fast") - ENDIF() - - IF(ALPAKA_CUDA_FTZ) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-fcuda-flush-denormals-to-zero") - ENDIF() - - IF(ALPAKA_CUDA_SHOW_REGISTER) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-Xcuda-ptxas=-v") - ENDIF() - - IF(ALPAKA_CUDA_KEEP_FILES) - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-save-temps") - ENDIF() - - # When libstdc++ is used and -std=gnu++XX is set, we get the following compile error: - # /usr/lib/gcc/x86_64-linux-gnu/5.5.0/../../../../include/c++/5.5.0/type_traits:311:39: error: __float128 is not supported on this target struct __is_floating_point_helper<__float128> - # Clang doesn't support the __float128 type (at least when building CUDA device code) - # * Due to the minimum requirement to compile with C++11 and because extensions are enabled by default by CMake, it adds -std=gnu++11 instead of -std=c++11 to the command line. - # Due to alpaka being an INTERFACE library (header-only) we are not allowed to set CXX_EXTENSIONS to OFF and transitively disable extensions for inherited targets. - # * Defining __float128 on the command line is the least invasive workaround found here: https://bugs.llvm.org/show_bug.cgi?id=13530#c6 - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "__float128=void") - - # CMake 3.15 does not provide the `--std=c++11` argument to clang anymore. - # It is not necessary for basic c++ compilation because clangs default is already higher, but CUDA code compiled with -x cuda still defaults to c++98. 
- IF(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.15.0") - LIST(APPEND _ALPAKA_COMPILE_OPTIONS_PUBLIC "-std=c++${ALPAKA_CXX_STANDARD}") - ENDIF() - - ELSE() - IF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") - IF(CUDA_VERSION VERSION_EQUAL 8.0) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 5.4) - MESSAGE(FATAL_ERROR "NVCC 8.0 does not support GCC 5.4+. Please use GCC 4.9 - 5.3!") - ENDIF() - ELSEIF((CUDA_VERSION VERSION_EQUAL 9.0) OR (CUDA_VERSION VERSION_EQUAL 9.1)) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 6.0) - MESSAGE(FATAL_ERROR "NVCC 9.0 - 9.1 do not support GCC 7+ and fail compiling the std::tuple implementation in GCC 6+. Please use GCC 4.9 - 5.5!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 9.2) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 8.0) - MESSAGE(FATAL_ERROR "NVCC 9.2 does not support GCC 8+. Please use GCC 4.9, 5, 6 or 7!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 10.0) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 8.0) - MESSAGE(FATAL_ERROR "NVCC 10.0 does not support GCC 8+. Please use GCC 4.9, 5, 6 or 7!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 10.1) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 9.0) - MESSAGE(FATAL_ERROR "NVCC 10.1 does not support GCC 9+. Please use GCC 4.9, 5, 6, 7 or 8!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 10.2) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 9.0) - MESSAGE(FATAL_ERROR "NVCC 10.2 does not support GCC 9+. Please use GCC 4.9, 5, 6, 7 or 8!") - ENDIF() - ENDIF() - ELSEIF("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - IF(CUDA_VERSION VERSION_EQUAL 8.0) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 4.0) - MESSAGE(FATAL_ERROR "NVCC 8.0 does not support clang 4+. Please use NVCC 9.1!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 9.0) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 4.0) - MESSAGE(FATAL_ERROR "NVCC 9.0 does not support clang 4+. 
Please use NVCC 9.1!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 9.1) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 5.0) - MESSAGE(FATAL_ERROR "NVCC 9.1 does not support clang 5+. Please use clang 4!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 9.2) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 5.0) - MESSAGE(FATAL_ERROR "NVCC 9.2 does not support clang 6+ and fails compiling with clang 5. Please use clang 4!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 10.0) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 7.0) - MESSAGE(FATAL_ERROR "NVCC 10.0 does not support clang 7+. Please use clang 4, 5 or 6!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 10.1) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 9.0) - MESSAGE(FATAL_ERROR "NVCC 10.1 does not support clang 9+. Please use clang 4, 5, 6, 7 or 8!") - ENDIF() - ELSEIF(CUDA_VERSION VERSION_EQUAL 10.2) - IF(CMAKE_CXX_COMPILER_VERSION GREATER_EQUAL 9.0) - MESSAGE(FATAL_ERROR "NVCC 10.2 does not support clang 9+. Please use clang 4, 5, 6, 7 or 8!") - ENDIF() - ENDIF() - ENDIF() - - # CUDA 9.0 removed the __CUDACC_VER__ macro. Boost versions lower than 1.65.1 still use this macro. - IF(CUDA_VERSION VERSION_GREATER_EQUAL 9.0 AND Boost_VERSION VERSION_LESS 1.65.1) - MESSAGE(WARNING "CUDA 9.0 or newer requires boost-1.65.1 or newer!") - SET(_ALPAKA_FOUND FALSE) - ENDIF() - - # CUDA 9.0 is the first to support c++14. - IF((CUDA_VERSION VERSION_LESS 9.0) AND (ALPAKA_CXX_STANDARD GREATER 11)) - MESSAGE(WARNING "CUDA 9.0 or newer is required for c++14 or higher!") - SET(_ALPAKA_FOUND FALSE) - ENDIF() - - IF(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE) - MESSAGE(FATAL_ERROR "NVCC does not support boost.fiber!") - ENDIF() - - # Clean up the flags. Else, multiple find calls would result in duplicate flags. Furthermore, other modules may have set different settings. 
- SET(CUDA_NVCC_FLAGS) - - IF(${ALPAKA_DEBUG} GREATER 1) - SET(CUDA_VERBOSE_BUILD ON) - ENDIF() - - SET(CUDA_PROPAGATE_HOST_FLAGS ON) - - IF(ALPAKA_CUDA_NVCC_SEPARABLE_COMPILATION) - SET(CUDA_SEPARABLE_COMPILATION ON) - ENDIF() - - # nvcc sets no linux/__linux macros on OpenPOWER linux - # nvidia bug id: 2448610 - IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") - IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "ppc64le") - LIST(APPEND CUDA_NVCC_FLAGS "-Dlinux") - ENDIF() - ENDIF() - - IF(CUDA_VERSION VERSION_EQUAL 8.0) - LIST(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets") - ENDIF() - - - IF(ALPAKA_CUDA_NVCC_EXPT_EXTENDED_LAMBDA) - LIST(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda") - ENDIF() - IF(ALPAKA_CUDA_NVCC_EXPT_RELAXED_CONSTEXPR) - LIST(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr") - ENDIF() - - FOREACH(_CUDA_ARCH_ELEM ${ALPAKA_CUDA_ARCH}) - # set flags to create device code for the given architecture - LIST(APPEND CUDA_NVCC_FLAGS - --generate-code arch=compute_${_CUDA_ARCH_ELEM},code=sm_${_CUDA_ARCH_ELEM} - --generate-code arch=compute_${_CUDA_ARCH_ELEM},code=compute_${_CUDA_ARCH_ELEM} - ) - ENDFOREACH() - - IF(NOT MSVC) - LIST(APPEND CUDA_NVCC_FLAGS "-std=c++${ALPAKA_CXX_STANDARD}") - ENDIF() - - SET(CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}") - - IF(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - LIST(APPEND CUDA_NVCC_FLAGS "-g") - # https://github.com/ComputationalRadiationPhysics/alpaka/issues/428 - IF(((CMAKE_COMPILER_IS_GNUCXX AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) OR - (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.8)) AND - CUDA_VERSION VERSION_LESS 9.0) - MESSAGE(WARNING "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} does not support -G with CUDA <= 8! " - "Device debug symbols NOT added.") - ELSEIF(MSVC) - MESSAGE(WARNING "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION} does not support -G with CUDA! 
" - "Device debug symbols NOT added.") - ELSE() - LIST(APPEND CUDA_NVCC_FLAGS "-G") - ENDIF() - ENDIF() - - IF(ALPAKA_CUDA_FAST_MATH) - LIST(APPEND CUDA_NVCC_FLAGS "--use_fast_math") - ENDIF() - - IF(ALPAKA_CUDA_FTZ) - LIST(APPEND CUDA_NVCC_FLAGS "--ftz=true") - ELSE() - LIST(APPEND CUDA_NVCC_FLAGS "--ftz=false") - ENDIF() - - IF(ALPAKA_CUDA_SHOW_REGISTER) - LIST(APPEND CUDA_NVCC_FLAGS "-Xptxas=-v") - ENDIF() - - # Always add warning/error numbers which can be used for suppressions - LIST(APPEND CUDA_NVCC_FLAGS -Xcudafe --display_error_number) - - # avoids warnings on host-device signatured, default constructors/destructors - IF(CUDA_VERSION GREATER_EQUAL 9.0) - LIST(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored) - ENDIF() - - # avoids warnings on host-device signature of 'std::__shared_count<>' - IF(CUDA_VERSION EQUAL 10.0) - LIST(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=2905) - ELSEIF(CUDA_VERSION EQUAL 10.1) - LIST(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=2912) - ELSEIF(CUDA_VERSION EQUAL 10.2) - LIST(APPEND CUDA_NVCC_FLAGS -Xcudafe --diag_suppress=2976) - ENDIF() - - IF(ALPAKA_CUDA_KEEP_FILES) - MAKE_DIRECTORY("${PROJECT_BINARY_DIR}/nvcc_tmp") - LIST(APPEND CUDA_NVCC_FLAGS "--keep" "--keep-dir" "${PROJECT_BINARY_DIR}/nvcc_tmp") - ENDIF() - - OPTION(ALPAKA_CUDA_SHOW_CODELINES "Show kernel lines in cuda-gdb and cuda-memcheck" OFF) - IF(ALPAKA_CUDA_SHOW_CODELINES) - LIST(APPEND CUDA_NVCC_FLAGS "--source-in-ptx" "-lineinfo") - IF(NOT MSVC) - LIST(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "-rdynamic") - ENDIF() - SET(ALPAKA_CUDA_KEEP_FILES ON CACHE BOOL "activate keep files" FORCE) - ENDIF() - ENDIF() - - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "general;${CUDA_CUDART_LIBRARY}") - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC ${CUDA_INCLUDE_DIRS}) - ENDIF() - ENDIF() -ENDIF() - -#------------------------------------------------------------------------------- -# Find HIP. 
-IF(ALPAKA_ACC_GPU_HIP_ENABLE) - - IF(NOT DEFINED ALPAKA_HIP_VERSION) - SET(ALPAKA_HIP_VERSION 1.5) - ENDIF() - - IF(ALPAKA_HIP_VERSION VERSION_LESS 1.5) - MESSAGE(WARNING "HIP < 1.5 is not supported!") - SET(_ALPAKA_FOUND FALSE) - - ELSE() - # must set this for HIP package (note that you also need certain env vars) - SET(HIP_PLATFORM "${ALPAKA_HIP_PLATFORM}" CACHE STRING "") - SET(HIP_RUNTIME "${ALPAKA_HIP_PLATFORM}" CACHE STRING "") - - FIND_PACKAGE(HIP "${ALPAKA_HIP_VERSION}") - IF(NOT HIP_FOUND) - MESSAGE(WARNING "Optional alpaka dependency HIP could not be found! HIP back-end disabled!") - SET(ALPAKA_ACC_GPU_HIP_ENABLE OFF CACHE BOOL "Enable the HIP GPU back-end" FORCE) - - ELSE() - SET(ALPAKA_HIP_VERSION "${HIP_VERSION}") - IF(ALPAKA_HIP_VERSION VERSION_LESS 1.5.19211) - MESSAGE(STATUS "HIP < 1.5.19211 untested!") - ENDIF() - SET(ALPAKA_HIP_COMPILER "hipcc" CACHE STRING "HIP compiler") - SET_PROPERTY(CACHE ALPAKA_HIP_COMPILER PROPERTY STRINGS "hipcc") - - OPTION(ALPAKA_HIP_FAST_MATH "Enable fast-math" ON) - OPTION(ALPAKA_HIP_FTZ "Set flush to zero for GPU" OFF) - OPTION(ALPAKA_HIP_SHOW_REGISTER "Show kernel registers and create PTX" OFF) - OPTION(ALPAKA_HIP_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps (folder: nvcc_tmp)" OFF) - - SET(HIP_HIPCC_FLAGS) - - IF(ALPAKA_HIP_PLATFORM MATCHES "nvcc") - FIND_PACKAGE(CUDA) - IF(NOT CUDA_FOUND) - MESSAGE(WARNING "Could not found CUDA while HIP platform is set to nvcc. Compiling might fail.") - ENDIF() - - IF(CUDA_VERSION VERSION_LESS 9.0) - SET(ALPAKA_CUDA_ARCH "20" CACHE STRING "GPU architecture") - ELSE() - SET(ALPAKA_CUDA_ARCH "30" CACHE STRING "GPU architecture") - ENDIF() - - # CUDA 9.0 removed the __CUDACC_VER__ macro. Boost versions lower than 1.65.1 still use this macro. 
- IF(CUDA_VERSION VERSION_GREATER_EQUAL 9.0 AND Boost_VERSION VERSION_LESS 1.65.1) - MESSAGE(WARNING "CUDA 9.0 or newer requires boost-1.65.1 or newer!") - SET(_ALPAKA_FOUND FALSE) - ENDIF() - - IF(CUDA_VERSION VERSION_EQUAL 8.0) - LIST(APPEND HIP_HIPCC_FLAGS "-Wno-deprecated-gpu-targets") - ENDIF() - - IF(CUDA_VERSION VERSION_LESS 8.0) - MESSAGE(WARNING "CUDA Toolkit < 8.0 is not supported!") - SET(_ALPAKA_FOUND FALSE) - ENDIF() - - IF(${ALPAKA_DEBUG} GREATER 1) - SET(HIP_VERBOSE_BUILD ON) - ENDIF() - - LIST(APPEND HIP_NVCC_FLAGS "--expt-extended-lambda") - LIST(APPEND HIP_NVCC_FLAGS "--expt-relaxed-constexpr") - LIST(APPEND _ALPAKA_HIP_LIBRARIES "cudart") - - FOREACH(_HIP_ARCH_ELEM ${ALPAKA_CUDA_ARCH}) - # set flags to create device code for the given architecture - LIST(APPEND CUDA_NVCC_FLAGS - --generate-code arch=compute_${_HIP_ARCH_ELEM},code=sm_${_HIP_ARCH_ELEM} - --generate-code arch=compute_${_HIP_ARCH_ELEM},code=compute_${_HIP_ARCH_ELEM} - ) - ENDFOREACH() - # for CUDA cmake adds automatically compiler flags as nvcc does not do this, - # but for HIP we have to do this here - LIST(APPEND HIP_NVCC_FLAGS "-D__CUDACC__") - LIST(APPEND HIP_NVCC_FLAGS "-ccbin ${CMAKE_CXX_COMPILER}") - LIST(APPEND HIP_NVCC_FLAGS "-Xcompiler" "-g") - - IF(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - LIST(APPEND HIP_HIPCC_FLAGS "-G") - ENDIF() - # propage host flags - # SET(CUDA_PROPAGATE_HOST_FLAGS ON) # does not exist in HIP, so do it manually - string(TOUPPER "${CMAKE_BUILD_TYPE}" build_config) - FOREACH( _flag ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${build_config}}) - LIST(APPEND HIP_NVCC_FLAGS "-Xcompiler ${_flag}") - ENDFOREACH() - - IF(ALPAKA_HIP_FAST_MATH) - LIST(APPEND HIP_HIPCC_FLAGS "--use_fast_math") - ENDIF() - - IF(ALPAKA_HIP_FTZ) - LIST(APPEND HIP_HIPCC_FLAGS "--ftz=true") - ELSE() - LIST(APPEND HIP_HIPCC_FLAGS "--ftz=false") - ENDIF() - - IF(ALPAKA_HIP_SHOW_REGISTER) - LIST(APPEND HIP_HIPCC_FLAGS "-Xptxas=-v") - ENDIF() - 
IF(CUDA_VERSION GREATER_EQUAL 9.0) - # avoids warnings on host-device signatured, default constructors/destructors - LIST(APPEND HIP_HIPCC_FLAGS "-Xcudafe --diag_suppress=esa_on_defaulted_function_ignored") - ENDIF() - - # random numbers library ( HIP(NVCC) ) /hiprand - # HIP_ROOT_DIR is set by FindHIP.cmake - FIND_PATH(HIP_RAND_INC - NAMES "hiprand_kernel.h" - PATHS "${HIP_ROOT_DIR}/hiprand" "${HIP_ROOT_DIR}/include" "hiprand" - PATHS "/opt/rocm/rocrand/hiprand" - PATH_SUFFIXES "include" "hiprand") - FIND_LIBRARY(HIP_RAND_LIBRARY - NAMES "hiprand-d" "hiprand" - PATHS "${HIP_ROOT_DIR}/hiprand" "${HIP_ROOT_DIR}" "hiprand" - PATHS "/opt/rocm/rocrand/hiprand" - ENV HIP_PATH - PATH_SUFFIXES "lib" "lib64") - IF(NOT HIP_RAND_INC) - MESSAGE(FATAL_ERROR "Could not find hipRAND include (also searched in: HIP_ROOT_DIR=${HIP_ROOT_DIR}).") - ENDIF() - IF(NOT HIP_RAND_LIBRARY) - MESSAGE(FATAL_ERROR "Could not find hipRAND library (also searched in: HIP_ROOT_DIR=${HIP_ROOT_DIR}).") - ENDIF() - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC "${HIP_RAND_INC}") - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "${HIP_RAND_LIBRARY}") - ENDIF() # nvcc - - IF(ALPAKA_HIP_PLATFORM MATCHES "hcc") - - # random numbers library ( HIP(HCC) ) /rocrand - FIND_PATH(ROC_RAND_INC - rocrand_kernel.h - PATHS "${HIP_ROOT_DIR}/rocrand" "${HIP_ROOT_DIR}" "rocrand" - PATHS "/opt/rocm/rocrand" - ENV HIP_PATH - PATH_SUFFIXES "include") - FIND_LIBRARY(ROC_RAND_LIBRARY - rocrand-d - rocrand - PATHS "${HIP_ROOT_DIR}/rocrand" "${HIP_ROOT_DIR}" "rocrand" - PATHS "/opt/rocm/rocrand" - ENV HIP_PATH - PATH_SUFFIXES "lib" "lib64") - - # random numbers library ( HIP(HCC) ) rocrand/hiprand - FIND_PATH(HIP_RAND_INC - hiprand_kernel.h - PATHS "${HIP_ROOT_DIR}/hiprand" "${HIP_ROOT_DIR}" "hiprand" - PATHS "/opt/rocm/hiprand" - ENV HIP_PATH - PATH_SUFFIXES "include") - FIND_LIBRARY(HIP_RAND_LIBRARY - hiprand-d - hiprand - PATHS "${HIP_ROOT_DIR}/hiprand" "${HIP_ROOT_DIR}" "hiprand" - PATHS "/opt/rocm/hiprand" - ENV HIP_PATH - 
PATH_SUFFIXES "lib" "lib64") - IF(NOT HIP_RAND_INC OR NOT HIP_RAND_LIBRARY) - MESSAGE(FATAL_ERROR "Could not find hipRAND library") - ENDIF() - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC "${HIP_RAND_INC}") - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "${HIP_RAND_LIBRARY}") - - IF(NOT ROC_RAND_INC OR NOT ROC_RAND_LIBRARY) - MESSAGE(FATAL_ERROR "Could not find rocRAND library") - ENDIF() - - LIST(APPEND _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC "${ROC_RAND_INC}") - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "${ROC_RAND_LIBRARY}") - - ENDIF() - - - LIST(APPEND HIP_HIPCC_FLAGS "-D__HIPCC__") - LIST(APPEND HIP_HIPCC_FLAGS "-std=c++${ALPAKA_CXX_STANDARD}") - - IF(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - LIST(APPEND HIP_HIPCC_FLAGS "-g") - ENDIF() - - - IF(ALPAKA_HIP_KEEP_FILES) - MAKE_DIRECTORY("${PROJECT_BINARY_DIR}/hip_tmp") - LIST(APPEND HIP_HIPCC_FLAGS "--keep" "--keep-dir" "${PROJECT_BINARY_DIR}/hip_tmp") - ENDIF() - - OPTION(ALPAKA_HIP_SHOW_CODELINES "Show kernel lines in cuda-gdb and cuda-memcheck" OFF) - IF(ALPAKA_HIP_SHOW_CODELINES) - LIST(APPEND HIP_HIPCC_FLAGS "--source-in-ptx" "-lineinfo") - LIST(APPEND HIP_HIPCC_FLAGS "-Xcompiler" "-rdynamic") - SET(ALPAKA_HIP_KEEP_FILES ON CACHE BOOL "activate keep files" FORCE) - ENDIF() - IF(_ALPAKA_HIP_LIBRARIES) - LIST(APPEND _ALPAKA_LINK_LIBRARIES_PUBLIC "general;${_ALPAKA_HIP_LIBRARIES}") - ENDIF() - ENDIF() - ENDIF() -ENDIF() # HIP - -#------------------------------------------------------------------------------- -# alpaka. 
-IF(ALPAKA_ACC_GPU_CUDA_ONLY_MODE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_GPU_CUDA_ONLY_MODE") - MESSAGE(STATUS ALPAKA_ACC_GPU_CUDA_ONLY_MODE) -ENDIF() - -IF(ALPAKA_ACC_GPU_HIP_ONLY_MODE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_GPU_HIP_ONLY_MODE") - MESSAGE(STATUS ALPAKA_ACC_GPU_HIP_ONLY_MODE) -ENDIF() - -IF(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED) -ENDIF() -IF(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED) -ENDIF() -IF(ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_B_SEQ_T_FIBERS_ENABLED) -ENDIF() -IF(ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED) -ENDIF() -IF(ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_B_OMP2_T_SEQ_ENABLED) -ENDIF() -IF(ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_B_SEQ_T_OMP2_ENABLED) -ENDIF() -IF(ALPAKA_ACC_CPU_BT_OMP4_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_CPU_BT_OMP4_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_CPU_BT_OMP4_ENABLED) -ENDIF() -IF(ALPAKA_ACC_GPU_CUDA_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_GPU_CUDA_ENABLED") - MESSAGE(STATUS ALPAKA_ACC_GPU_CUDA_ENABLED) -ENDIF() -IF(ALPAKA_ACC_GPU_HIP_ENABLE) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_ACC_GPU_HIP_ENABLED") - MESSAGE(STATUS 
ALPAKA_ACC_GPU_HIP_ENABLED) -ENDIF() - -LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_DEBUG=${ALPAKA_DEBUG}") - -IF(ALPAKA_CI) - LIST(APPEND _ALPAKA_COMPILE_DEFINITIONS_PUBLIC "ALPAKA_CI") -ENDIF() - -SET(_ALPAKA_INCLUDE_DIRECTORY "${_ALPAKA_ROOT_DIR}/include") -SET(_ALPAKA_SUFFIXED_INCLUDE_DIR "${_ALPAKA_INCLUDE_DIRECTORY}/alpaka") - -SET(_ALPAKA_LINK_LIBRARY) - -# # cxx flags will not be forwarded to hip wrapped compiler, so it has to be provided manually -IF(ALPAKA_ACC_GPU_HIP_ENABLE) - SET(_ALPAKA_COMPILE_DEFINITIONS_HIP ${_ALPAKA_COMPILE_DEFINITIONS_PUBLIC}) - LIST_ADD_PREFIX("-D" _ALPAKA_COMPILE_DEFINITIONS_HIP) - LIST(APPEND HIP_HIPCC_FLAGS - ${_ALPAKA_COMPILE_DEFINITIONS_HIP} - ) - HIP_INCLUDE_DIRECTORIES( - # ${_ALPAKA_INCLUDE_DIRECTORY} - # ${_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC} - ${HIP_INCLUDE_DIRS} - ${Boost_INCLUDE_DIRS} - ${_ALPAKA_ROOT_DIR}/test/common/include - ) - - IF(OPENMP_FOUND) # remove fopenmp link from nvcc, otherwise linker error will occur - LIST(REMOVE_ITEM _ALPAKA_LINK_FLAGS_PUBLIC "${OpenMP_CXX_FLAGS}") - LIST(APPEND _ALPAKA_LINK_FLAGS_PUBLIC "-Xcompiler ${OpenMP_CXX_FLAGS}") - ENDIF() - IF(ALPAKA_HIP_PLATFORM MATCHES "hcc") - # GFX600, GFX601, GFX700, GFX701, GFX702, GFX703, GFX704, GFX801, GFX802, GFX803, GFX810, GFX900, GFX902 - SET(_ALPAKA_LINK_LIBRARIES_PUBLIC "${_ALPAKA_LINK_LIBRARIES_PUBLIC}" "--amdgpu-target=gfx803 --amdgpu-target=gfx900 --amdgpu-target=gfx906") - ENDIF() -ENDIF() - -# Add all the source and include files in all recursive subdirectories and group them accordingly. 
-append_recursive_files_add_to_src_group("${_ALPAKA_SUFFIXED_INCLUDE_DIR}" "${_ALPAKA_SUFFIXED_INCLUDE_DIR}" "hpp" _ALPAKA_FILES_HEADER) -append_recursive_files_add_to_src_group("${_ALPAKA_SUFFIXED_INCLUDE_DIR}" "${_ALPAKA_SUFFIXED_INCLUDE_DIR}" "h" _ALPAKA_FILES_HEADER) - -append_recursive_files_add_to_src_group("${_ALPAKA_ROOT_DIR}/script" "${_ALPAKA_ROOT_DIR}" "sh" _ALPAKA_FILES_SCRIPT) -SET_SOURCE_FILES_PROPERTIES(${_ALPAKA_FILES_SCRIPT} PROPERTIES HEADER_FILE_ONLY TRUE) - -append_recursive_files_add_to_src_group("${_ALPAKA_ROOT_DIR}/cmake" "${_ALPAKA_ROOT_DIR}" "cmake" _ALPAKA_FILES_CMAKE) -LIST(APPEND _ALPAKA_FILES_CMAKE "${_ALPAKA_ROOT_DIR}/alpakaConfig.cmake" "${_ALPAKA_ROOT_DIR}/Findalpaka.cmake" "${_ALPAKA_ROOT_DIR}/CMakeLists.txt" "${_ALPAKA_ROOT_DIR}/cmake/dev.cmake" "${_ALPAKA_ROOT_DIR}/cmake/common.cmake" "${_ALPAKA_ROOT_DIR}/cmake/addExecutable.cmake" "${_ALPAKA_ADD_LIBRRAY_FILE}") -SET_SOURCE_FILES_PROPERTIES(${_ALPAKA_FILES_CMAKE} PROPERTIES HEADER_FILE_ONLY TRUE) - -append_recursive_files_add_to_src_group("${_ALPAKA_ROOT_DIR}/doc/markdown" "${_ALPAKA_ROOT_DIR}" "md" _ALPAKA_FILES_DOC) -SET_SOURCE_FILES_PROPERTIES(${_ALPAKA_FILES_DOC} PROPERTIES HEADER_FILE_ONLY TRUE) - -SET(_ALPAKA_FILES_OTHER "${_ALPAKA_ROOT_DIR}/.gitignore" "${_ALPAKA_ROOT_DIR}/.travis.yml" "${_ALPAKA_ROOT_DIR}/.zenodo.json" "${_ALPAKA_ROOT_DIR}/LICENSE" "${_ALPAKA_ROOT_DIR}/README.md") -SET_SOURCE_FILES_PROPERTIES(${_ALPAKA_FILES_OTHER} PROPERTIES HEADER_FILE_ONLY TRUE) - -#------------------------------------------------------------------------------- -# Target. -IF(NOT TARGET "alpaka") - ADD_LIBRARY("alpaka" INTERFACE) - - # HACK: Workaround for the limitation that files added to INTERFACE targets (target_sources) can not be marked as PUBLIC or PRIVATE but only as INTERFACE. - # Therefore those files will be added to projects "linking" to the INTERFACE library, but are not added to the project itself within an IDE. 
- add_custom_target("alpakaIde" - SOURCES ${_ALPAKA_FILES_HEADER} ${_ALPAKA_FILES_SCRIPT} ${_ALPAKA_FILES_CMAKE} ${_ALPAKA_FILES_DOC} ${_ALPAKA_FILES_OTHER} - ) - - target_compile_features("alpaka" - INTERFACE cxx_std_${ALPAKA_CXX_STANDARD} - ) - - # Compile options. - IF(${ALPAKA_DEBUG} GREATER 1) - MESSAGE(STATUS "_ALPAKA_COMPILE_OPTIONS_PUBLIC: ${_ALPAKA_COMPILE_OPTIONS_PUBLIC}") - ENDIF() - LIST( - LENGTH - _ALPAKA_COMPILE_OPTIONS_PUBLIC - _ALPAKA_COMPILE_OPTIONS_PUBLIC_LENGTH) - IF(${_ALPAKA_COMPILE_OPTIONS_PUBLIC_LENGTH} GREATER 0) - TARGET_COMPILE_OPTIONS( - "alpaka" - INTERFACE ${_ALPAKA_COMPILE_OPTIONS_PUBLIC}) - ENDIF() - - # Compile definitions. - IF(${ALPAKA_DEBUG} GREATER 1) - MESSAGE(STATUS "_ALPAKA_COMPILE_DEFINITIONS_PUBLIC: ${_ALPAKA_COMPILE_DEFINITIONS_PUBLIC}") - ENDIF() - LIST( - LENGTH - _ALPAKA_COMPILE_DEFINITIONS_PUBLIC - _ALPAKA_COMPILE_DEFINITIONS_PUBLIC_LENGTH) - IF(${_ALPAKA_COMPILE_DEFINITIONS_PUBLIC_LENGTH} GREATER 0) - TARGET_COMPILE_DEFINITIONS( - "alpaka" - INTERFACE ${_ALPAKA_COMPILE_DEFINITIONS_PUBLIC}) - ENDIF() - - # Include directories. - IF(${ALPAKA_DEBUG} GREATER 1) - MESSAGE(STATUS "_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC: ${_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC}") - ENDIF() - LIST( - LENGTH - _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC - _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC_LENGTH) - IF(${_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC_LENGTH} GREATER 0) - TARGET_INCLUDE_DIRECTORIES( - "alpaka" - SYSTEM - INTERFACE ${_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC}) - ENDIF() - # the alpaka library itself - TARGET_INCLUDE_DIRECTORIES( - "alpaka" - INTERFACE ${_ALPAKA_INCLUDE_DIRECTORY} - ) - - # Link libraries. 
- # There are no PUBLIC_LINK_FLAGS in CMAKE: - # http://stackoverflow.com/questions/26850889/cmake-keeping-link-flags-of-internal-libs - IF(${ALPAKA_DEBUG} GREATER 1) - MESSAGE(STATUS "_ALPAKA_LINK_LIBRARIES_PUBLIC: ${_ALPAKA_LINK_LIBRARIES_PUBLIC}") - ENDIF() - LIST( - LENGTH - _ALPAKA_LINK_LIBRARIES_PUBLIC - _ALPAKA_LINK_LIBRARIES_PUBLIC_LENGTH) - IF(${_ALPAKA_LINK_LIBRARIES_PUBLIC_LENGTH} GREATER 0) - TARGET_LINK_LIBRARIES( - "alpaka" - INTERFACE ${_ALPAKA_LINK_LIBRARIES_PUBLIC} ${_ALPAKA_LINK_FLAGS_PUBLIC}) - ENDIF() -ENDIF() - -# NVCC does not incorporate the COMPILE_OPTIONS of a target but only the CMAKE_CXX_FLAGS -IF((ALPAKA_ACC_GPU_CUDA_ENABLE OR ALPAKA_ACC_GPU_HIP_ENABLE) AND ALPAKA_CUDA_COMPILER MATCHES "nvcc") - STRING(REPLACE ";" " " _ALPAKA_COMPILE_OPTIONS_STRING "${_ALPAKA_COMPILE_OPTIONS_PUBLIC}") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_ALPAKA_COMPILE_OPTIONS_STRING}") -ENDIF() - -#------------------------------------------------------------------------------- -# Find alpaka version. -file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/include/alpaka/version.hpp" ALPAKA_VERSION_MAJOR_HPP REGEX "#define ALPAKA_VERSION_MAJOR ") -file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/include/alpaka/version.hpp" ALPAKA_VERSION_MINOR_HPP REGEX "#define ALPAKA_VERSION_MINOR ") -file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/include/alpaka/version.hpp" ALPAKA_VERSION_PATCH_HPP REGEX "#define ALPAKA_VERSION_PATCH ") - -string(REGEX MATCH "([0-9]+)" ALPAKA_VERSION_MAJOR ${ALPAKA_VERSION_MAJOR_HPP}) -string(REGEX MATCH "([0-9]+)" ALPAKA_VERSION_MINOR ${ALPAKA_VERSION_MINOR_HPP}) -string(REGEX MATCH "([0-9]+)" ALPAKA_VERSION_PATCH ${ALPAKA_VERSION_PATCH_HPP}) - -SET(PACKAGE_VERSION "${ALPAKA_VERSION_MAJOR}.${ALPAKA_VERSION_MINOR}.${ALPAKA_VERSION_PATCH}") - -#------------------------------------------------------------------------------- -# Set return values. 
-SET(alpaka_VERSION "${ALPAKA_VERSION_MAJOR}.${ALPAKA_VERSION_MINOR}.${ALPAKA_VERSION_PATCH}") -SET(alpaka_COMPILE_OPTIONS ${_ALPAKA_COMPILE_OPTIONS_PUBLIC}) -SET(alpaka_COMPILE_DEFINITIONS ${_ALPAKA_COMPILE_DEFINITIONS_PUBLIC}) -# Add '-D' to the definitions -SET(alpaka_DEFINITIONS ${_ALPAKA_COMPILE_DEFINITIONS_PUBLIC}) -list_add_prefix("-D" alpaka_DEFINITIONS) -# Add the compile options to the definitions. -LIST(APPEND alpaka_DEFINITIONS ${_ALPAKA_COMPILE_OPTIONS_PUBLIC}) -SET(alpaka_INCLUDE_DIR ${_ALPAKA_INCLUDE_DIRECTORY}) -SET(alpaka_INCLUDE_DIRS ${_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC}) -LIST(APPEND alpaka_INCLUDE_DIRS ${_ALPAKA_INCLUDE_DIRECTORY}) -SET(alpaka_LIBRARY ${_ALPAKA_LINK_LIBRARY}) -SET(alpaka_LIBRARIES ${_ALPAKA_LINK_FLAGS_PUBLIC}) -LIST(APPEND alpaka_LIBRARIES ${_ALPAKA_LINK_LIBRARIES_PUBLIC}) - -#------------------------------------------------------------------------------- -# Print the return values. -IF(${ALPAKA_DEBUG} GREATER 0) - MESSAGE(STATUS "alpaka_FOUND: ${alpaka_FOUND}") - MESSAGE(STATUS "alpaka_VERSION: ${alpaka_VERSION}") - MESSAGE(STATUS "alpaka_COMPILE_OPTIONS: ${alpaka_COMPILE_OPTIONS}") - MESSAGE(STATUS "alpaka_COMPILE_DEFINITIONS: ${alpaka_COMPILE_DEFINITIONS}") - MESSAGE(STATUS "alpaka_DEFINITIONS: ${alpaka_DEFINITIONS}") - MESSAGE(STATUS "alpaka_INCLUDE_DIR: ${alpaka_INCLUDE_DIR}") - MESSAGE(STATUS "alpaka_INCLUDE_DIRS: ${alpaka_INCLUDE_DIRS}") - MESSAGE(STATUS "alpaka_LIBRARY: ${alpaka_LIBRARY}") - MESSAGE(STATUS "alpaka_LIBRARIES: ${alpaka_LIBRARIES}") -ENDIF() - -# Unset already set variables if not found. 
-IF(NOT _ALPAKA_FOUND) - UNSET(alpaka_FOUND) - UNSET(alpaka_VERSION) - UNSET(alpaka_COMPILE_OPTIONS) - UNSET(alpaka_COMPILE_DEFINITIONS) - UNSET(alpaka_DEFINITIONS) - UNSET(alpaka_INCLUDE_DIR) - UNSET(alpaka_INCLUDE_DIRS) - UNSET(alpaka_LIBRARY) - UNSET(alpaka_LIBRARIES) - - UNSET(_ALPAKA_FOUND) - UNSET(_ALPAKA_COMPILE_OPTIONS_PUBLIC) - UNSET(_ALPAKA_COMPILE_DEFINITIONS_PUBLIC) - UNSET(_ALPAKA_COMPILE_DEFINITIONS_HIP) - UNSET(_ALPAKA_HIP_LIBRARIES) - UNSET(_ALPAKA_INCLUDE_DIRECTORY) - UNSET(_ALPAKA_INCLUDE_DIRECTORIES_PUBLIC) - UNSET(_ALPAKA_LINK_LIBRARY) - UNSET(_ALPAKA_LINK_LIBRARIES_PUBLIC) - UNSET(_ALPAKA_LINK_FLAGS_PUBLIC) - UNSET(_ALPAKA_COMMON_FILE) - UNSET(_ALPAKA_ADD_EXECUTABLE_FILE) - UNSET(_ALPAKA_ADD_LIBRARY_FILE) - UNSET(_ALPAKA_FILES_HEADER) - UNSET(_ALPAKA_FILES_OTHER) - UNSET(_ALPAKA_BOOST_MIN_VER) -ELSE() - # Make internal variables advanced options in the GUI. - MARK_AS_ADVANCED( - alpaka_INCLUDE_DIR - alpaka_LIBRARY - _ALPAKA_COMPILE_OPTIONS_PUBLIC - _ALPAKA_COMPILE_DEFINITIONS_PUBLIC - _ALPAKA_INCLUDE_DIRECTORY - _ALPAKA_INCLUDE_DIRECTORIES_PUBLIC - _ALPAKA_LINK_LIBRARY - _ALPAKA_LINK_LIBRARIES_PUBLIC - _ALPAKA_LINK_FLAGS_PUBLIC - _ALPAKA_COMMON_FILE - _ALPAKA_ADD_EXECUTABLE_FILE - _ALPAKA_ADD_LIBRARY_FILE - _ALPAKA_FILES_HEADER - _ALPAKA_FILES_OTHER - _ALPAKA_BOOST_MIN_VER) -ENDIF() - -############################################################################### -# FindPackage options - -# Handles the REQUIRED, QUIET and version-related arguments for FIND_PACKAGE. -# NOTE: We do not check for alpaka_LIBRARIES and alpaka_DEFINITIONS because they can be empty. 
-INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS( - "alpaka" - FOUND_VAR alpaka_FOUND - REQUIRED_VARS alpaka_INCLUDE_DIR - VERSION_VAR alpaka_VERSION) diff --git a/thirdParty/alpaka/cmake/addExecutable.cmake b/thirdParty/alpaka/cmake/addExecutable.cmake deleted file mode 100644 index b602374544..0000000000 --- a/thirdParty/alpaka/cmake/addExecutable.cmake +++ /dev/null @@ -1,64 +0,0 @@ -# -# Copyright 2014-2019 Benjamin Worpitz -# -# This file is part of Alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# - -CMAKE_MINIMUM_REQUIRED(VERSION 3.11.4) - -#------------------------------------------------------------------------------ -# Calls CUDA_ADD_EXECUTABLE or ADD_EXECUTABLE depending on the enabled alpaka accelerators. -# Using a macro to stay in the scope (fixes lost assignment of linker command in FindHIP.cmake) -# https://github.com/ROCm-Developer-Tools/HIP/issues/631 -MACRO(ALPAKA_ADD_EXECUTABLE In_Name) - IF(ALPAKA_ACC_GPU_CUDA_ENABLE) - IF(ALPAKA_CUDA_COMPILER MATCHES "clang") - FOREACH(_file ${ARGN}) - IF((${_file} MATCHES "\\.cpp$") OR (${_file} MATCHES "\\.cxx$") OR (${_file} MATCHES "\\.cu$")) - SET_SOURCE_FILES_PROPERTIES(${_file} PROPERTIES COMPILE_FLAGS "-x cuda") - ENDIF() - ENDFOREACH() - ADD_EXECUTABLE( - ${In_Name} - ${ARGN}) - ELSE() - FOREACH(_file ${ARGN}) - IF((${_file} MATCHES "\\.cpp$") OR (${_file} MATCHES "\\.cxx$")) - SET_SOURCE_FILES_PROPERTIES(${_file} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ) - ENDIF() - ENDFOREACH() - IF (CMAKE_VERSION VERSION_LESS 3.9.0) - CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_EXECUTABLE calls TARGET_LINK_LIBRARIES without keywords. 
- ELSE() - SET(CUDA_LINK_LIBRARIES_KEYWORD "PUBLIC") - ENDIF() - CUDA_ADD_EXECUTABLE( - ${In_Name} - ${ARGN}) - ENDIF() - ELSEIF(ALPAKA_ACC_GPU_HIP_ENABLE) - FOREACH(_file ${ARGN}) - IF((${_file} MATCHES "\\.cpp$") OR (${_file} MATCHES "\\.cxx$")) - SET_SOURCE_FILES_PROPERTIES(${_file} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT OBJ) - ENDIF() - ENDFOREACH() - IF (CMAKE_VERSION VERSION_LESS 3.9.0) - CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_EXECUTABLE calls TARGET_LINK_LIBRARIES without keywords. - ELSE() - SET(HIP_LINK_LIBRARIES_KEYWORD "PUBLIC") - ENDIF() - - HIP_ADD_EXECUTABLE( - ${In_Name} - ${ARGN}) - - ELSE() - ADD_EXECUTABLE( - ${In_Name} - ${ARGN}) - ENDIF() -ENDMACRO() diff --git a/thirdParty/alpaka/cmake/addLibrary.cmake b/thirdParty/alpaka/cmake/addLibrary.cmake deleted file mode 100644 index 2d1c497341..0000000000 --- a/thirdParty/alpaka/cmake/addLibrary.cmake +++ /dev/null @@ -1,155 +0,0 @@ -# -# Copyright 2015-2019 Benjamin Worpitz, Maximilian Knespel -# -# This file is part of Alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# - -CMAKE_MINIMUM_REQUIRED(VERSION 3.11.4) - -#------------------------------------------------------------------------------ -# Calls CUDA_ADD_LIBRARY or ADD_LIBRARY depending on the enabled alpaka -# accelerators. -# -# ALPAKA_ADD_LIBRARY( cuda_target file0 file1 ... [STATIC | SHARED | MODULE] -# [EXCLUDE_FROM_ALL] [OPTIONS ... ] ) -# -# In order to be compliant with both ADD_LIBRARY and CUDA_ADD_LIBRARY -# the position of STATIC, SHARED, MODULE, EXCLUDE_FROM_ALL options don't matter. -# This also means you won't be able to include files with those exact same -# case-sensitive names. -# After OPTIONS only nvcc compiler flags are allowed though. And for readiblity -# and portability you shouldn't completely mix STATIC, ... with the source -# code filenames! 
-# OPTIONS and the arguments thereafter are ignored if not using CUDA, they -# won't throw an error in that case. -MACRO(ALPAKA_ADD_LIBRARY libraryName) - # CUDA_ADD_LIBRARY( cuda_target file0 file1 ... - # [STATIC | SHARED | MODULE] - # [EXCLUDE_FROM_ALL] [OPTIONS ... ] ) - # add_library( [STATIC | SHARED | MODULE] - # [EXCLUDE_FROM_ALL] - # source1 [source2 ...] ) - - # traverse arguments and sort them by option and source files - SET( arguments ${ARGN} ) - SET( optionsEncountered OFF ) - UNSET( libraryType ) - UNSET( excludeFromAll ) - UNSET( optionArguments ) - FOREACH( argument IN LISTS arguments ) - # 1.) check for OPTIONS - IF( argument STREQUAL "OPTIONS" ) - IF ( optionsEncountered ) - MESSAGE( FATAL_ERROR "[ALPAKA_ADD_LIBRARY] OPTIONS subcommand specified more than one time. This is not allowed!" ) - ELSE() - SET( optionsEncountered ON ) - ENDIF() - ENDIF() - - # 2.) check if inside OPTIONS, because then all other checks are - # unnecessary although they could give hints about wrong locations - # of those subcommands - IF( optionsEncountered ) - LIST( APPEND optionArguments "${argument}" ) - CONTINUE() - ENDIF() - - # 3.) check for libraryType and EXCLUDE_FROM_ALL - IF( ( argument STREQUAL "STATIC" ) OR - ( argument STREQUAL "SHARED" ) OR - ( argument STREQUAL "MODULE" ) - ) - IF( DEFINED libraryType ) - message( FATAL_ERROR "Setting more than one library type option ( STATIC SHARED MODULE ) not allowed!" ) - ENDIF() - set( libraryType ${argument} ) - CONTINUE() - ENDIF() - IF( argument STREQUAL "EXCLUDE_FROM_ALL" ) - SET( excludeFromAll ${argument} ) - CONTINUE() - ENDIF() - - # 4.) 
ELSE the argument is a file name - list( APPEND sourceFileNames "${argument}" ) - ENDFOREACH() - UNSET( optionsEncountered ) - #message( "libraryType = ${libraryType}" ) - #message( "sourceFileNames = ${sourceFileNames}" ) - - # call add_library or cuda_add_library now - IF( ALPAKA_ACC_GPU_CUDA_ENABLE ) - IF(ALPAKA_CUDA_COMPILER MATCHES "clang") - FOREACH( _file ${ARGN} ) - IF( ( ${_file} MATCHES "\\.cpp$" ) OR - ( ${_file} MATCHES "\\.cxx$" ) OR - ( ${_file} MATCHES "\\.cu$" ) - ) - SET_SOURCE_FILES_PROPERTIES( ${_file} PROPERTIES COMPILE_FLAGS "-x cuda" ) - ENDIF() - ENDFOREACH() - ADD_LIBRARY( - ${libraryName} - ${sourceFileNames} - ${libraryType} - ${excludeFromAll} - ${optionArguments} - ) - ELSE() - FOREACH( _file ${ARGN} ) - IF( ( ${_file} MATCHES "\\.cpp$" ) OR - ( ${_file} MATCHES "\\.cxx$" ) - ) - SET_SOURCE_FILES_PROPERTIES( ${_file} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ ) - ENDIF() - ENDFOREACH() - IF (CMAKE_VERSION VERSION_LESS 3.9.0) - CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_EXECUTABLE calls TARGET_LINK_LIBRARIES without keywords. - ELSE() - SET(CUDA_LINK_LIBRARIES_KEYWORD "PUBLIC") - ENDIF() - CUDA_ADD_LIBRARY( - ${libraryName} - ${sourceFileNames} - ${libraryType} - ${excludeFromAll} - ${optionArguments} - ) - ENDIF() - ELSEIF( ALPAKA_ACC_GPU_HIP_ENABLE ) - FOREACH( _file ${ARGN} ) - IF( ( ${_file} MATCHES "\\.cpp$" ) OR - ( ${_file} MATCHES "\\.cxx$" ) - ) - SET_SOURCE_FILES_PROPERTIES( ${_file} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT OBJ ) - ENDIF() - ENDFOREACH() - CMAKE_POLICY(SET CMP0023 OLD) # CUDA_ADD_LIBRARY calls TARGET_LINK_LIBRARIES without keywords. 
- HIP_ADD_LIBRARY( - ${libraryName} - ${sourceFileNames} - ${libraryType} - ${excludeFromAll} - ${optionArguments} - ) - - ELSE() - #message( "add_library( ${libraryName} ${libraryType} ${excludeFromAll} ${sourceFileNames} )" ) - ADD_LIBRARY( - ${libraryName} - ${libraryType} - ${excludeFromAll} - ${sourceFileNames} - ) - ENDIF() - - # UNSET variables (not sure if necessary) - UNSET( libraryType ) - UNSET( sourceFileNames ) - UNSET( excludeFromAll ) - UNSET( optionArguments ) -ENDMACRO() diff --git a/thirdParty/alpaka/cmake/common.cmake b/thirdParty/alpaka/cmake/common.cmake deleted file mode 100644 index af212c1c2e..0000000000 --- a/thirdParty/alpaka/cmake/common.cmake +++ /dev/null @@ -1,212 +0,0 @@ -# -# Copyright 2014-2019 Benjamin Worpitz -# -# This file is part of Alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# - -#------------------------------------------------------------------------------ -# Gets all recursive files with the given ending in the given directory and recursively below. -# This makes adding files easier because we do not have to update a list each time a file is added but this prevents CMake from detecting if it should be rerun! -FUNCTION(append_recursive_files In_RootDir In_FileExtension Out_FilePathsListVariableName) - #MESSAGE("In_RootDir: ${In_RootDir}") - #MESSAGE("In_FileExtension: ${In_FileExtension}") - #MESSAGE("Out_FilePathsListVariableName: ${Out_FilePathsListVariableName}") - # Get all recursive files. - FILE( - GLOB_RECURSE - relativeFilePathsList - "${In_RootDir}/*.${In_FileExtension}") - #MESSAGE( "relativeFilePathsList: ${relativeFilePathsList}" ) - # Set the return value (append it to the value in the parent scope). 
- SET( - ${Out_FilePathsListVariableName} - "${${Out_FilePathsListVariableName}}" "${relativeFilePathsList}" - PARENT_SCOPE) -ENDFUNCTION() - -#------------------------------------------------------------------------------ -# Gets all recursive relative subdirectories. -FUNCTION(append_recursive_relative_subdirs In_RootDir Out_RecursiveRelativeSubDirsVariableName) - #MESSAGE("In_RootDir: ${In_RootDir}") - # Get all the recursive files with their relative paths. - FILE( - GLOB_RECURSE - recursiveRelativeFiles - RELATIVE "${In_RootDir}/" "${In_RootDir}/*") - #MESSAGE("recursiveRelativeFiles: ${recursiveRelativeFiles}") - - # Get the paths to all the recursive files. - # Create empty list for the case of no subdirectories being present. - SET(recursiveRelativeSubDirs) - FOREACH( - relativeFilePath - IN LISTS recursiveRelativeFiles) - GET_FILENAME_COMPONENT( - relativeSubDir - "${relativeFilePath}" - PATH) - LIST( - APPEND - recursiveRelativeSubDirs - "${relativeSubDir}") - ENDFOREACH() - #MESSAGE("recursiveRelativeSubDirs: ${recursiveRelativeSubDirs}") - - # If the list is not empty. - LIST( - LENGTH - recursiveRelativeSubDirs - recursiveRelativeSubDirsLength) - IF("${recursiveRelativeSubDirsLength}") - # Remove duplicates from the list. - LIST( - REMOVE_DUPLICATES - recursiveRelativeSubDirs) - #MESSAGE("recursiveRelativeSubDirs: ${recursiveRelativeSubDirs}") - - # Set the return value (append it to the value in the parent scope). - #MESSAGE("Out_RecursiveRelativeSubDirsVariableName: ${Out_RecursiveRelativeSubDirsVariableName}") - SET( - ${Out_RecursiveRelativeSubDirsVariableName} - "${${Out_RecursiveRelativeSubDirsVariableName}}" "${recursiveRelativeSubDirs}" - PARENT_SCOPE) - ENDIF() -ENDFUNCTION() - -#------------------------------------------------------------------------------ -# Groups the files in the same way the directories are structured. 
-FUNCTION(add_recursive_files_to_src_group In_RootDir In_SrcGroupIgnorePrefix In_FileExtension) - #MESSAGE("In_RootDir: ${In_RootDir}") - #MESSAGE("In_SrcGroupIgnorePrefix: ${In_SrcGroupIgnorePrefix}") - #MESSAGE("In_FileExtension: ${In_FileExtension}") - SET(recursiveRelativeSubDirs) - # Get all recursive subdirectories. - append_recursive_relative_subdirs( - "${In_RootDir}" - recursiveRelativeSubDirs) - #MESSAGE("recursiveRelativeSubDirs: ${recursiveRelativeSubDirs}") - - # For the folder itself and each sub-folder... - FOREACH( - currentRelativeSubDir - IN - LISTS recursiveRelativeSubDirs - ITEMS "") - # Appended the current subdirectory. - IF(currentRelativeSubDir STREQUAL "") - SET( - currentSubDir - "${In_RootDir}") - ELSE() - SET( - currentSubDir - "${In_RootDir}/${currentRelativeSubDir}") - ENDIF() - #MESSAGE("currentSubDir: ${currentSubDir}") - # Get all the files in this sub-folder. - SET( - wildcardFilePath - "${currentSubDir}/*.${In_FileExtension}") - #MESSAGE("wildcardFilePath: ${wildcardFilePath}") - FILE( - GLOB - filesInSubDirList - "${wildcardFilePath}") - #MESSAGE("filesInSubDirList: ${filesInSubDirList}") - - LIST( - LENGTH - filesInSubDirList - filesInSubDirListLength) - IF("${filesInSubDirListLength}") - # Group the include files into a project sub-folder analogously to the filesystem hierarchy. - SET( - groupExpression - "${currentSubDir}") - #MESSAGE("groupExpression: ${groupExpression}") - # Remove the parent directory from the path. - # NOTE: This is not correct because it does not only replace at the beginning of the string. - # "STRING(REGEX REPLACE" would be correct if there was an easy way to escape arbitrary strings. - STRING( - REPLACE "${In_SrcGroupIgnorePrefix}" "" - groupExpression - "${groupExpression}") - # Remove leading slash. 
- STRING( - REGEX REPLACE "^/" "" - groupExpression - "${groupExpression}") - #MESSAGE("groupExpression: ${groupExpression}") - # Replace the directory separators in the path to build valid grouping expressions. - STRING( - REPLACE "/" "\\" - groupExpression - "${groupExpression}") - #MESSAGE("groupExpression: ${groupExpression}") - SOURCE_GROUP( - "${groupExpression}" - FILES ${filesInSubDirList}) - ENDIF() - ENDFOREACH() -ENDFUNCTION() - -#------------------------------------------------------------------------------ -# Gets all files with the given ending in the given directory. -# Groups the files in the same way the directories are structured. -# This makes adding files easier because we do not have to update a list each time a file is added but this prevents CMake from detecting if it should be rerun! -FUNCTION(append_recursive_files_add_to_src_group In_RootDir In_SrcGroupIgnorePrefix In_FileExtension Out_FilePathsListVariableName) - #MESSAGE("In_RootDir: ${In_RootDir}") - #MESSAGE("In_SrcGroupIgnorePrefix: ${In_SrcGroupIgnorePrefix}") - #MESSAGE("In_FileExtension: ${In_FileExtension}") - #MESSAGE("Out_FilePathsListVariableName: ${Out_FilePathsListVariableName}") - # We have to use a local variable and give it to the parent because append_recursive_files only gives it to our scope but not the one calling this function. - SET( - allFilePathsList - "${${Out_FilePathsListVariableName}}") - append_recursive_files( - "${In_RootDir}" - "${In_FileExtension}" - allFilePathsList) - #MESSAGE( "allFilePathsList: ${allFilePathsList}" ) - # Set the return value (append it to the value in the parent scope). 
- SET( - ${Out_FilePathsListVariableName} - "${${Out_FilePathsListVariableName}}" "${allFilePathsList}" - PARENT_SCOPE) - - add_recursive_files_to_src_group( - "${In_RootDir}" - "${In_SrcGroupIgnorePrefix}" - "${In_FileExtension}") -ENDFUNCTION() - -#------------------------------------------------------------------------------ -# void list_add_prefix(string In_Prefix, list* In_ListVariableName); -# - returns The In_ListVariableName with In_Prefix prepended to all items. -# - original list is modified -FUNCTION(list_add_prefix In_Prefix In_ListVariableName) - SET(local_list) - - FOREACH( - item - IN LISTS ${In_ListVariableName}) - IF(POLICY CMP0054) - CMAKE_POLICY(SET CMP0054 NEW) # Only interpret if() arguments as variables or keywords when unquoted. - ENDIF() - IF(NOT "${item}" STREQUAL "") - LIST( - APPEND - local_list - "${In_Prefix}${item}") - ENDIF() - ENDFOREACH() - - SET( - ${In_ListVariableName} - "${local_list}" - PARENT_SCOPE) -ENDFUNCTION() diff --git a/thirdParty/alpaka/cmake/dev.cmake b/thirdParty/alpaka/cmake/dev.cmake deleted file mode 100644 index e1ca9d99ca..0000000000 --- a/thirdParty/alpaka/cmake/dev.cmake +++ /dev/null @@ -1,146 +0,0 @@ -# -# Copyright 2014-2019 Benjamin Worpitz -# -# This file is part of Alpaka. -# -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at http://mozilla.org/MPL/2.0/. -# - -#------------------------------------------------------------------------------- -# Compiler settings. -#------------------------------------------------------------------------------- -# By marking the boost headers as system headers, warnings produced within them are ignored. 
-# Marking the boost headers as system headers does not work for nvcc (FindCUDA always uses -I) -TARGET_INCLUDE_DIRECTORIES( - "alpaka" - SYSTEM - INTERFACE ${Boost_INCLUDE_DIRS}) - -#MSVC -IF(MSVC) - # Force to always compile with W4 and WX - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "/W4") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "/WX") - # Improve debugging. - IF(CMAKE_BUILD_TYPE MATCHES "Debug") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-d2Zi+") - ENDIF() - IF(MSVC_VERSION GREATER 1900) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "/permissive-") - IF(MSVC_VERSION GREATER 1910) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "/Zc:twoPhase-") - ENDIF() - ENDIF() - IF(MSVC_VERSION GREATER 1800) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "/Zc:throwingNew" "/Zc:strictStrings") - ENDIF() -ELSE() - IF(NOT(ALPAKA_ACC_GPU_CUDA_ENABLE) OR ALPAKA_CUDA_COMPILER MATCHES "clang" - OR(ALPAKA_ACC_GPU_HIP_ENABLE AND HIP_PLATFORM MATCHES "nvcc")) - # GNU - IF(CMAKE_COMPILER_IS_GNUCXX) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wall") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wextra") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-pedantic") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Werror") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wdouble-promotion") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wmissing-include-dirs") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wunknown-pragmas") - # Higher levels (max is 5) produce some strange warnings - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wstrict-overflow=2") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wtrampolines") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wfloat-equal") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wundef") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wshadow") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wcast-qual") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wcast-align") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wwrite-strings") - # Too noisy as it warns for every operation using numeric types smaller then int. 
- # Such values are converted to int implicitly before the calculation is done. - # E.g.: uint16_t = uint16_t * uint16_t will trigger the following warning: - # conversion to ‘short unsigned int’ from ‘int’ may alter its value - #LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wconversion") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wsign-conversion") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wvector-operation-performance") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wzero-as-null-pointer-constant") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wdate-time") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wuseless-cast") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wlogical-op") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-aggressive-loop-optimizations") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wmissing-declarations") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-multichar") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wopenmp-simd") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wpacked") - # Too much noise - #LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wpadded") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wredundant-decls") - # Too much noise - #LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Winline") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wdisabled-optimization") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wformat-nonliteral") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wformat-security") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wformat-y2k") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wctor-dtor-privacy") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wdelete-non-virtual-dtor") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wliteral-suffix") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wnon-virtual-dtor") - # This warns about members that have not explicitly been listed in the constructor initializer list. - # This could be useful even for members that have a default constructor. - # However, it also issues this warning for defaulted constructurs. 
- #LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Weffc++") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Woverloaded-virtual") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wsign-promo") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wconditionally-supported") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wnoexcept") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wold-style-cast") - IF(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wsuggest-final-types") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wsuggest-final-methods") - # This does not work correctly as it suggests override to methods that are already marked with final. - # Because final implies override, this is not useful. - #LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wsuggest-override") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wnormalized") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wformat-signedness") - ENDIF() - IF(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.0) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wnull-dereference") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wduplicated-cond") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wsubobject-linkage") - ENDIF() - IF(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0) - # This warning might be useful but it is triggered by comile-time code where it does not make any sense: - # E.g. "vec::Vec, TElem>" when both values are equal - #LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wduplicated-branches") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Walloc-zero") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Walloca") - ENDIF() - IF(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8.0) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wcast-align=strict") - ENDIF() - - # Clang or AppleClang - ELSEIF(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Werror") - # Weverything really means everything (including Wall, Wextra, pedantic, ...) 
- LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Weverything") - # We are not C++98 compatible (we use C++11 features) - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-c++98-compat") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-c++98-compat-pedantic") - # The following warnings are triggered by all instantiations of BOOST_AUTO_TEST_SUITE - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-disabled-macro-expansion") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-global-constructors") - # This padding warning is generated by the execution tasks depending on the argument types - # as they are stored as members. Therefore, the padding warning is triggered by the calling code - # and does not indicate a failure within alpaka. - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wno-padded") - # ICC - ELSEIF(${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Wall") - # PGI - ELSEIF(${CMAKE_CXX_COMPILER_ID} STREQUAL "PGI") - LIST(APPEND ALPAKA_DEV_COMPILE_OPTIONS "-Minform=inform") - ENDIF() - ENDIF() -ENDIF() diff --git a/thirdParty/alpaka/cmake/modules/FindHIP.cmake b/thirdParty/alpaka/cmake/modules/FindHIP.cmake deleted file mode 100644 index dd55e18228..0000000000 --- a/thirdParty/alpaka/cmake/modules/FindHIP.cmake +++ /dev/null @@ -1,601 +0,0 @@ -# /* -# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. 
- -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# */ - -############################################################################### -# FindHIP.cmake -############################################################################### - -############################################################################### -# SET: Variable defaults -############################################################################### -# User defined flags -set(HIP_HIPCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HIPCC") -set(HIP_HCC_FLAGS "" CACHE STRING "Semicolon delimited flags for HCC") -set(HIP_NVCC_FLAGS "" CACHE STRING "Semicolon delimted flags for NVCC") -mark_as_advanced(HIP_HIPCC_FLAGS HIP_HCC_FLAGS HIP_NVCC_FLAGS) -set(_hip_configuration_types ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE} Debug MinSizeRel Release RelWithDebInfo) -list(REMOVE_DUPLICATES _hip_configuration_types) -foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - set(HIP_HIPCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HIPCC") - set(HIP_HCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for HCC") - set(HIP_NVCC_FLAGS_${config_upper} "" CACHE STRING "Semicolon delimited flags for NVCC") - mark_as_advanced(HIP_HIPCC_FLAGS_${config_upper} HIP_HCC_FLAGS_${config_upper} HIP_NVCC_FLAGS_${config_upper}) -endforeach() -option(HIP_HOST_COMPILATION_CPP "Host code compilation mode" ON) -option(HIP_VERBOSE_BUILD "Print out the commands run while compiling the HIP source file. 
With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF) -mark_as_advanced(HIP_HOST_COMPILATION_CPP) - -############################################################################### -# Set HIP CMAKE Flags -############################################################################### -# Copy the invocation styles from CXX to HIP -set(CMAKE_HIP_ARCHIVE_CREATE ${CMAKE_CXX_ARCHIVE_CREATE}) -set(CMAKE_HIP_ARCHIVE_APPEND ${CMAKE_CXX_ARCHIVE_APPEND}) -set(CMAKE_HIP_ARCHIVE_FINISH ${CMAKE_CXX_ARCHIVE_FINISH}) -set(CMAKE_SHARED_LIBRARY_SONAME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_SONAME_CXX_FLAG}) -set(CMAKE_SHARED_LIBRARY_CREATE_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS}) -set(CMAKE_SHARED_LIBRARY_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_CXX_FLAGS}) -#set(CMAKE_SHARED_LIBRARY_LINK_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS}) -set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG}) -set(CMAKE_SHARED_LIBRARY_RUNTIME_HIP_FLAG_SEP ${CMAKE_SHARED_LIBRARY_RUNTIME_CXX_FLAG_SEP}) -set(CMAKE_SHARED_LIBRARY_LINK_STATIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_STATIC_CXX_FLAGS}) -set(CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_HIP_FLAGS ${CMAKE_SHARED_LIBRARY_LINK_DYNAMIC_CXX_FLAGS}) - -# Set the CMake Flags to use the HCC Compilier. 
-set(CMAKE_HIP_CREATE_SHARED_LIBRARY "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") -set(CMAKE_HIP_CREATE_SHARED_MODULE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o -shared" ) -set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_PATH} -o ") - -############################################################################### -# FIND: HIP and associated helper binaries -############################################################################### -# HIP is supported on Linux only -if(UNIX AND NOT APPLE AND NOT CYGWIN) - # Search for HIP installation - if(NOT HIP_ROOT_DIR) - # Search in user specified path first - find_path( - HIP_ROOT_DIR - NAMES hipconfig - PATHS - ENV ROCM_PATH - ENV HIP_PATH - PATH_SUFFIXES bin - DOC "HIP installed location" - NO_DEFAULT_PATH - ) - # Now search in default path - find_path( - HIP_ROOT_DIR - NAMES hipconfig - PATHS - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - DOC "HIP installed location" - ) - - # Check if we found HIP installation - if(HIP_ROOT_DIR) - # If so, fix the path - string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" HIP_ROOT_DIR ${HIP_ROOT_DIR}) - # And push it back to the cache - set(HIP_ROOT_DIR ${HIP_ROOT_DIR} CACHE PATH "HIP installed location" FORCE) - endif() - if(NOT EXISTS ${HIP_ROOT_DIR}) - if(HIP_FIND_REQUIRED) - message(FATAL_ERROR "Specify HIP_ROOT_DIR") - elseif(NOT HIP_FIND_QUIETLY) - message("HIP_ROOT_DIR not found or specified") - endif() - endif() - endif() - - # Find HIPCC executable - find_program( - HIP_HIPCC_EXECUTABLE - NAMES hipcc - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH - ) - if(NOT HIP_HIPCC_EXECUTABLE) - # Now search in default paths - find_program(HIP_HIPCC_EXECUTABLE hipcc) - endif() - mark_as_advanced(HIP_HIPCC_EXECUTABLE) - - # Find HIPCONFIG executable - find_program( - HIP_HIPCONFIG_EXECUTABLE - NAMES hipconfig - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - 
/opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH - ) - if(NOT HIP_HIPCONFIG_EXECUTABLE) - # Now search in default paths - find_program(HIP_HIPCONFIG_EXECUTABLE hipconfig) - endif() - mark_as_advanced(HIP_HIPCONFIG_EXECUTABLE) - - # Find HIPCC_CMAKE_LINKER_HELPER executable - find_program( - HIP_HIPCC_CMAKE_LINKER_HELPER - NAMES hipcc_cmake_linker_helper - PATHS - "${HIP_ROOT_DIR}" - ENV ROCM_PATH - ENV HIP_PATH - /opt/rocm - /opt/rocm/hip - PATH_SUFFIXES bin - NO_DEFAULT_PATH - ) - if(NOT HIP_HIPCC_CMAKE_LINKER_HELPER) - # Now search in default paths - find_program(HIP_HIPCC_CMAKE_LINKER_HELPER hipcc_cmake_linker_helper) - endif() - mark_as_advanced(HIP_HIPCC_CMAKE_LINKER_HELPER) - - if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_VERSION) - # Compute the version - execute_process( - COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --version - OUTPUT_VARIABLE _hip_version - ERROR_VARIABLE _hip_error - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_STRIP_TRAILING_WHITESPACE - ) - if(NOT _hip_error) - set(HIP_VERSION ${_hip_version} CACHE STRING "Version of HIP as computed from hipcc") - else() - set(HIP_VERSION "0.0.0" CACHE STRING "Version of HIP as computed by FindHIP()") - endif() - mark_as_advanced(HIP_VERSION) - endif() - if(HIP_VERSION) - string(REPLACE "." 
";" _hip_version_list "${HIP_VERSION}") - list(GET _hip_version_list 0 HIP_VERSION_MAJOR) - list(GET _hip_version_list 1 HIP_VERSION_MINOR) - list(GET _hip_version_list 2 HIP_VERSION_PATCH) - set(HIP_VERSION_STRING "${HIP_VERSION}") - endif() - - if(HIP_HIPCONFIG_EXECUTABLE AND NOT HIP_PLATFORM) - # Compute the platform - execute_process( - COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform - OUTPUT_VARIABLE _hip_platform - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - set(HIP_PLATFORM ${_hip_platform} CACHE STRING "HIP platform as computed by hipconfig") - mark_as_advanced(HIP_PLATFORM) - endif() -endif() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( - HIP - REQUIRED_VARS - HIP_ROOT_DIR - HIP_HIPCC_EXECUTABLE - HIP_HIPCONFIG_EXECUTABLE - HIP_PLATFORM - VERSION_VAR HIP_VERSION - ) - -############################################################################### -# MACRO: Locate helper files -############################################################################### -macro(HIP_FIND_HELPER_FILE _name _extension) - set(_hip_full_name "${_name}.${_extension}") - get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) - set(HIP_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindHIP/${_hip_full_name}") - if(NOT EXISTS "${HIP_${_name}}") - set(error_message "${_hip_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindHIP") - if(HIP_FIND_REQUIRED) - message(FATAL_ERROR "${error_message}") - else() - if(NOT HIP_FIND_QUIETLY) - message(STATUS "${error_message}") - endif() - endif() - endif() - # Set this variable as internal, so the user isn't bugged with it. 
- set(HIP_${_name} ${HIP_${_name}} CACHE INTERNAL "Location of ${_full_name}" FORCE) -endmacro() - -############################################################################### -hip_find_helper_file(run_make2cmake cmake) -hip_find_helper_file(run_hipcc cmake) -############################################################################### - -############################################################################### -# MACRO: Reset compiler flags -############################################################################### -macro(HIP_RESET_FLAGS) - unset(HIP_HIPCC_FLAGS) - unset(HIP_HCC_FLAGS) - unset(HIP_NVCC_FLAGS) - foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - unset(HIP_HIPCC_FLAGS_${config_upper}) - unset(HIP_HCC_FLAGS_${config_upper}) - unset(HIP_NVCC_FLAGS_${config_upper}) - endforeach() -endmacro() - -############################################################################### -# MACRO: Separate the options from the sources -############################################################################### -macro(HIP_GET_SOURCES_AND_OPTIONS _sources _cmake_options _hipcc_options _hcc_options _nvcc_options) - set(${_sources}) - set(${_cmake_options}) - set(${_hipcc_options}) - set(${_hcc_options}) - set(${_nvcc_options}) - set(_hipcc_found_options FALSE) - set(_hcc_found_options FALSE) - set(_nvcc_found_options FALSE) - foreach(arg ${ARGN}) - if("x${arg}" STREQUAL "xHIPCC_OPTIONS") - set(_hipcc_found_options TRUE) - set(_hcc_found_options FALSE) - set(_nvcc_found_options FALSE) - elseif("x${arg}" STREQUAL "xHCC_OPTIONS") - set(_hipcc_found_options FALSE) - set(_hcc_found_options TRUE) - set(_nvcc_found_options FALSE) - elseif("x${arg}" STREQUAL "xNVCC_OPTIONS") - set(_hipcc_found_options FALSE) - set(_hcc_found_options FALSE) - set(_nvcc_found_options TRUE) - elseif( - "x${arg}" STREQUAL "xEXCLUDE_FROM_ALL" OR - "x${arg}" STREQUAL "xSTATIC" OR - "x${arg}" STREQUAL "xSHARED" OR - "x${arg}" STREQUAL "xMODULE" 
- ) - list(APPEND ${_cmake_options} ${arg}) - else() - if(_hipcc_found_options) - list(APPEND ${_hipcc_options} ${arg}) - elseif(_hcc_found_options) - list(APPEND ${_hcc_options} ${arg}) - elseif(_nvcc_found_options) - list(APPEND ${_nvcc_options} ${arg}) - else() - # Assume this is a file - list(APPEND ${_sources} ${arg}) - endif() - endif() - endforeach() -endmacro() - -############################################################################### -# MACRO: Add include directories to pass to the hipcc command -############################################################################### -set(HIP_HIPCC_INCLUDE_ARGS_USER "") -macro(HIP_INCLUDE_DIRECTORIES) - foreach(dir ${ARGN}) - list(APPEND HIP_HIPCC_INCLUDE_ARGS_USER $<$:-I${dir}>) - endforeach() -endmacro() - -############################################################################### -# FUNCTION: Helper to avoid clashes of files with the same basename but different paths -############################################################################### -function(HIP_COMPUTE_BUILD_PATH path build_path) - # Convert to cmake style paths - file(TO_CMAKE_PATH "${path}" bpath) - if(IS_ABSOLUTE "${bpath}") - string(FIND "${bpath}" "${CMAKE_CURRENT_BINARY_DIR}" _binary_dir_pos) - if(_binary_dir_pos EQUAL 0) - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_BINARY_DIR}" "${bpath}") - else() - file(RELATIVE_PATH bpath "${CMAKE_CURRENT_SOURCE_DIR}" "${bpath}") - endif() - endif() - - # Remove leading / - string(REGEX REPLACE "^[/]+" "" bpath "${bpath}") - # Avoid absolute paths by removing ':' - string(REPLACE ":" "_" bpath "${bpath}") - # Avoid relative paths that go up the tree - string(REPLACE "../" "__/" bpath "${bpath}") - # Avoid spaces - string(REPLACE " " "_" bpath "${bpath}") - # Strip off the filename - get_filename_component(bpath "${bpath}" PATH) - - set(${build_path} "${bpath}" PARENT_SCOPE) -endfunction() - -############################################################################### -# MACRO: Parse 
OPTIONS from ARGN & set variables prefixed by _option_prefix -############################################################################### -macro(HIP_PARSE_HIPCC_OPTIONS _option_prefix) - set(_hip_found_config) - foreach(arg ${ARGN}) - # Determine if we are dealing with a per-configuration flag - foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - if(arg STREQUAL "${config_upper}") - set(_hip_found_config _${arg}) - # Clear arg to prevent it from being processed anymore - set(arg) - endif() - endforeach() - if(arg) - list(APPEND ${_option_prefix}${_hip_found_config} "${arg}") - endif() - endforeach() -endmacro() - -############################################################################### -# MACRO: Try and include dependency file if it exists -############################################################################### -macro(HIP_INCLUDE_HIPCC_DEPENDENCIES dependency_file) - set(HIP_HIPCC_DEPEND) - set(HIP_HIPCC_DEPEND_REGENERATE FALSE) - - # Create the dependency file if it doesn't exist - if(NOT EXISTS ${dependency_file}) - file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. Do not edit.\n") - endif() - # Include the dependency file - include(${dependency_file}) - - # Verify the existence of all the included files - if(HIP_HIPCC_DEPEND) - foreach(f ${HIP_HIPCC_DEPEND}) - if(NOT EXISTS ${f}) - # If they aren't there, regenerate the file again - set(HIP_HIPCC_DEPEND_REGENERATE TRUE) - endif() - endforeach() - else() - # No dependencies, so regenerate the file - set(HIP_HIPCC_DEPEND_REGENERATE TRUE) - endif() - - # Regenerate the dependency file if needed - if(HIP_HIPCC_DEPEND_REGENERATE) - set(HIP_HIPCC_DEPEND ${dependency_file}) - file(WRITE ${dependency_file} "# Generated by: FindHIP.cmake. 
Do not edit.\n") - endif() -endmacro() - -############################################################################### -# MACRO: Prepare cmake commands for the target -############################################################################### -macro(HIP_PREPARE_TARGET_COMMANDS _target _format _generated_files _source_files) - set(_hip_flags "") - string(TOUPPER "${CMAKE_BUILD_TYPE}" _hip_build_configuration) - if(HIP_HOST_COMPILATION_CPP) - set(HIP_C_OR_CXX CXX) - else() - set(HIP_C_OR_CXX C) - endif() - set(generated_extension ${CMAKE_${HIP_C_OR_CXX}_OUTPUT_EXTENSION}) - - # Initialize list of includes with those specified by the user. Append with - # ones specified to cmake directly. - set(HIP_HIPCC_INCLUDE_ARGS ${HIP_HIPCC_INCLUDE_ARGS_USER}) - - # Add the include directories - set(include_directories_generator "$") - list(APPEND HIP_HIPCC_INCLUDE_ARGS "$<$:-I$>") - - get_directory_property(_hip_include_directories INCLUDE_DIRECTORIES) - list(REMOVE_DUPLICATES _hip_include_directories) - if(_hip_include_directories) - foreach(dir ${_hip_include_directories}) - list(APPEND HIP_HIPCC_INCLUDE_ARGS $<$:-I${dir}>) - endforeach() - endif() - - HIP_GET_SOURCES_AND_OPTIONS(_hip_sources _hip_cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PARSE_HIPCC_OPTIONS(HIP_HIPCC_FLAGS ${_hipcc_options}) - HIP_PARSE_HIPCC_OPTIONS(HIP_HCC_FLAGS ${_hcc_options}) - HIP_PARSE_HIPCC_OPTIONS(HIP_NVCC_FLAGS ${_nvcc_options}) - - # Add the compile definitions - set(compile_definition_generator "$") - list(APPEND HIP_HIPCC_FLAGS "$<$:-D$>") - - # Check if we are building shared library. 
- set(_hip_build_shared_libs FALSE) - list(FIND _hip_cmake_options SHARED _hip_found_SHARED) - list(FIND _hip_cmake_options MODULE _hip_found_MODULE) - if(_hip_found_SHARED GREATER -1 OR _hip_found_MODULE GREATER -1) - set(_hip_build_shared_libs TRUE) - endif() - list(FIND _hip_cmake_options STATIC _hip_found_STATIC) - if(_hip_found_STATIC GREATER -1) - set(_hip_build_shared_libs FALSE) - endif() - - # If we are building a shared library, add extra flags to HIP_HIPCC_FLAGS - if(_hip_build_shared_libs) - list(APPEND HIP_HCC_FLAGS "-fPIC") - list(APPEND HIP_NVCC_FLAGS "--shared -Xcompiler '-fPIC'") - endif() - - # Set host compiler - set(HIP_HOST_COMPILER "${CMAKE_${HIP_C_OR_CXX}_COMPILER}") - - # Set compiler flags - set(_HIP_HOST_FLAGS "set(CMAKE_HOST_FLAGS ${CMAKE_${HIP_C_OR_CXX}_FLAGS})") - set(_HIP_HIPCC_FLAGS "set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS})") - set(_HIP_HCC_FLAGS "set(HIP_HCC_FLAGS ${HIP_HCC_FLAGS})") - set(_HIP_NVCC_FLAGS "set(HIP_NVCC_FLAGS ${HIP_NVCC_FLAGS})") - foreach(config ${_hip_configuration_types}) - string(TOUPPER ${config} config_upper) - set(_HIP_HOST_FLAGS "${_HIP_HOST_FLAGS}\nset(CMAKE_HOST_FLAGS_${config_upper} ${CMAKE_${HIP_C_OR_CXX}_FLAGS_${config_upper}})") - set(_HIP_HIPCC_FLAGS "${_HIP_HIPCC_FLAGS}\nset(HIP_HIPCC_FLAGS_${config_upper} ${HIP_HIPCC_FLAGS_${config_upper}})") - set(_HIP_HCC_FLAGS "${_HIP_HCC_FLAGS}\nset(HIP_HCC_FLAGS_${config_upper} ${HIP_HCC_FLAGS_${config_upper}})") - set(_HIP_NVCC_FLAGS "${_HIP_NVCC_FLAGS}\nset(HIP_NVCC_FLAGS_${config_upper} ${HIP_NVCC_FLAGS_${config_upper}})") - endforeach() - - # Reset the output variable - set(_hip_generated_files "") - set(_hip_source_files "") - - # Iterate over all arguments and create custom commands for all source files - foreach(file ${ARGN}) - # Ignore any file marked as a HEADER_FILE_ONLY - get_source_file_property(_is_header ${file} HEADER_FILE_ONLY) - # Allow per source file overrides of the format. Also allows compiling non .cu files. 
- get_source_file_property(_hip_source_format ${file} HIP_SOURCE_PROPERTY_FORMAT) - if((${file} MATCHES "\\.cu$" OR _hip_source_format) AND NOT _is_header) - set(host_flag FALSE) - else() - set(host_flag TRUE) - endif() - - if(NOT host_flag) - # Determine output directory - HIP_COMPUTE_BUILD_PATH("${file}" hip_build_path) - set(hip_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_target}.dir/${hip_build_path}") - - get_filename_component(basename ${file} NAME) - set(generated_file_path "${hip_compile_output_dir}/${CMAKE_CFG_INTDIR}") - set(generated_file_basename "${_target}_generated_${basename}${generated_extension}") - - # Set file names - set(generated_file "${generated_file_path}/${generated_file_basename}") - set(cmake_dependency_file "${hip_compile_output_dir}/${generated_file_basename}.depend") - set(custom_target_script_pregen "${hip_compile_output_dir}/${generated_file_basename}.cmake.pre-gen") - set(custom_target_script "${hip_compile_output_dir}/${generated_file_basename}.cmake") - - # Set properties for object files - set_source_files_properties("${generated_file}" - PROPERTIES - EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked - ) - - # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path - get_filename_component(file_path "${file}" PATH) - if(IS_ABSOLUTE "${file_path}") - set(source_file "${file}") - else() - set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}") - endif() - - # Bring in the dependencies - HIP_INCLUDE_HIPCC_DEPENDENCIES(${cmake_dependency_file}) - - # Configure the build script - configure_file("${HIP_run_hipcc}" "${custom_target_script_pregen}" @ONLY) - file(GENERATE - OUTPUT "${custom_target_script}" - INPUT "${custom_target_script_pregen}" - ) - set(main_dep DEPENDS ${source_file}) - if(CMAKE_GENERATOR MATCHES "Makefiles") - set(verbose_output "$(VERBOSE)") - elseif(HIP_VERBOSE_BUILD) - set(verbose_output ON) - else() - set(verbose_output OFF) - endif() - - # 
Create up the comment string - file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}") - set(hip_build_comment_string "Building HIPCC object ${generated_file_relative_path}") - - # Build the generated file and dependency file - add_custom_command( - OUTPUT ${generated_file} - # These output files depend on the source_file and the contents of cmake_dependency_file - ${main_dep} - DEPENDS ${HIP_HIPCC_DEPEND} - DEPENDS ${custom_target_script} - # Make sure the output directory exists before trying to write to it. - COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}" - COMMAND ${CMAKE_COMMAND} ARGS - -D verbose:BOOL=${verbose_output} - -D build_configuration:STRING=${_hip_build_configuration} - -D "generated_file:STRING=${generated_file}" - -P "${custom_target_script}" - WORKING_DIRECTORY "${hip_compile_output_dir}" - COMMENT "${hip_build_comment_string}" - ) - - # Make sure the build system knows the file is generated - set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE) - list(APPEND _hip_generated_files ${generated_file}) - list(APPEND _hip_source_files ${file}) - endif() - endforeach() - - # Set the return parameter - set(${_generated_files} ${_hip_generated_files}) - set(${_source_files} ${_hip_source_files}) -endmacro() - -############################################################################### -# HIP_ADD_EXECUTABLE -############################################################################### -macro(HIP_ADD_EXECUTABLE hip_target) - # Separate the sources from the options - HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) - if(_source_files) - list(REMOVE_ITEM _sources ${_source_files}) - endif() - if("x${HCC_HOME}" STREQUAL "x") - set(HCC_HOME 
"/opt/rocm/hcc") - endif() - set(CMAKE_HIP_LINK_EXECUTABLE "${HIP_HIPCC_CMAKE_LINKER_HELPER} ${HCC_HOME} -o ") - add_executable(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) - set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE HIP) -endmacro() - -############################################################################### -# HIP_ADD_LIBRARY -############################################################################### -macro(HIP_ADD_LIBRARY hip_target) - # Separate the sources from the options - HIP_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _hipcc_options _hcc_options _nvcc_options ${ARGN}) - HIP_PREPARE_TARGET_COMMANDS(${hip_target} OBJ _generated_files _source_files ${_sources} ${_cmake_options} HIPCC_OPTIONS ${_hipcc_options} HCC_OPTIONS ${_hcc_options} NVCC_OPTIONS ${_nvcc_options}) - if(_source_files) - list(REMOVE_ITEM _sources ${_source_files}) - endif() - add_library(${hip_target} ${_cmake_options} ${_generated_files} ${_sources}) - set_target_properties(${hip_target} PROPERTIES LINKER_LANGUAGE ${HIP_C_OR_CXX}) -endmacro() - -# vim: ts=4:sw=4:expandtab:smartindent diff --git a/thirdParty/alpaka/cmake/modules/FindHIP/run_hipcc.cmake b/thirdParty/alpaka/cmake/modules/FindHIP/run_hipcc.cmake deleted file mode 100644 index c9582bdbd4..0000000000 --- a/thirdParty/alpaka/cmake/modules/FindHIP/run_hipcc.cmake +++ /dev/null @@ -1,190 +0,0 @@ -# /* -# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
- -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# */ - -############################################################################### -# Runs commands using HIPCC -############################################################################### - -############################################################################### -# This file runs the hipcc commands to produce the desired output file -# along with the dependency file needed by CMake to compute dependencies. -# -# Input variables: -# -# verbose:BOOL=<> OFF: Be as quiet as possible (default) -# ON : Describe each step -# build_configuration:STRING=<> Build configuration. Defaults to Debug. -# generated_file:STRING=<> File to generate. Mandatory argument. 
- -if(NOT build_configuration) - set(build_configuration Debug) -endif() -if(NOT generated_file) - message(FATAL_ERROR "You must specify generated_file on the command line") -endif() - -# Set these up as variables to make reading the generated file easier -set(HIP_HIPCC_EXECUTABLE "@HIP_HIPCC_EXECUTABLE@") # path -set(HIP_HIPCONFIG_EXECUTABLE "@HIP_HIPCONFIG_EXECUTABLE@") #path -set(HIP_HOST_COMPILER "@HIP_HOST_COMPILER@") # path -set(CMAKE_COMMAND "@CMAKE_COMMAND@") # path -set(HIP_run_make2cmake "@HIP_run_make2cmake@") # path -set(HCC_HOME "@HCC_HOME@") #path - -@HIP_HOST_FLAGS@ -@_HIP_HIPCC_FLAGS@ -@_HIP_HCC_FLAGS@ -@_HIP_NVCC_FLAGS@ -set(HIP_HIPCC_INCLUDE_ARGS "@HIP_HIPCC_INCLUDE_ARGS@") # list (needs to be in quotes to handle spaces properly) - -set(cmake_dependency_file "@cmake_dependency_file@") # path -set(source_file "@source_file@") # path -set(host_flag "@host_flag@") # bool - -# Determine compiler and compiler flags -execute_process(COMMAND ${HIP_HIPCONFIG_EXECUTABLE} --platform OUTPUT_VARIABLE HIP_PLATFORM OUTPUT_STRIP_TRAILING_WHITESPACE) -if(NOT host_flag) - set(__CC ${HIP_HIPCC_EXECUTABLE}) - if(HIP_PLATFORM STREQUAL "hcc") - if(NOT "x${HCC_HOME}" STREQUAL "x") - set(ENV{HCC_HOME} ${HCC_HOME}) - endif() - set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_HCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_HCC_FLAGS_${build_configuration}}) - else() - set(__CC_FLAGS ${HIP_HIPCC_FLAGS} ${HIP_NVCC_FLAGS} ${HIP_HIPCC_FLAGS_${build_configuration}} ${HIP_NVCC_FLAGS_${build_configuration}}) - endif() -else() - set(__CC ${HIP_HOST_COMPILER}) - set(__CC_FLAGS ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) -endif() -set(__CC_INCLUDES ${HIP_HIPCC_INCLUDE_ARGS}) - -# hip_execute_process - Executes a command with optional command echo and status message. 
-# status - Status message to print if verbose is true -# command - COMMAND argument from the usual execute_process argument structure -# ARGN - Remaining arguments are the command with arguments -# HIP_result - Return value from running the command -macro(hip_execute_process status command) - set(_command ${command}) - if(NOT "x${_command}" STREQUAL "xCOMMAND") - message(FATAL_ERROR "Malformed call to hip_execute_process. Missing COMMAND as second argument. (command = ${command})") - endif() - if(verbose) - execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) - # Build command string to print - set(hip_execute_process_string) - foreach(arg ${ARGN}) - # Escape quotes if any - string(REPLACE "\"" "\\\"" arg ${arg}) - # Surround args with spaces with quotes - if(arg MATCHES " ") - list(APPEND hip_execute_process_string "\"${arg}\"") - else() - list(APPEND hip_execute_process_string ${arg}) - endif() - endforeach() - # Echo the command - execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${hip_execute_process_string}) - endif() - # Run the command - execute_process(COMMAND ${ARGN} RESULT_VARIABLE HIP_result) -endmacro() - -# Delete the target file -hip_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - -# Generate the dependency file -hip_execute_process( - "Generating dependency file: ${cmake_dependency_file}.pre" - COMMAND "${__CC}" - -M - "${source_file}" - -o "${cmake_dependency_file}.pre" - ${__CC_FLAGS} - ${__CC_INCLUDES} - ) - -if(HIP_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the cmake readable dependency file to a temp file -hip_execute_process( - "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" - COMMAND "${CMAKE_COMMAND}" - -D "input_file:FILEPATH=${cmake_dependency_file}.pre" - -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" - -D "verbose=${verbose}" - -P "${HIP_run_make2cmake}" - ) - -if(HIP_result) - 
message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Copy the file if it is different -hip_execute_process( - "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" - COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" - ) - -if(HIP_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Delete the temporary file -hip_execute_process( - "Removing ${cmake_dependency_file}.tmp and ${cmake_dependency_file}.pre" - COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${cmake_dependency_file}.pre" - ) - -if(HIP_result) - message(FATAL_ERROR "Error generating ${generated_file}") -endif() - -# Generate the output file -hip_execute_process( - "Generating ${generated_file}" - COMMAND "${__CC}" - -c - "${source_file}" - -o "${generated_file}" - ${__CC_FLAGS} - ${__CC_INCLUDES} - ) - -if(HIP_result) - # Make sure that we delete the output file - hip_execute_process( - "Removing ${generated_file}" - COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" - ) - message(FATAL_ERROR "Error generating file ${generated_file}") -else() - if(verbose) - message("Generated ${generated_file} successfully.") - endif() -endif() -# vim: ts=4:sw=4:expandtab:smartindent diff --git a/thirdParty/alpaka/cmake/modules/FindHIP/run_make2cmake.cmake b/thirdParty/alpaka/cmake/modules/FindHIP/run_make2cmake.cmake deleted file mode 100644 index 48a51fa039..0000000000 --- a/thirdParty/alpaka/cmake/modules/FindHIP/run_make2cmake.cmake +++ /dev/null @@ -1,72 +0,0 @@ -# /* -# Copyright (c) 2015-2016 Advanced Micro Devices, Inc. All rights reserved. 
- -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# */ - -############################################################################### -# Computes dependencies using HIPCC -############################################################################### - -############################################################################### -# This file converts dependency files generated using hipcc to a format that -# cmake can understand. - -# Input variables: -# -# input_file:STRING=<> Dependency file to parse. Required argument -# output_file:STRING=<> Output file to generate. 
Required argument - -if(NOT input_file OR NOT output_file) - message(FATAL_ERROR "You must specify input_file and output_file on the command line") -endif() - -file(READ ${input_file} depend_text) - -if (NOT "${depend_text}" STREQUAL "") - string(REPLACE " /" "\n/" depend_text ${depend_text}) - string(REGEX REPLACE "^.*:" "" depend_text ${depend_text}) - string(REGEX REPLACE "[ \\\\]*\n" ";" depend_text ${depend_text}) - - set(dependency_list "") - - foreach(file ${depend_text}) - string(REGEX REPLACE "^ +" "" file ${file}) - if(NOT EXISTS "${file}") - message(WARNING " Removing non-existent dependency file: ${file}") - set(file "") - endif() - - if(NOT IS_DIRECTORY "${file}") - get_filename_component(file_absolute "${file}" ABSOLUTE) - list(APPEND dependency_list "${file_absolute}") - endif() - endforeach() -endif() - -# Remove the duplicate entries and sort them. -list(REMOVE_DUPLICATES dependency_list) -list(SORT dependency_list) - -foreach(file ${dependency_list}) - set(hip_hipcc_depend "${hip_hipcc_depend} \"${file}\"\n") -endforeach() - -file(WRITE ${output_file} "# Generated by: FindHIP.cmake. 
Do not edit.\nSET(HIP_HIPCC_DEPEND\n ${hip_hipcc_depend})\n\n") -# vim: ts=4:sw=4:expandtab:smartindent diff --git a/thirdParty/alpaka/cmake/modules/FindTBB.cmake b/thirdParty/alpaka/cmake/modules/FindTBB.cmake deleted file mode 100644 index 4cfabee852..0000000000 --- a/thirdParty/alpaka/cmake/modules/FindTBB.cmake +++ /dev/null @@ -1,246 +0,0 @@ -# The MIT License (MIT) -# -# Copyright (c) 2015 Justus Calvin -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -# -# FindTBB -# ------- -# -# Find TBB include directories and libraries. -# -# Usage: -# -# find_package(TBB [major[.minor]] [EXACT] -# [QUIET] [REQUIRED] -# [[COMPONENTS] [components...]] -# [OPTIONAL_COMPONENTS components...]) -# -# where the allowed components are tbbmalloc and tbb_preview. Users may modify -# the behavior of this module with the following variables: -# -# * TBB_ROOT_DIR - The base directory the of TBB installation. 
-# * TBB_INCLUDE_DIR - The directory that contains the TBB headers files. -# * TBB_LIBRARY - The directory that contains the TBB library files. -# * TBB__LIBRARY - The path of the TBB the corresponding TBB library. -# These libraries, if specified, override the -# corresponding library search results, where -# may be tbb, tbb_debug, tbbmalloc, tbbmalloc_debug, -# tbb_preview, or tbb_preview_debug. -# * TBB_USE_DEBUG_BUILD - The debug version of tbb libraries, if present, will -# be used instead of the release version. -# -# Users may modify the behavior of this module with the following environment -# variables: -# -# * TBB_INSTALL_DIR -# * TBBROOT -# * LIBRARY_PATH -# -# This module will set the following variables: -# -# * TBB_FOUND - Set to false, or undefined, if we haven’t found, or -# don’t want to use TBB. -# * TBB__FOUND - If False, optional part of TBB sytem is -# not available. -# * TBB_VERSION - The full version string -# * TBB_VERSION_MAJOR - The major version -# * TBB_VERSION_MINOR - The minor version -# * TBB_INTERFACE_VERSION - The interface version number defined in -# tbb/tbb_stddef.h. -# * TBB__LIBRARY_RELEASE - The path of the TBB release version of -# , where may be tbb, tbb_debug, -# tbbmalloc, tbbmalloc_debug, tbb_preview, or -# tbb_preview_debug. -# * TBB__LIBRARY_DEGUG - The path of the TBB release version of -# , where may be tbb, tbb_debug, -# tbbmalloc, tbbmalloc_debug, tbb_preview, or -# tbb_preview_debug. -# -# The following varibles should be used to build and link with TBB: -# -# * TBB_INCLUDE_DIRS - The include directory for TBB. -# * TBB_LIBRARIES - The libraries to link against to use TBB. -# * TBB_DEFINITIONS - Definitions to use when compiling code that uses TBB. 
- -include(FindPackageHandleStandardArgs) - -if(NOT TBB_FOUND) - - ################################## - # Check the build type - ################################## - - if(NOT DEFINED TBB_USE_DEBUG_BUILD) - if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo") - message(STATUS "Set TBB_USE_DEBUG_BUILD to TRUE because CMAKE_BUILD_TYPE is one of the debug configurations.") - set(TBB_USE_DEBUG_BUILD TRUE) - else() - set(TBB_USE_DEBUG_BUILD FALSE) - endif() - endif() - - ################################## - # Set the TBB search directories - ################################## - - # Define search paths based on user input and environment variables - set(TBB_SEARCH_DIR ${TBB_ROOT_DIR} $ENV{TBB_INSTALL_DIR} $ENV{TBBROOT}) - - # Define the search directories based on the current platform - if(CMAKE_SYSTEM_NAME STREQUAL "Windows") - set(TBB_DEFAULT_SEARCH_DIR "C:/Program Files/Intel/TBB" - "C:/Program Files (x86)/Intel/TBB") - - # Set the target architecture - if(CMAKE_SIZEOF_VOID_P EQUAL 8) - set(TBB_ARCHITECTURE "intel64") - else() - set(TBB_ARCHITECTURE "ia32") - endif() - - # Set the TBB search library path search suffix based on the version of VC - if(WINDOWS_STORE) - set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11_ui") - elseif(MSVC14) - set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc14") - elseif(MSVC12) - set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc12") - elseif(MSVC11) - set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc11") - elseif(MSVC10) - set(TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc10") - endif() - - # Add the library path search suffix for the VC independent version of TBB - list(APPEND TBB_LIB_PATH_SUFFIX "lib/${TBB_ARCHITECTURE}/vc_mt") - - elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin") - # OS X - set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb") - - # TODO: Check to see which C++ library is being used by the compiler. 
- if(NOT ${CMAKE_SYSTEM_VERSION} VERSION_LESS 13.0) - # The default C++ library on OS X 10.9 and later is libc++ - set(TBB_LIB_PATH_SUFFIX "lib/libc++") - else() - set(TBB_LIB_PATH_SUFFIX "lib") - endif() - elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux") - # Linux - set(TBB_DEFAULT_SEARCH_DIR "/opt/intel/tbb") - - # TODO: Check compiler version to see the suffix should be /gcc4.1 or - # /gcc4.1. For now, assume that the compiler is more recent than - # gcc 4.4.x or later. - if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") - set(TBB_LIB_PATH_SUFFIX "lib/intel64/gcc4.4") - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$") - set(TBB_LIB_PATH_SUFFIX "lib/ia32/gcc4.4") - endif() - endif() - - ################################## - # Find the TBB include dir - ################################## - - find_path(TBB_INCLUDE_DIRS tbb/tbb.h - HINTS ${TBB_INCLUDE_DIR} ${TBB_SEARCH_DIR} - PATHS ${TBB_DEFAULT_SEARCH_DIR} - PATH_SUFFIXES include) - - ################################## - # Find TBB components - ################################## - - # Find each component - foreach(_comp tbb_preview tbbmalloc tbb) - # Search for the libraries - find_library(TBB_${_comp}_LIBRARY_RELEASE ${_comp} - HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR} - PATHS ${TBB_DEFAULT_SEARCH_DIR} - PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX}) - - find_library(TBB_${_comp}_LIBRARY_DEBUG ${_comp}_debug - HINTS ${TBB_LIBRARY} ${TBB_SEARCH_DIR} - PATHS ${TBB_DEFAULT_SEARCH_DIR} ENV LIBRARY_PATH - PATH_SUFFIXES ${TBB_LIB_PATH_SUFFIX}) - - # Set the library to be used for the component - if(NOT TBB_${_comp}_LIBRARY) - if(TBB_USE_DEBUG_BUILD AND TBB_${_comp}_LIBRARY_DEBUG) - set(TBB_${_comp}_LIBRARY "${TBB_${_comp}_LIBRARY_DEBUG}") - elseif(TBB_${_comp}_LIBRARY_RELEASE) - set(TBB_${_comp}_LIBRARY "${TBB_${_comp}_LIBRARY_RELEASE}") - elseif(TBB_${_comp}_LIBRARY_DEBUG) - set(TBB_${_comp}_LIBRARY "${TBB_${_comp}_LIBRARY_DEBUG}") - message(STATUS "Using the debug library of '${_comp}' because the release library could not be found!") - 
endif() - endif() - - # Set the TBB library list and component found variables - if(TBB_${_comp}_LIBRARY) - list(APPEND TBB_LIBRARIES "${TBB_${_comp}_LIBRARY}") - set(TBB_${_comp}_FOUND TRUE) - else() - set(TBB_${_comp}_FOUND FALSE) - endif() - - mark_as_advanced(TBB_${_comp}_LIBRARY_RELEASE) - mark_as_advanced(TBB_${_comp}_LIBRARY_DEBUG) - mark_as_advanced(TBB_${_comp}_LIBRARY) - - endforeach() - - ################################## - # Set compile flags - ################################## - - if(TBB_tbb_LIBRARY MATCHES "debug") - set(TBB_DEFINITIONS "-DTBB_USE_DEBUG=1") - endif() - - ################################## - # Set version strings - ################################## - - if(TBB_INCLUDE_DIRS) - file(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _tbb_version_file) - string(REGEX REPLACE ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" - TBB_VERSION_MAJOR "${_tbb_version_file}") - string(REGEX REPLACE ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" - TBB_VERSION_MINOR "${_tbb_version_file}") - string(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" - TBB_INTERFACE_VERSION "${_tbb_version_file}") - set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}") - endif() - - find_package_handle_standard_args(TBB - REQUIRED_VARS TBB_INCLUDE_DIRS TBB_LIBRARIES - HANDLE_COMPONENTS - VERSION_VAR TBB_VERSION) - - mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARIES) - - unset(TBB_ARCHITECTURE) - unset(TBB_LIB_PATH_SUFFIX) - unset(TBB_DEFAULT_SEARCH_DIR) - -endif() diff --git a/thirdParty/alpaka/doc/doxygen/Doxyfile b/thirdParty/alpaka/doc/doxygen/Doxyfile deleted file mode 100644 index 1f7127b6a2..0000000000 --- a/thirdParty/alpaka/doc/doxygen/Doxyfile +++ /dev/null @@ -1,2501 +0,0 @@ -# Doxyfile 1.8.13 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. 
-# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all text -# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv -# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv -# for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = alpaka - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = "Abstraction Library for Parallel Kernel Acceleration" - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. 
Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = alpaka_doxygen.png - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = YES - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. 
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. 
- -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. 
Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = YES - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. 
- -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines. - -ALIASES = - -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. 
Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran: -# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran: -# Fortran. In the later case the parser tries to guess whether the code is fixed -# or free formatted code, this is the default for Fortran type files), VHDL. For -# instance to make doxygen treat .inc files as Fortran files (default is PHP), -# and .f files as C (default is Fortran), use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See http://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibilities issues. -# The default value is: YES. 
- -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 0. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 0 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = YES - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. 
Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. - -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. 
- -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. 
- -LOOKUP_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE = NO - -# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = NO - -# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO, -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. If set to YES, local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO, only methods in the interface are -# included. -# The default value is: NO. 
- -EXTRACT_LOCAL_METHODS = YES - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO, these declarations will be -# included in the documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO, these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. 
- -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. -# The default value is: system dependent. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES, the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES = NO - -# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will -# append additional text to a page's title, such as Class Reference. If set to -# YES the compound reference will be hidden. -# The default value is: NO. - -HIDE_COMPOUND_REFERENCE= NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. 
- -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = YES - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = YES - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. 
- -SORT_BY_SCOPE_NAME = YES - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo -# list. This list is created by putting \todo commands in the documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test -# list. This list is created by putting \test commands in the documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if ... \endif and \cond -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. 
The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES, the -# list will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. 
You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also http://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. See also \cite for info how to create references. - -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. 
- -WARN_IF_UNDOCUMENTED = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. -# The default value is: NO. - -WARN_NO_PARAMDOC = YES - -# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. -# The default value is: NO. - -WARN_AS_ERROR = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. 
Separate the files or directories with -# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING -# Note: If this tag is empty the current directory is searched. - -INPUT = ../../include/ \ - ../../README.md - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: http://www.gnu.org/software/libiconv) for the list of -# possible encodings. -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# read by doxygen. -# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, -# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf. - -FILE_PATTERNS = *.c \ - *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.idl \ - *.ddl \ - *.odl \ - *.h \ - *.hh \ - *.hxx \ - *.hpp \ - *.h++ \ - *.cs \ - *.d \ - *.php \ - *.php4 \ - *.php5 \ - *.phtml \ - *.inc \ - *.m \ - *.markdown \ - *.md \ - *.mm \ - *.dox \ - *.py \ - *.f90 \ - *.f \ - *.for \ - *.tcl \ - *.vhd \ - *.vhdl \ - *.ucf \ - *.qsf \ - *.as \ - *.js \ - *.c \ - *.cu - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. 
- -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. 
- -EXAMPLE_PATTERNS = * - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# <filter> <input-file> -# -# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the -# name of an input file. Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied.
-# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. - -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = ../../README.md - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = YES - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. 
- -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS = NO - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# function all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see http://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. 
-# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the config file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. - -VERBATIM_HEADERS = YES - -# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the -# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the -# cost of reduced performance. This can be particularly helpful with template -# rich C++ code for which doxygen's built-in parser lacks the necessary type -# information. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse-libclang=ON option for CMake. -# The default value is: NO. - -CLANG_ASSISTED_PARSING = NO - -# If clang assisted parsing is enabled you can provide the compiler with command -# line options that you would normally use when invoking the compiler. Note that -# the include paths will already be set by doxygen for the files and directories -# specified with INPUT and INCLUDE_PATH. -# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. 
- -CLANG_OPTIONS = - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = NO - -# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in -# which the alphabetical index list will be split. -# Minimum value: 1, maximum value: 20, default value: 5. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file that includes any scripts and style sheets -# that doxygen needs, which is dependent on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. 
-# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined -# cascading style sheets that are included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. -# Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra style sheet files is of importance (e.g. the last -# style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# http://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. 
-# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS = YES - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. 
Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: http://developer.apple.com/tools/xcode/), introduced with -# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html -# for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. 
-# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
- -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler (hhc.exe). If non-empty, -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated -# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it -# enables the Previous and Next buttons. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. 
The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# http://qt-project.org/doc/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. 
If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. 
If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = YES - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 1 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. -# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH = 250 - -# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. 
When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes take effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_TRANSPARENT = YES - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# http://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX -# installed or if you want formulas to look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -USE_MATHJAX = NO - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option.
The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from http://www.mathjax.org before deployment. -# The default value is: http://cdn.mathjax.org/mathjax/latest. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use <access key> + S -# (what the <access key> is depends on the OS and browser, but it is typically -# <CTRL>, <ALT>/