Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Linux aarch64 via sse2neon #69

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
58 changes: 58 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
name: CI

on:
push:
branches:
- master
pull_request:
branches:
- master

jobs:
build:
name: Linux x86_64
runs-on: ubuntu-22.04

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Build
run: |
set -x
cd src
./configure
make -j
file spaln | grep x86-64
file makmdm | grep x86-64
file sortgrcd | grep x86-64
file makdbs | grep x86-64

build-aarch64:
name: Linux aarch64
runs-on: ubuntu-22.04

steps:
- name: Checkout
uses: actions/checkout@v4

- name: Build
uses: uraimo/run-on-arch-action@v2
with:
arch: aarch64
distro: ubuntu20.04
githubToken: ${{ github.token }}
dockerRunArgs: |
--volume "${PWD}:/spaln"
install: |
apt-get update -q -y
apt-get install -q -y make g++-10 zlib1g-dev file
run: |
set -x
cd /spaln/src
./configure
make -j CXX="g++-10 -O3 -march=native -DM_THREAD=1"
file spaln | grep aarch64
file makmdm | grep aarch64
file sortgrcd | grep aarch64
file makdbs | grep aarch64
28 changes: 14 additions & 14 deletions src/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -87,36 +87,36 @@ wc:
ls:
ls $(SRC) $(HDR)

spaln: spaln.cc blksrc.o $(SLIB)
spaln: spaln.cc blksrc.o sblib.a
$(CXX) -o $@ spaln.cc blksrc.o $(SLIB) $(ILIB)
sortgrcd: sortgrcd.cc $(SLIB)
sortgrcd: sortgrcd.cc sblib.a
$(CXX) -o $@ sortgrcd.cc $(SLIB) $(ILIB)
makdbs: makdbs.cc dbs.h seq.h bitpat.h $(SLIB)
makdbs: makdbs.cc dbs.h seq.h bitpat.h sblib.a
$(CXX) -o $@ makdbs.cc $(SLIB) $(ILIB)
makmdm: makmdm.cc mdm.h $(CLIB)
makmdm: makmdm.cc mdm.h clib.a
$(CXX) -o $@ makmdm.cc $(CLIB) $(ILIB)

dvn: dvn.cc autocomp.h $(ULIB)
dvn: dvn.cc autocomp.h ublib.a
$(CXX) -o $@ dvn.cc $(ULIB) $(ILIB)
exinpot: exinpot.cc codepot.h utilseq.h eijunc.h $(SLIB)
exinpot: exinpot.cc codepot.h utilseq.h eijunc.h sblib.a
$(CXX) -o $@ exinpot.cc $(SLIB) $(ILIB)
kmers: kmers.cc $(SLIB)
kmers: kmers.cc sblib.a
$(CXX) -o $@ kmers.cc $(SLIB) $(ILIB)
npssm: npssm.cc $(SLIB)
npssm: npssm.cc sblib.a
$(CXX) -o $@ npssm.cc $(SLIB) $(ILIB)
rdn: rdn.cc $(ULIB)
rdn: rdn.cc ublib.a
$(CXX) -o $@ rdn.cc $(ULIB) $(ILIB)
utn: utn.o $(ULIB)
utn: utn.o ublib.a
$(CXX) -o $@ utn.o $(ULIB) $(ILIB)

compild: compild.cc calcserv.h ildpdf.o $(CLIB)
compild: compild.cc calcserv.h ildpdf.o clib.a
$(CXX) -o compild compild.cc ildpdf.o $(CLIB) -lm -lgsl -lgslcblas
decompild: decompild.o ildpdf.o fitild.o $(CLIB)
decompild: decompild.o ildpdf.o fitild.o clib.a
$(CXX) -o decompild decompild.o ildpdf.o fitild.o $(CLIB) -lgsl -lgslcblas
fitild: fitild.cc ildpdf.o $(CLIB)
fitild: fitild.cc ildpdf.o clib.a
$(CXX) -o $@ -DMAIN fitild.cc ildpdf.o $(CLIB) \
-L/usr/local/lib -lm -lgsl -lgslcblas
plotild: plotild.cc ildpdf.o $(CLIB)
plotild: plotild.cc ildpdf.o clib.a
$(CXX) -o plotild plotild.cc ildpdf.o $(CLIB) -lm -lgsl -lgslcblas

clib.o: clib.cc stdtype.h
Expand Down
8 changes: 4 additions & 4 deletions src/codepot.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ class ExinPot;
struct SGPT2 {
STYPE sig5;
STYPE sig3;
char phs5;
char phs3;
signed char phs5;
signed char phs3;
};

struct SGPT6 {
Expand All @@ -38,8 +38,8 @@ struct SGPT6 {
STYPE sigT;
STYPE sigE;
STYPE sigI;
char phs5;
char phs3;
signed char phs5;
signed char phs3;
};

static const SGPT2 ZeroSGPT2 = {0, 0, -2, -2};
Expand Down
12 changes: 6 additions & 6 deletions src/fwd2b1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1128,9 +1128,9 @@ const int mode = simd > 1? 1: (cvol < USHRT_MAX? 3: 5);
SimdAln2s1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2s1<short, 16, __m256i, __m256i>
# elif __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON
SimdAln2s1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2s1<short, 8, int8x16_t, int8x16_t>
# endif
trbfwd(seqs, pwd, wdw, 0, 0, mode, vmf);
Expand Down Expand Up @@ -1319,9 +1319,9 @@ const int mode =
SimdAln2s1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2s1<short, 16, __m256i, __m256i>
# elif __SSE4_1__ // __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON // __SSE4_1__
SimdAln2s1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2s1<short, 8, int8x16_t, int8x16_t>
# endif
sb1(seqs, pwd, wdw, 0, 0, mode);
Expand Down Expand Up @@ -1586,9 +1586,9 @@ const int m = a->right - a->left;
SimdAln2s1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2s1<short, 16, __m256i, __m256i>
# elif __SSE4_1__ // __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON // __SSE4_1__
SimdAln2s1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2s1<short, 8, int8x16_t, int8x16_t>
# endif
fwds(seqs, pwd, wdw, 0, 0, 0);
Expand Down
8 changes: 4 additions & 4 deletions src/fwd2h1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2023,9 +2023,9 @@ const int mode = simd > 1? 1: (cvol < USHRT_MAX? 3: 5);
SimdAln2h1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2h1<short, 16, __m256i, __m256i>
# elif __SSE4_1__ // __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON // __SSE4_1__
SimdAln2h1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2h1<short, 8, int8x16_t, int8x16_t>
# endif //
trbfwd(seqs, pwd, wdw, spjcs, cip, mode, vmf);
Expand Down Expand Up @@ -2228,9 +2228,9 @@ const int mode =
SimdAln2h1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2h1<short, 16, __m256i, __m256i>
# elif __SSE4_1__ // __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON // __SSE4_1__
SimdAln2h1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2h1<short, 8, int8x16_t, int8x16_t>
# endif
hb1(seqs, pwd, wdw, spjcs, cip, mode);
Expand Down
12 changes: 6 additions & 6 deletions src/fwd2s1.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1696,9 +1696,9 @@ const int mode = simd > 1? 1: (cvol < USHRT_MAX? 3: 5);
SimdAln2s1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2s1<short, 16, __m256i, __m256i>
# elif __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON
SimdAln2s1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2s1<short, 8, int8x16_t, int8x16_t>
# endif
trbfwd(seqs, pwd, wdw, spjcs, cip, mode, vmf);
Expand Down Expand Up @@ -1898,9 +1898,9 @@ const int mode =
SimdAln2s1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2s1<short, 16, __m256i, __m256i>
# elif __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON
SimdAln2s1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2s1<short, 8, int8x16_t, int8x16_t>
# endif
sb1(seqs, pwd, wdw, spjcs, cip, mode);
Expand Down Expand Up @@ -2740,9 +2740,9 @@ const int m = a->right - a->left;
SimdAln2s1<short, 32, __m512i, __m512i>
# elif __AVX2__
SimdAln2s1<short, 16, __m256i, __m256i>
# elif __SSE4_1__
# elif __SSE4_1__ || __ARM_NEON
SimdAln2s1<short, 8, __m128i, __m128i>
# else // __ARM_NEON
# else // __ARM_NEON_NO
SimdAln2s1<short, 8, int8x16_t, int8x16_t>
# endif
fwds(seqs, pwd, wdw, alnv.spjcs, alnv.cip, 0);
Expand Down
6 changes: 3 additions & 3 deletions src/simd_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#if defined(__SSE4_1__)
#include <x86intrin.h>
#elif defined(__ARM_NEON)
#include <arm_neon.h>
#include "sse2neon.h"
#endif

#define SIMD_INLINE inline __attribute__((always_inline))
Expand Down Expand Up @@ -79,7 +79,7 @@ struct Simd_functions {

// class specialization

#if __SSE4_1__
#if __SSE4_1__ || __ARM_NEON

template <typename regist_v>
struct Simd_functions<char, 16, regist_v> {
Expand Down Expand Up @@ -387,7 +387,7 @@ struct Simd_functions<float, 4, regist_v> {
};
#endif // __SSE4_1__ || __ARM_NEON

#if __ARM_NEON
#if __ARM_NEON_NO

template <typename regist_v>
struct Simd_functions<char, 16, regist_v> {
Expand Down
Loading